c tokenizer escape sequences
This commit is contained in:
parent
9f0e83a571
commit
ab60c8e28f
@ -40,12 +40,15 @@ pub const CToken = struct {
|
||||
};
|
||||
};
|
||||
|
||||
pub fn tokenizeCMacro(tl: *TokenList, chars: [*]const u8) !void {
|
||||
pub fn tokenizeCMacro(tl: *TokenList, chars: [*:0]const u8) !void {
|
||||
var index: usize = 0;
|
||||
var first = true;
|
||||
while (true) {
|
||||
const tok = try next(chars, &index);
|
||||
try tl.push(tok);
|
||||
if (tok.id == .StrLit or tok.id == .CharLit)
|
||||
try tl.push(try zigifyEscapeSequences(tl.allocator, tok))
|
||||
else
|
||||
try tl.push(tok);
|
||||
if (tok.id == .Eof)
|
||||
return;
|
||||
if (first) {
|
||||
@ -61,7 +64,83 @@ pub fn tokenizeCMacro(tl: *TokenList, chars: [*]const u8) !void {
|
||||
}
|
||||
}
|
||||
|
||||
fn next(chars: [*]const u8, i: *usize) !CToken {
|
||||
/// Rewrites the C escape sequences inside a string or character literal token
/// so that its bytes can be emitted as a Zig literal.
///
/// Escapes handled:
///   * `\n`, `\r`, `\t`, `\\`, `\'`, `\"` and `\x` are copied through unchanged.
///   * `\a`, `\b`, `\f` and `\v` are rewritten as `\x07`, `\x08`, `\x0C` and `\x0B`.
///   * `\?` is replaced by a plain `?`.
///
/// Returns `tok` itself, without allocating, when it contains no backslash.
/// Otherwise returns a token with the same `id` whose `bytes` is a prefix of a
/// buffer of `tok.bytes.len * 2` bytes obtained from `allocator`; the returned
/// slice is therefore usually shorter than the underlying allocation.
///
/// Fails with `error.TokenizingFailed` on unicode (`\u`, `\U`) and octal
/// escapes (not implemented yet), on unknown escapes, and on a token that ends
/// in the middle of an escape. The buffer is released on every error path.
/// Allocation failures from `allocator` are propagated as-is.
fn zigifyEscapeSequences(allocator: *std.mem.Allocator, tok: CToken) !CToken {
    // Fast path: nothing to rewrite, hand the token back untouched.
    if (std.mem.indexOfScalar(u8, tok.bytes, '\\') == null) return tok;

    // The largest growth is a two-byte escape such as `\a` turning into the
    // four-byte `\x07`, so twice the input length is always enough.
    const bytes = try allocator.alloc(u8, tok.bytes.len * 2);
    errdefer allocator.free(bytes);

    var escape = false;
    var i: usize = 0;
    for (tok.bytes) |c| {
        if (!escape) {
            // A backslash is copied like any other byte; the byte that
            // follows it decides whether it stays or gets patched.
            escape = (c == '\\');
            bytes[i] = c;
            i += 1;
            continue;
        }

        escape = false;
        switch (c) {
            // NOTE(review): `\x` is forwarded without inspecting the digits
            // that follow it — confirm that the digit count is always valid
            // for the emitted literal.
            'n', 'r', 't', '\\', '\'', '\"', 'x' => {
                bytes[i] = c;
                i += 1;
            },
            'a', 'b', 'f', 'v' => {
                // Emitted as a two-digit hex escape after the backslash
                // that has already been copied.
                const hex: []const u8 = switch (c) {
                    'a' => "07",
                    'b' => "08",
                    'f' => "0C",
                    'v' => "0B",
                    else => unreachable,
                };
                bytes[i] = 'x';
                bytes[i + 1] = hex[0];
                bytes[i + 2] = hex[1];
                i += 3;
            },
            '?' => {
                // Overwrite the backslash that was copied on the previous
                // iteration; the output length does not change.
                bytes[i - 1] = '?';
            },
            'u', 'U' => {
                // TODO unicode escape sequences
                return error.TokenizingFailed;
            },
            '0'...'7' => {
                // TODO octal escape sequences
                return error.TokenizingFailed;
            },
            else => {
                // unknown escape sequence
                return error.TokenizingFailed;
            },
        }
    }

    // A trailing backslash with nothing after it is an incomplete escape.
    if (escape) return error.TokenizingFailed;

    return CToken{
        .id = tok.id,
        .bytes = bytes[0..i],
    };
}
|
||||
|
||||
fn next(chars: [*:0]const u8, i: *usize) !CToken {
|
||||
var state: enum {
|
||||
Start,
|
||||
GotLt,
|
||||
@ -462,7 +541,7 @@ fn next(chars: [*]const u8, i: *usize) !CToken {
|
||||
.String => { // TODO char escapes
|
||||
switch (c) {
|
||||
'\"' => {
|
||||
result.bytes = chars[begin_index + 1 .. i.* - 1];
|
||||
result.bytes = chars[begin_index..i.*];
|
||||
return result;
|
||||
},
|
||||
else => {},
|
||||
@ -471,7 +550,7 @@ fn next(chars: [*]const u8, i: *usize) !CToken {
|
||||
.CharLit => {
|
||||
switch (c) {
|
||||
'\'' => {
|
||||
result.bytes = chars[begin_index + 1 .. i.* - 1];
|
||||
result.bytes = chars[begin_index..i.*];
|
||||
return result;
|
||||
},
|
||||
else => {},
|
||||
|
@ -734,7 +734,7 @@ pub extern fn ZigClangSourceManager_getSpellingLoc(self: ?*const struct_ZigClang
|
||||
pub extern fn ZigClangSourceManager_getFilename(self: *const struct_ZigClangSourceManager, SpellingLoc: struct_ZigClangSourceLocation) ?[*:0]const u8;
|
||||
pub extern fn ZigClangSourceManager_getSpellingLineNumber(self: ?*const struct_ZigClangSourceManager, Loc: struct_ZigClangSourceLocation) c_uint;
|
||||
pub extern fn ZigClangSourceManager_getSpellingColumnNumber(self: ?*const struct_ZigClangSourceManager, Loc: struct_ZigClangSourceLocation) c_uint;
|
||||
pub extern fn ZigClangSourceManager_getCharacterData(self: ?*const struct_ZigClangSourceManager, SL: struct_ZigClangSourceLocation) [*c]const u8;
|
||||
pub extern fn ZigClangSourceManager_getCharacterData(self: ?*const struct_ZigClangSourceManager, SL: struct_ZigClangSourceLocation) [*:0]const u8;
|
||||
pub extern fn ZigClangASTContext_getPointerType(self: ?*const struct_ZigClangASTContext, T: struct_ZigClangQualType) struct_ZigClangQualType;
|
||||
pub extern fn ZigClangASTUnit_getASTContext(self: ?*struct_ZigClangASTUnit) ?*struct_ZigClangASTContext;
|
||||
pub extern fn ZigClangASTUnit_getSourceManager(self: *struct_ZigClangASTUnit) *struct_ZigClangSourceManager;
|
||||
|
@ -2629,9 +2629,9 @@ fn transPreprocessorEntities(c: *Context, unit: *ZigClangASTUnit) Error!void {
|
||||
} else false;
|
||||
|
||||
(if (macro_fn)
|
||||
transMacroFnDefine(c, &tok_it, name, begin_c, begin_loc)
|
||||
transMacroFnDefine(c, &tok_it, name, begin_loc)
|
||||
else
|
||||
transMacroDefine(c, &tok_it, name, begin_c, begin_loc)) catch |err| switch (err) {
|
||||
transMacroDefine(c, &tok_it, name, begin_loc)) catch |err| switch (err) {
|
||||
error.UnsupportedTranslation,
|
||||
error.ParseError,
|
||||
=> try failDecl(c, begin_loc, name, "unable to translate macro", .{}),
|
||||
@ -2643,7 +2643,7 @@ fn transPreprocessorEntities(c: *Context, unit: *ZigClangASTUnit) Error!void {
|
||||
}
|
||||
}
|
||||
|
||||
fn transMacroDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, char_ptr: [*]const u8, source_loc: ZigClangSourceLocation) ParseError!void {
|
||||
fn transMacroDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, source_loc: ZigClangSourceLocation) ParseError!void {
|
||||
const rp = makeRestorePoint(c);
|
||||
|
||||
const visib_tok = try appendToken(c, .Keyword_pub, "pub");
|
||||
@ -2674,7 +2674,7 @@ fn transMacroDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8,
|
||||
_ = try c.macro_table.put(name, &node.base);
|
||||
}
|
||||
|
||||
fn transMacroFnDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, char_ptr: [*]const u8, source_loc: ZigClangSourceLocation) ParseError!void {
|
||||
fn transMacroFnDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, source_loc: ZigClangSourceLocation) ParseError!void {
|
||||
const rp = makeRestorePoint(c);
|
||||
const pub_tok = try appendToken(c, .Keyword_pub, "pub");
|
||||
const inline_tok = try appendToken(c, .Keyword_inline, "inline");
|
||||
@ -2829,11 +2829,7 @@ fn parseCPrimaryExpr(rp: RestorePoint, it: *ctok.TokenList.Iterator, source_loc:
|
||||
const tok = it.next().?;
|
||||
switch (tok.id) {
|
||||
.CharLit => {
|
||||
const buf = try rp.c.a().alloc(u8, tok.bytes.len + "''".len);
|
||||
buf[0] = '\'';
|
||||
writeEscapedString(buf[1..], tok.bytes);
|
||||
buf[buf.len - 1] = '\'';
|
||||
const token = try appendToken(rp.c, .CharLiteral, buf);
|
||||
const token = try appendToken(rp.c, .CharLiteral, tok.bytes);
|
||||
const node = try rp.c.a().create(ast.Node.CharLiteral);
|
||||
node.* = ast.Node.CharLiteral{
|
||||
.token = token,
|
||||
@ -2841,11 +2837,7 @@ fn parseCPrimaryExpr(rp: RestorePoint, it: *ctok.TokenList.Iterator, source_loc:
|
||||
return &node.base;
|
||||
},
|
||||
.StrLit => {
|
||||
const buf = try rp.c.a().alloc(u8, tok.bytes.len + "\"\"".len);
|
||||
buf[0] = '"';
|
||||
writeEscapedString(buf[1..], tok.bytes);
|
||||
buf[buf.len - 1] = '"';
|
||||
const token = try appendToken(rp.c, .StringLiteral, buf);
|
||||
const token = try appendToken(rp.c, .StringLiteral, tok.bytes);
|
||||
const node = try rp.c.a().create(ast.Node.StringLiteral);
|
||||
node.* = ast.Node.StringLiteral{
|
||||
.token = token,
|
||||
|
@ -411,6 +411,15 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
|
||||
\\}
|
||||
});
|
||||
|
||||
cases.add_2("macro escape sequences",
|
||||
\\#define FOO "aoeu\xab derp"
|
||||
\\#define FOO2 "aoeu\a derp"
|
||||
, &[_][]const u8{
|
||||
\\pub const FOO = "aoeu\xab derp";
|
||||
,
|
||||
\\pub const FOO2 = "aoeu\x07 derp";
|
||||
});
|
||||
|
||||
/////////////// Cases for only stage1 which are TODO items for stage2 ////////////////
|
||||
|
||||
cases.add_both("typedef of function in struct field",
|
||||
|
Loading…
x
Reference in New Issue
Block a user