c tokenizer escape sequences

Vexu 2019-12-16 00:22:41 +02:00
parent 9f0e83a571
commit ab60c8e28f
4 changed files with 100 additions and 20 deletions


@@ -40,11 +40,14 @@ pub const CToken = struct {
};
};
pub fn tokenizeCMacro(tl: *TokenList, chars: [*]const u8) !void {
pub fn tokenizeCMacro(tl: *TokenList, chars: [*:0]const u8) !void {
var index: usize = 0;
var first = true;
while (true) {
const tok = try next(chars, &index);
if (tok.id == .StrLit or tok.id == .CharLit)
try tl.push(try zigifyEscapeSequences(tl.allocator, tok))
else
try tl.push(tok);
if (tok.id == .Eof)
return;
@@ -61,7 +64,83 @@ pub fn tokenizeCMacro(tl: *TokenList, chars: [*]const u8) !void {
}
}
fn next(chars: [*]const u8, i: *usize) !CToken {
fn zigifyEscapeSequences(allocator: *std.mem.Allocator, tok: CToken) !CToken {
for (tok.bytes) |c| {
if (c == '\\') {
break;
}
} else return tok;
var bytes = try allocator.alloc(u8, tok.bytes.len * 2);
var escape = false;
var i: usize = 0;
for (tok.bytes) |c| {
if (escape) {
switch (c) {
'n', 'r', 't', '\\', '\'', '\"', 'x' => {
bytes[i] = c;
},
'a' => {
bytes[i] = 'x';
i += 1;
bytes[i] = '0';
i += 1;
bytes[i] = '7';
},
'b' => {
bytes[i] = 'x';
i += 1;
bytes[i] = '0';
i += 1;
bytes[i] = '8';
},
'f' => {
bytes[i] = 'x';
i += 1;
bytes[i] = '0';
i += 1;
bytes[i] = 'C';
},
'v' => {
bytes[i] = 'x';
i += 1;
bytes[i] = '0';
i += 1;
bytes[i] = 'B';
},
'?' => {
i -= 1;
bytes[i] = '?';
},
'u', 'U' => {
// TODO unicode escape sequences
return error.TokenizingFailed;
},
'0'...'7' => {
// TODO octal escape sequences
return error.TokenizingFailed;
},
else => {
// unknown escape sequence
return error.TokenizingFailed;
},
}
i += 1;
escape = false;
} else {
if (c == '\\') {
escape = true;
}
bytes[i] = c;
i += 1;
}
}
return CToken{
.id = tok.id,
.bytes = bytes[0..i],
};
}
fn next(chars: [*:0]const u8, i: *usize) !CToken {
var state: enum {
Start,
GotLt,
@@ -462,7 +541,7 @@ fn next(chars: [*]const u8, i: *usize) !CToken {
.String => { // TODO char escapes
switch (c) {
'\"' => {
result.bytes = chars[begin_index + 1 .. i.* - 1];
result.bytes = chars[begin_index..i.*];
return result;
},
else => {},
@@ -471,7 +550,7 @@ fn next(chars: [*]const u8, i: *usize) !CToken {
.CharLit => {
switch (c) {
'\'' => {
result.bytes = chars[begin_index + 1 .. i.* - 1];
result.bytes = chars[begin_index..i.*];
return result;
},
else => {},
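
A minimal sketch (not part of this commit) of what the new zigifyEscapeSequences is expected to produce, written against the commit-era *std.mem.Allocator interface; the test name, buffer size, and sample token are illustrative assumptions:

const std = @import("std");

test "zigifyEscapeSequences rewrites C-only escapes" {
    var buf: [32]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(buf[0..]);
    // "a\ab" as it appears in C source, delimiters included (see the .String change above).
    const tok = CToken{ .id = .StrLit, .bytes = "\"a\\ab\"" };
    const out = try zigifyEscapeSequences(&fba.allocator, tok);
    // \a (BEL, 0x07) has no Zig spelling, so it becomes \x07; escapes shared with
    // Zig (\n, \r, \t, \\, \', \", \x..) pass through unchanged.
    std.debug.assert(std.mem.eql(u8, out.bytes, "\"a\\x07b\""));
}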


@@ -734,7 +734,7 @@ pub extern fn ZigClangSourceManager_getSpellingLoc(self: ?*const struct_ZigClang
pub extern fn ZigClangSourceManager_getFilename(self: *const struct_ZigClangSourceManager, SpellingLoc: struct_ZigClangSourceLocation) ?[*:0]const u8;
pub extern fn ZigClangSourceManager_getSpellingLineNumber(self: ?*const struct_ZigClangSourceManager, Loc: struct_ZigClangSourceLocation) c_uint;
pub extern fn ZigClangSourceManager_getSpellingColumnNumber(self: ?*const struct_ZigClangSourceManager, Loc: struct_ZigClangSourceLocation) c_uint;
pub extern fn ZigClangSourceManager_getCharacterData(self: ?*const struct_ZigClangSourceManager, SL: struct_ZigClangSourceLocation) [*c]const u8;
pub extern fn ZigClangSourceManager_getCharacterData(self: ?*const struct_ZigClangSourceManager, SL: struct_ZigClangSourceLocation) [*:0]const u8;
pub extern fn ZigClangASTContext_getPointerType(self: ?*const struct_ZigClangASTContext, T: struct_ZigClangQualType) struct_ZigClangQualType;
pub extern fn ZigClangASTUnit_getASTContext(self: ?*struct_ZigClangASTUnit) ?*struct_ZigClangASTContext;
pub extern fn ZigClangASTUnit_getSourceManager(self: *struct_ZigClangASTUnit) *struct_ZigClangSourceManager;
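
An aside (not part of the commit): [*:0]const u8 is Zig's sentinel-terminated pointer, so the new signatures guarantee a trailing 0 byte that the tokenizer's loop can rely on. A minimal illustration, with an assumed sample string:

const std = @import("std");

test "[*:0]const u8 guarantees a trailing 0 sentinel" {
    const chars: [*:0]const u8 = "\"aoeu\\a derp\"";
    var i: usize = 0;
    while (chars[i] != 0) : (i += 1) {}
    // Scanning to the sentinel is well-defined; a plain [*]const u8 gives no such guarantee.
    std.debug.assert(i == 13);
}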


@@ -2629,9 +2629,9 @@ fn transPreprocessorEntities(c: *Context, unit: *ZigClangASTUnit) Error!void {
} else false;
(if (macro_fn)
transMacroFnDefine(c, &tok_it, name, begin_c, begin_loc)
transMacroFnDefine(c, &tok_it, name, begin_loc)
else
transMacroDefine(c, &tok_it, name, begin_c, begin_loc)) catch |err| switch (err) {
transMacroDefine(c, &tok_it, name, begin_loc)) catch |err| switch (err) {
error.UnsupportedTranslation,
error.ParseError,
=> try failDecl(c, begin_loc, name, "unable to translate macro", .{}),
@@ -2643,7 +2643,7 @@ fn transPreprocessorEntities(c: *Context, unit: *ZigClangASTUnit) Error!void {
}
}
fn transMacroDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, char_ptr: [*]const u8, source_loc: ZigClangSourceLocation) ParseError!void {
fn transMacroDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, source_loc: ZigClangSourceLocation) ParseError!void {
const rp = makeRestorePoint(c);
const visib_tok = try appendToken(c, .Keyword_pub, "pub");
@@ -2674,7 +2674,7 @@ fn transMacroDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8,
_ = try c.macro_table.put(name, &node.base);
}
fn transMacroFnDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, char_ptr: [*]const u8, source_loc: ZigClangSourceLocation) ParseError!void {
fn transMacroFnDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, source_loc: ZigClangSourceLocation) ParseError!void {
const rp = makeRestorePoint(c);
const pub_tok = try appendToken(c, .Keyword_pub, "pub");
const inline_tok = try appendToken(c, .Keyword_inline, "inline");
@@ -2829,11 +2829,7 @@ fn parseCPrimaryExpr(rp: RestorePoint, it: *ctok.TokenList.Iterator, source_loc:
const tok = it.next().?;
switch (tok.id) {
.CharLit => {
const buf = try rp.c.a().alloc(u8, tok.bytes.len + "''".len);
buf[0] = '\'';
writeEscapedString(buf[1..], tok.bytes);
buf[buf.len - 1] = '\'';
const token = try appendToken(rp.c, .CharLiteral, buf);
const token = try appendToken(rp.c, .CharLiteral, tok.bytes);
const node = try rp.c.a().create(ast.Node.CharLiteral);
node.* = ast.Node.CharLiteral{
.token = token,
@@ -2841,11 +2837,7 @@ fn parseCPrimaryExpr(rp: RestorePoint, it: *ctok.TokenList.Iterator, source_loc:
return &node.base;
},
.StrLit => {
const buf = try rp.c.a().alloc(u8, tok.bytes.len + "\"\"".len);
buf[0] = '"';
writeEscapedString(buf[1..], tok.bytes);
buf[buf.len - 1] = '"';
const token = try appendToken(rp.c, .StringLiteral, buf);
const token = try appendToken(rp.c, .StringLiteral, tok.bytes);
const node = try rp.c.a().create(ast.Node.StringLiteral);
node.* = ast.Node.StringLiteral{
.token = token,
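
A sketch (hypothetical values, not from the commit) of why the quoting and escaping buffer above became unnecessary: after the tokenizer change, tok.bytes for a StrLit or CharLit already carries its delimiters and Zig-valid escapes, so it is handed to appendToken verbatim:

const std = @import("std");

test "literal token bytes are already valid Zig literal text" {
    // What ctok now hands to parseCPrimaryExpr for #define FOO2 "aoeu\a derp":
    const tok_bytes: []const u8 = "\"aoeu\\x07 derp\"";
    // Previously the caller allocated bytes.len + 2, wrote the quotes, and ran
    // writeEscapedString; now the slice is used as the literal's source text directly.
    std.debug.assert(tok_bytes[0] == '"' and tok_bytes[tok_bytes.len - 1] == '"');
}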


@@ -411,6 +411,15 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
\\}
});
cases.add_2("macro escape sequences",
\\#define FOO "aoeu\xab derp"
\\#define FOO2 "aoeu\a derp"
, &[_][]const u8{
\\pub const FOO = "aoeu\xab derp";
,
\\pub const FOO2 = "aoeu\x07 derp";
});
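
For completeness, a hypothetical extra case (not added by this commit) showing that char literals go through the same conversion; the macro name and expected output are assumptions following the pattern above:

cases.add_2("macro char escape sequences",
    \\#define BELL '\a'
, &[_][]const u8{
    \\pub const BELL = '\x07';
});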
/////////////// Cases for only stage1 which are TODO items for stage2 ////////////////
cases.add_both("typedef of function in struct field",