c tokenizer escape sequences
This commit is contained in:
parent
9f0e83a571
commit
ab60c8e28f
@ -40,12 +40,15 @@ pub const CToken = struct {
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
pub fn tokenizeCMacro(tl: *TokenList, chars: [*]const u8) !void {
|
pub fn tokenizeCMacro(tl: *TokenList, chars: [*:0]const u8) !void {
|
||||||
var index: usize = 0;
|
var index: usize = 0;
|
||||||
var first = true;
|
var first = true;
|
||||||
while (true) {
|
while (true) {
|
||||||
const tok = try next(chars, &index);
|
const tok = try next(chars, &index);
|
||||||
try tl.push(tok);
|
if (tok.id == .StrLit or tok.id == .CharLit)
|
||||||
|
try tl.push(try zigifyEscapeSequences(tl.allocator, tok))
|
||||||
|
else
|
||||||
|
try tl.push(tok);
|
||||||
if (tok.id == .Eof)
|
if (tok.id == .Eof)
|
||||||
return;
|
return;
|
||||||
if (first) {
|
if (first) {
|
||||||
@ -61,7 +64,83 @@ pub fn tokenizeCMacro(tl: *TokenList, chars: [*]const u8) !void {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn next(chars: [*]const u8, i: *usize) !CToken {
|
/// Rewrites the C escape sequences in a string or character literal token so
/// that the token text uses escapes Zig accepts.
///
/// * `\n`, `\r`, `\t`, `\\`, `\'`, `\"` and `\x` are copied through unchanged.
/// * `\a`, `\b`, `\f`, `\v` become `\x07`, `\x08`, `\x0C`, `\x0B`.
/// * `\?` becomes a plain `?` (the backslash is dropped).
/// * Unicode (`\u`, `\U`) and octal escapes are not supported yet and yield
///   `error.TokenizingFailed`, as does any unknown escape or a backslash that
///   is the last byte of the token.
///
/// When the token contains no backslash it is returned as-is and nothing is
/// allocated. Otherwise the returned token's `bytes` is a prefix of a buffer
/// of `tok.bytes.len * 2` bytes obtained from `allocator`; that buffer is
/// released again if an error is returned.
fn zigifyEscapeSequences(allocator: *std.mem.Allocator, tok: CToken) !CToken {
    // Fast path: nothing to translate.
    if (std.mem.indexOfScalar(u8, tok.bytes, '\\') == null) return tok;

    // Sized at twice the input: the largest expansion is a 2-byte escape
    // such as `\a` growing into the 4-byte `\x07`.
    var bytes = try allocator.alloc(u8, tok.bytes.len * 2);
    // Do not leak the buffer when an unsupported escape aborts the rewrite.
    errdefer allocator.free(bytes);

    var escape = false;
    var i: usize = 0;
    for (tok.bytes) |c| {
        if (!escape) {
            // The backslash itself is copied now; the following byte decides
            // what (if anything) replaces it.
            if (c == '\\') {
                escape = true;
            }
            bytes[i] = c;
            i += 1;
            continue;
        }

        switch (c) {
            // Spelled the same way in Zig.
            'n', 'r', 't', '\\', '\'', '\"', 'x' => {
                bytes[i] = c;
            },
            // No Zig shorthand exists; emit the equivalent `\x0N` byte escape.
            'a', 'b', 'f', 'v' => {
                const low_digit: u8 = switch (c) {
                    'a' => '7',
                    'b' => '8',
                    'f' => 'C',
                    'v' => 'B',
                    else => unreachable,
                };
                bytes[i] = 'x';
                bytes[i + 1] = '0';
                bytes[i + 2] = low_digit;
                i += 2;
            },
            // `\?` is just `?`: step back so it overwrites the backslash
            // that was already copied.
            '?' => {
                i -= 1;
                bytes[i] = '?';
            },
            'u', 'U' => {
                // TODO unicode escape sequences
                return error.TokenizingFailed;
            },
            '0'...'7' => {
                // TODO octal escape sequences
                return error.TokenizingFailed;
            },
            else => {
                // unknown escape sequence
                return error.TokenizingFailed;
            },
        }
        i += 1;
        escape = false;
    }

    // A backslash as the very last byte has nothing to escape and would
    // produce a malformed literal.
    if (escape) return error.TokenizingFailed;

    return CToken{
        .id = tok.id,
        .bytes = bytes[0..i],
    };
}
|
||||||
|
|
||||||
|
fn next(chars: [*:0]const u8, i: *usize) !CToken {
|
||||||
var state: enum {
|
var state: enum {
|
||||||
Start,
|
Start,
|
||||||
GotLt,
|
GotLt,
|
||||||
@ -462,7 +541,7 @@ fn next(chars: [*]const u8, i: *usize) !CToken {
|
|||||||
.String => { // TODO char escapes
|
.String => { // TODO char escapes
|
||||||
switch (c) {
|
switch (c) {
|
||||||
'\"' => {
|
'\"' => {
|
||||||
result.bytes = chars[begin_index + 1 .. i.* - 1];
|
result.bytes = chars[begin_index..i.*];
|
||||||
return result;
|
return result;
|
||||||
},
|
},
|
||||||
else => {},
|
else => {},
|
||||||
@ -471,7 +550,7 @@ fn next(chars: [*]const u8, i: *usize) !CToken {
|
|||||||
.CharLit => {
|
.CharLit => {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
'\'' => {
|
'\'' => {
|
||||||
result.bytes = chars[begin_index + 1 .. i.* - 1];
|
result.bytes = chars[begin_index..i.*];
|
||||||
return result;
|
return result;
|
||||||
},
|
},
|
||||||
else => {},
|
else => {},
|
||||||
|
@ -734,7 +734,7 @@ pub extern fn ZigClangSourceManager_getSpellingLoc(self: ?*const struct_ZigClang
|
|||||||
pub extern fn ZigClangSourceManager_getFilename(self: *const struct_ZigClangSourceManager, SpellingLoc: struct_ZigClangSourceLocation) ?[*:0]const u8;
|
pub extern fn ZigClangSourceManager_getFilename(self: *const struct_ZigClangSourceManager, SpellingLoc: struct_ZigClangSourceLocation) ?[*:0]const u8;
|
||||||
pub extern fn ZigClangSourceManager_getSpellingLineNumber(self: ?*const struct_ZigClangSourceManager, Loc: struct_ZigClangSourceLocation) c_uint;
|
pub extern fn ZigClangSourceManager_getSpellingLineNumber(self: ?*const struct_ZigClangSourceManager, Loc: struct_ZigClangSourceLocation) c_uint;
|
||||||
pub extern fn ZigClangSourceManager_getSpellingColumnNumber(self: ?*const struct_ZigClangSourceManager, Loc: struct_ZigClangSourceLocation) c_uint;
|
pub extern fn ZigClangSourceManager_getSpellingColumnNumber(self: ?*const struct_ZigClangSourceManager, Loc: struct_ZigClangSourceLocation) c_uint;
|
||||||
pub extern fn ZigClangSourceManager_getCharacterData(self: ?*const struct_ZigClangSourceManager, SL: struct_ZigClangSourceLocation) [*c]const u8;
|
pub extern fn ZigClangSourceManager_getCharacterData(self: ?*const struct_ZigClangSourceManager, SL: struct_ZigClangSourceLocation) [*:0]const u8;
|
||||||
pub extern fn ZigClangASTContext_getPointerType(self: ?*const struct_ZigClangASTContext, T: struct_ZigClangQualType) struct_ZigClangQualType;
|
pub extern fn ZigClangASTContext_getPointerType(self: ?*const struct_ZigClangASTContext, T: struct_ZigClangQualType) struct_ZigClangQualType;
|
||||||
pub extern fn ZigClangASTUnit_getASTContext(self: ?*struct_ZigClangASTUnit) ?*struct_ZigClangASTContext;
|
pub extern fn ZigClangASTUnit_getASTContext(self: ?*struct_ZigClangASTUnit) ?*struct_ZigClangASTContext;
|
||||||
pub extern fn ZigClangASTUnit_getSourceManager(self: *struct_ZigClangASTUnit) *struct_ZigClangSourceManager;
|
pub extern fn ZigClangASTUnit_getSourceManager(self: *struct_ZigClangASTUnit) *struct_ZigClangSourceManager;
|
||||||
|
@ -2629,9 +2629,9 @@ fn transPreprocessorEntities(c: *Context, unit: *ZigClangASTUnit) Error!void {
|
|||||||
} else false;
|
} else false;
|
||||||
|
|
||||||
(if (macro_fn)
|
(if (macro_fn)
|
||||||
transMacroFnDefine(c, &tok_it, name, begin_c, begin_loc)
|
transMacroFnDefine(c, &tok_it, name, begin_loc)
|
||||||
else
|
else
|
||||||
transMacroDefine(c, &tok_it, name, begin_c, begin_loc)) catch |err| switch (err) {
|
transMacroDefine(c, &tok_it, name, begin_loc)) catch |err| switch (err) {
|
||||||
error.UnsupportedTranslation,
|
error.UnsupportedTranslation,
|
||||||
error.ParseError,
|
error.ParseError,
|
||||||
=> try failDecl(c, begin_loc, name, "unable to translate macro", .{}),
|
=> try failDecl(c, begin_loc, name, "unable to translate macro", .{}),
|
||||||
@ -2643,7 +2643,7 @@ fn transPreprocessorEntities(c: *Context, unit: *ZigClangASTUnit) Error!void {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn transMacroDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, char_ptr: [*]const u8, source_loc: ZigClangSourceLocation) ParseError!void {
|
fn transMacroDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, source_loc: ZigClangSourceLocation) ParseError!void {
|
||||||
const rp = makeRestorePoint(c);
|
const rp = makeRestorePoint(c);
|
||||||
|
|
||||||
const visib_tok = try appendToken(c, .Keyword_pub, "pub");
|
const visib_tok = try appendToken(c, .Keyword_pub, "pub");
|
||||||
@ -2674,7 +2674,7 @@ fn transMacroDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8,
|
|||||||
_ = try c.macro_table.put(name, &node.base);
|
_ = try c.macro_table.put(name, &node.base);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn transMacroFnDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, char_ptr: [*]const u8, source_loc: ZigClangSourceLocation) ParseError!void {
|
fn transMacroFnDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, source_loc: ZigClangSourceLocation) ParseError!void {
|
||||||
const rp = makeRestorePoint(c);
|
const rp = makeRestorePoint(c);
|
||||||
const pub_tok = try appendToken(c, .Keyword_pub, "pub");
|
const pub_tok = try appendToken(c, .Keyword_pub, "pub");
|
||||||
const inline_tok = try appendToken(c, .Keyword_inline, "inline");
|
const inline_tok = try appendToken(c, .Keyword_inline, "inline");
|
||||||
@ -2829,11 +2829,7 @@ fn parseCPrimaryExpr(rp: RestorePoint, it: *ctok.TokenList.Iterator, source_loc:
|
|||||||
const tok = it.next().?;
|
const tok = it.next().?;
|
||||||
switch (tok.id) {
|
switch (tok.id) {
|
||||||
.CharLit => {
|
.CharLit => {
|
||||||
const buf = try rp.c.a().alloc(u8, tok.bytes.len + "''".len);
|
const token = try appendToken(rp.c, .CharLiteral, tok.bytes);
|
||||||
buf[0] = '\'';
|
|
||||||
writeEscapedString(buf[1..], tok.bytes);
|
|
||||||
buf[buf.len - 1] = '\'';
|
|
||||||
const token = try appendToken(rp.c, .CharLiteral, buf);
|
|
||||||
const node = try rp.c.a().create(ast.Node.CharLiteral);
|
const node = try rp.c.a().create(ast.Node.CharLiteral);
|
||||||
node.* = ast.Node.CharLiteral{
|
node.* = ast.Node.CharLiteral{
|
||||||
.token = token,
|
.token = token,
|
||||||
@ -2841,11 +2837,7 @@ fn parseCPrimaryExpr(rp: RestorePoint, it: *ctok.TokenList.Iterator, source_loc:
|
|||||||
return &node.base;
|
return &node.base;
|
||||||
},
|
},
|
||||||
.StrLit => {
|
.StrLit => {
|
||||||
const buf = try rp.c.a().alloc(u8, tok.bytes.len + "\"\"".len);
|
const token = try appendToken(rp.c, .StringLiteral, tok.bytes);
|
||||||
buf[0] = '"';
|
|
||||||
writeEscapedString(buf[1..], tok.bytes);
|
|
||||||
buf[buf.len - 1] = '"';
|
|
||||||
const token = try appendToken(rp.c, .StringLiteral, buf);
|
|
||||||
const node = try rp.c.a().create(ast.Node.StringLiteral);
|
const node = try rp.c.a().create(ast.Node.StringLiteral);
|
||||||
node.* = ast.Node.StringLiteral{
|
node.* = ast.Node.StringLiteral{
|
||||||
.token = token,
|
.token = token,
|
||||||
|
@ -411,6 +411,15 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
|
|||||||
\\}
|
\\}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
cases.add_2("macro escape sequences",
|
||||||
|
\\#define FOO "aoeu\xab derp"
|
||||||
|
\\#define FOO2 "aoeu\a derp"
|
||||||
|
, &[_][]const u8{
|
||||||
|
\\pub const FOO = "aoeu\xab derp";
|
||||||
|
,
|
||||||
|
\\pub const FOO2 = "aoeu\x07 derp";
|
||||||
|
});
|
||||||
|
|
||||||
/////////////// Cases for only stage1 which are TODO items for stage2 ////////////////
|
/////////////// Cases for only stage1 which are TODO items for stage2 ////////////////
|
||||||
|
|
||||||
cases.add_both("typedef of function in struct field",
|
cases.add_both("typedef of function in struct field",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user