std.zig.tokenizer: fix handling of line comment / doc comment

Andrew Kelley 2018-04-30 00:52:09 -04:00
parent fd2cd38bdb
commit 0bf7ebcfea
2 changed files with 91 additions and 25 deletions
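Previously, a line comment that ran to end-of-file fell through to the Eof case and produced no token at all (the old tests expected an empty token list for inputs like "//\xc2\x80"), and /// doc comments were handled in the same state as ordinary line comments. This commit adds a dedicated DocComment tokenizer state, sets the token id as soon as // is seen, emits LineComment/DocComment tokens at end-of-file, and updates the tokenizer tests plus a zig fmt test accordingly.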


@@ -124,6 +124,7 @@ test "zig fmt: comments before statements" {
     \\    _ = @import("foo/bar.zig");
     \\
     \\    // middle
+    \\    // middle2
     \\
     \\    // end
     \\}
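The added // middle2 line makes the canonical-format test cover two consecutive line comments between statements, exercising the tokenizer change below at the zig fmt level.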


@@ -260,6 +260,7 @@ pub const Tokenizer = struct {
         Slash,
         LineCommentStart,
         LineComment,
+        DocComment,
         Zero,
         IntegerLiteral,
         IntegerLiteralWithRadix,
@@ -825,6 +826,7 @@ pub const Tokenizer = struct {
            State.Slash => switch (c) {
                '/' => {
                    state = State.LineCommentStart;
+                    result.id = Token.Id.LineComment;
                },
                '=' => {
                    result.id = Token.Id.SlashEqual;
@@ -839,15 +841,15 @@ pub const Tokenizer = struct {
            State.LineCommentStart => switch (c) {
                '/' => {
                    result.id = Token.Id.DocComment;
-                    state = State.LineComment;
+                    state = State.DocComment;
                },
-                '\n' => {
-                    result.id = Token.Id.LineComment;
-                    break;
+                '\n' => break,
+                else => {
+                    state = State.LineComment;
+                    self.checkLiteralCharacter();
                },
-                else => self.checkLiteralCharacter(),
            },
-            State.LineComment => switch (c) {
+            State.LineComment, State.DocComment => switch (c) {
                '\n' => break,
                else => self.checkLiteralCharacter(),
            },
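Combined with the Slash state above, the comment states now flow as follows (an illustrative trace, not part of the diff):

    "// x\n"   Slash -> LineCommentStart -> LineComment; id = LineComment, break at the newline
    "///x\n"   Slash -> LineCommentStart -> DocComment; id = DocComment
    "//\n"     Slash -> LineCommentStart; '\n' breaks immediately, id is already LineComment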
@@ -934,7 +936,10 @@ pub const Tokenizer = struct {
            },
            State.LineCommentStart,
            State.LineComment => {
-                result.id = Token.Id.Eof;
+                result.id = Token.Id.LineComment;
+            },
+            State.DocComment => {
+                result.id = Token.Id.DocComment;
            },
            State.NumberDot,
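For reference, the comment handling after this commit reads as below. This is a reassembly of the hunks above, assuming the surrounding context lines are unchanged; a sketch, not a verbatim excerpt of the file:

    State.Slash => switch (c) {
        '/' => {
            state = State.LineCommentStart;
            result.id = Token.Id.LineComment;
        },
        // ... other Slash arms unchanged ...
    },
    State.LineCommentStart => switch (c) {
        '/' => {
            result.id = Token.Id.DocComment;
            state = State.DocComment;
        },
        '\n' => break,
        else => {
            state = State.LineComment;
            self.checkLiteralCharacter();
        },
    },
    State.LineComment, State.DocComment => switch (c) {
        '\n' => break,
        else => self.checkLiteralCharacter(),
    },

and, when the end of the buffer is reached:

    State.LineCommentStart,
    State.LineComment => {
        result.id = Token.Id.LineComment;
    },
    State.DocComment => {
        result.id = Token.Id.DocComment;
    },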
@@ -1105,41 +1110,77 @@ test "tokenizer - invalid literal/comment characters" {
         Token.Id.Invalid,
     });
     testTokenize("//\x00", []Token.Id {
+        Token.Id.LineComment,
         Token.Id.Invalid,
     });
     testTokenize("//\x1f", []Token.Id {
+        Token.Id.LineComment,
         Token.Id.Invalid,
     });
     testTokenize("//\x7f", []Token.Id {
+        Token.Id.LineComment,
         Token.Id.Invalid,
     });
 }
 
 test "tokenizer - utf8" {
-    testTokenize("//\xc2\x80", []Token.Id{});
-    testTokenize("//\xf4\x8f\xbf\xbf", []Token.Id{});
+    testTokenize("//\xc2\x80", []Token.Id{Token.Id.LineComment});
+    testTokenize("//\xf4\x8f\xbf\xbf", []Token.Id{Token.Id.LineComment});
 }
 
 test "tokenizer - invalid utf8" {
-    testTokenize("//\x80", []Token.Id{Token.Id.Invalid});
-    testTokenize("//\xbf", []Token.Id{Token.Id.Invalid});
-    testTokenize("//\xf8", []Token.Id{Token.Id.Invalid});
-    testTokenize("//\xff", []Token.Id{Token.Id.Invalid});
-    testTokenize("//\xc2\xc0", []Token.Id{Token.Id.Invalid});
-    testTokenize("//\xe0", []Token.Id{Token.Id.Invalid});
-    testTokenize("//\xf0", []Token.Id{Token.Id.Invalid});
-    testTokenize("//\xf0\x90\x80\xc0", []Token.Id{Token.Id.Invalid});
+    testTokenize("//\x80", []Token.Id{
+        Token.Id.LineComment,
+        Token.Id.Invalid,
+    });
+    testTokenize("//\xbf", []Token.Id{
+        Token.Id.LineComment,
+        Token.Id.Invalid,
+    });
+    testTokenize("//\xf8", []Token.Id{
+        Token.Id.LineComment,
+        Token.Id.Invalid,
+    });
+    testTokenize("//\xff", []Token.Id{
+        Token.Id.LineComment,
+        Token.Id.Invalid,
+    });
+    testTokenize("//\xc2\xc0", []Token.Id{
+        Token.Id.LineComment,
+        Token.Id.Invalid,
+    });
+    testTokenize("//\xe0", []Token.Id{
+        Token.Id.LineComment,
+        Token.Id.Invalid,
+    });
+    testTokenize("//\xf0", []Token.Id{
+        Token.Id.LineComment,
+        Token.Id.Invalid,
+    });
+    testTokenize("//\xf0\x90\x80\xc0", []Token.Id{
+        Token.Id.LineComment,
+        Token.Id.Invalid,
+    });
 }
 
 test "tokenizer - illegal unicode codepoints" {
     // unicode newline characters.U+0085, U+2028, U+2029
-    testTokenize("//\xc2\x84", []Token.Id{});
-    testTokenize("//\xc2\x85", []Token.Id{Token.Id.Invalid});
-    testTokenize("//\xc2\x86", []Token.Id{});
-    testTokenize("//\xe2\x80\xa7", []Token.Id{});
-    testTokenize("//\xe2\x80\xa8", []Token.Id{Token.Id.Invalid});
-    testTokenize("//\xe2\x80\xa9", []Token.Id{Token.Id.Invalid});
-    testTokenize("//\xe2\x80\xaa", []Token.Id{});
+    testTokenize("//\xc2\x84", []Token.Id{Token.Id.LineComment});
+    testTokenize("//\xc2\x85", []Token.Id{
+        Token.Id.LineComment,
+        Token.Id.Invalid,
+    });
+    testTokenize("//\xc2\x86", []Token.Id{Token.Id.LineComment});
+    testTokenize("//\xe2\x80\xa7", []Token.Id{Token.Id.LineComment});
+    testTokenize("//\xe2\x80\xa8", []Token.Id{
+        Token.Id.LineComment,
+        Token.Id.Invalid,
+    });
+    testTokenize("//\xe2\x80\xa9", []Token.Id{
+        Token.Id.LineComment,
+        Token.Id.Invalid,
+    });
+    testTokenize("//\xe2\x80\xaa", []Token.Id{Token.Id.LineComment});
 }
 
 test "tokenizer - string identifier and builtin fns" {
@@ -1166,11 +1207,35 @@ test "tokenizer - pipe and then invalid" {
     });
 }
 
+test "tokenizer - line comment and doc comment" {
+    testTokenize("//", []Token.Id{Token.Id.LineComment});
+    testTokenize("// a / b", []Token.Id{Token.Id.LineComment});
+    testTokenize("// /", []Token.Id{Token.Id.LineComment});
+    testTokenize("/// a", []Token.Id{Token.Id.DocComment});
+    testTokenize("///", []Token.Id{Token.Id.DocComment});
+}
+
+test "tokenizer - line comment followed by identifier" {
+    testTokenize(
+        \\ Unexpected,
+        \\ // another
+        \\ Another,
+    , []Token.Id{
+        Token.Id.Identifier,
+        Token.Id.Comma,
+        Token.Id.LineComment,
+        Token.Id.Identifier,
+        Token.Id.Comma,
+    });
+}
+
 fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void {
     var tokenizer = Tokenizer.init(source);
     for (expected_tokens) |expected_token_id| {
         const token = tokenizer.next();
-        std.debug.assert(@TagType(Token.Id)(token.id) == @TagType(Token.Id)(expected_token_id));
+        if (@TagType(Token.Id)(token.id) != @TagType(Token.Id)(expected_token_id)) {
+            std.debug.panic("expected {}, found {}\n", @tagName(@TagType(Token.Id)(expected_token_id)), @tagName(@TagType(Token.Id)(token.id)));
+        }
         switch (expected_token_id) {
             Token.Id.StringLiteral => |expected_kind| {
                 std.debug.assert(expected_kind == switch (token.id) { Token.Id.StringLiteral => |kind| kind, else => unreachable });