// Zig source · 1288 lines · 40 KiB
const std = @import("std");
|
||
const mem = std.mem;
|
||
|
||
/// A named buffer of source text that a `Tokenizer` scans.
pub const Source = struct {
    /// Raw bytes to tokenize; `Token.start` and `Token.end` are offsets into this slice.
    buffer: []const u8,

    /// Name associated with `buffer`.
    file_name: []const u8,
};
|
||
|
||
/// A single lexical token produced by `Tokenizer.next`.
pub const Token = struct {
    /// What kind of token this is (with a payload for literals).
    id: Id,
    /// Offset of the first byte of the token in `source.buffer`.
    start: usize,
    /// Offset one past the last byte of the token in `source.buffer`.
    end: usize,
    /// The source this token was read from.
    source: *Source,

    /// Token kinds. Literal kinds carry their prefix/suffix as payload.
    pub const Id = union(enum) {
        Invalid,
        Eof,
        Nl,
        Identifier,

        /// special case for #include <...>
        MacroString,
        StringLiteral: StrKind,
        CharLiteral: StrKind,
        IntegerLiteral: NumSuffix,
        FloatLiteral: NumSuffix,
        Bang,
        BangEqual,
        Pipe,
        PipePipe,
        PipeEqual,
        Equal,
        EqualEqual,
        LParen,
        RParen,
        LBrace,
        RBrace,
        LBracket,
        RBracket,
        Period,
        Ellipsis,
        Caret,
        CaretEqual,
        Plus,
        PlusPlus,
        PlusEqual,
        Minus,
        MinusMinus,
        MinusEqual,
        Asterisk,
        AsteriskEqual,
        Percent,
        PercentEqual,
        Arrow,
        Colon,
        Semicolon,
        Slash,
        SlashEqual,
        Comma,
        Ampersand,
        AmpersandAmpersand,
        AmpersandEqual,
        QuestionMark,
        AngleBracketLeft,
        AngleBracketLeftEqual,
        AngleBracketAngleBracketLeft,
        AngleBracketAngleBracketLeftEqual,
        AngleBracketRight,
        AngleBracketRightEqual,
        AngleBracketAngleBracketRight,
        AngleBracketAngleBracketRightEqual,
        Tilde,
        LineComment,
        MultiLineComment,
        Hash,
        HashHash,

        Keyword_auto,
        Keyword_break,
        Keyword_case,
        Keyword_char,
        Keyword_const,
        Keyword_continue,
        Keyword_default,
        Keyword_do,
        Keyword_double,
        Keyword_else,
        Keyword_enum,
        Keyword_extern,
        Keyword_float,
        Keyword_for,
        Keyword_goto,
        Keyword_if,
        Keyword_int,
        Keyword_long,
        Keyword_register,
        Keyword_return,
        Keyword_short,
        Keyword_signed,
        Keyword_sizeof,
        Keyword_static,
        Keyword_struct,
        Keyword_switch,
        Keyword_typedef,
        Keyword_union,
        Keyword_unsigned,
        Keyword_void,
        Keyword_volatile,
        Keyword_while,

        // ISO C99
        Keyword_bool,
        Keyword_complex,
        Keyword_imaginary,
        Keyword_inline,
        Keyword_restrict,

        // ISO C11
        Keyword_alignas,
        Keyword_alignof,
        Keyword_atomic,
        Keyword_generic,
        Keyword_noreturn,
        Keyword_static_assert,
        Keyword_thread_local,

        // Preprocessor directives
        Keyword_include,
        Keyword_define,
        Keyword_ifdef,
        Keyword_ifndef,
        Keyword_error,
        Keyword_pragma,
    };

    /// One entry of the keyword table: spelling, token id and the
    /// precomputed hash of the spelling.
    pub const Keyword = struct {
        bytes: []const u8,
        id: Id,
        hash: u32,

        /// Builds a table entry, hashing `bytes` with the same function
        /// `getKeyword` applies to its input.
        fn init(bytes: []const u8, id: Id) Keyword {
            // Raises the compile-time branch budget for this call.
            @setEvalBranchQuota(2000);
            return .{
                .bytes = bytes,
                .id = id,
                .hash = std.hash_map.hashString(bytes),
            };
        }
    };

    // TODO extensions
    /// Every spelling that `getKeyword` recognizes.
    pub const keywords = [_]Keyword{
        Keyword.init("auto", .Keyword_auto),
        Keyword.init("break", .Keyword_break),
        Keyword.init("case", .Keyword_case),
        Keyword.init("char", .Keyword_char),
        Keyword.init("const", .Keyword_const),
        Keyword.init("continue", .Keyword_continue),
        Keyword.init("default", .Keyword_default),
        Keyword.init("do", .Keyword_do),
        Keyword.init("double", .Keyword_double),
        Keyword.init("else", .Keyword_else),
        Keyword.init("enum", .Keyword_enum),
        Keyword.init("extern", .Keyword_extern),
        Keyword.init("float", .Keyword_float),
        Keyword.init("for", .Keyword_for),
        Keyword.init("goto", .Keyword_goto),
        Keyword.init("if", .Keyword_if),
        Keyword.init("int", .Keyword_int),
        Keyword.init("long", .Keyword_long),
        Keyword.init("register", .Keyword_register),
        Keyword.init("return", .Keyword_return),
        Keyword.init("short", .Keyword_short),
        Keyword.init("signed", .Keyword_signed),
        Keyword.init("sizeof", .Keyword_sizeof),
        Keyword.init("static", .Keyword_static),
        Keyword.init("struct", .Keyword_struct),
        Keyword.init("switch", .Keyword_switch),
        Keyword.init("typedef", .Keyword_typedef),
        Keyword.init("union", .Keyword_union),
        Keyword.init("unsigned", .Keyword_unsigned),
        Keyword.init("void", .Keyword_void),
        Keyword.init("volatile", .Keyword_volatile),
        Keyword.init("while", .Keyword_while),

        // ISO C99
        Keyword.init("_Bool", .Keyword_bool),
        Keyword.init("_Complex", .Keyword_complex),
        Keyword.init("_Imaginary", .Keyword_imaginary),
        Keyword.init("inline", .Keyword_inline),
        Keyword.init("restrict", .Keyword_restrict),

        // ISO C11
        Keyword.init("_Alignas", .Keyword_alignas),
        Keyword.init("_Alignof", .Keyword_alignof),
        Keyword.init("_Atomic", .Keyword_atomic),
        Keyword.init("_Generic", .Keyword_generic),
        Keyword.init("_Noreturn", .Keyword_noreturn),
        Keyword.init("_Static_assert", .Keyword_static_assert),
        Keyword.init("_Thread_local", .Keyword_thread_local),

        // Preprocessor directives
        Keyword.init("include", .Keyword_include),
        Keyword.init("define", .Keyword_define),
        Keyword.init("ifdef", .Keyword_ifdef),
        Keyword.init("ifndef", .Keyword_ifndef),
        Keyword.init("error", .Keyword_error),
        Keyword.init("pragma", .Keyword_pragma),
    };

    // TODO perfect hash at comptime
    /// Looks `bytes` up in `keywords`.
    ///
    /// Returns the matching keyword id, or null when `bytes` is not a
    /// keyword. The preprocessor directive names (`include`, `define`,
    /// `ifdef`, `ifndef`, `error`, `pragma`) are only reported when
    /// `pp_directive` is true; otherwise they yield null.
    pub fn getKeyword(bytes: []const u8, pp_directive: bool) ?Id {
        const hash = std.hash_map.hashString(bytes);
        for (keywords) |kw| {
            // Cheap hash comparison first; the byte comparison confirms the match.
            if (kw.hash != hash or !mem.eql(u8, kw.bytes, bytes)) continue;

            switch (kw.id) {
                .Keyword_include,
                .Keyword_define,
                .Keyword_ifdef,
                .Keyword_ifndef,
                .Keyword_error,
                .Keyword_pragma,
                => if (!pp_directive) return null,
                else => {},
            }
            return kw.id;
        }
        return null;
    }

    /// Suffix attached to an integer or float literal.
    pub const NumSuffix = enum {
        None,
        F,
        L,
        U,
        LU,
        LL,
        LLU,
    };

    /// Encoding prefix of a string or character literal.
    pub const StrKind = enum {
        None,
        Wide,
        Utf8,
        Utf16,
        Utf32,
    };
};
|
||
|
||
/// Splits the bytes of a `Source` into C tokens, one per call to `next`.
pub const Tokenizer = struct {
    /// Buffer being scanned.
    source: *Source,
    /// Offset of the next unread byte in `source.buffer`.
    index: usize = 0,
    /// Tag of the token returned by the previous `next` call. Used to spot
    /// directive names after `#` and the `<...>` form after `include`.
    prev_tok_id: @TagType(Token.Id) = .Invalid,
    /// True while scanning the rest of a preprocessor directive line; in
    /// that mode a line ending is reported as `.Nl` instead of being skipped.
    pp_directive: bool = false,

    /// Scans and returns the next token.
    ///
    /// Returns a token with id `.Eof` once the buffer is exhausted and
    /// `.Invalid` for malformed or unterminated input. Characters that
    /// start no token (including whitespace) are skipped.
    pub fn next(self: *Tokenizer) Token {
        var result = Token{
            .id = .Eof,
            .start = self.index,
            .end = undefined,
            .source = self.source,
        };
        var state: enum {
            Start,
            Cr,
            u,
            u8,
            U,
            L,
            StringLiteral,
            /// Just after the opening quote of a character literal.
            CharLiteralStart,
            CharLiteral,
            EscapeSequence,
            OctalEscape,
            HexEscape,
            UnicodeEscape,
            Identifier,
            Equal,
            Bang,
            Pipe,
            Percent,
            Asterisk,
            Plus,

            /// special case for #include <...>
            MacroString,
            AngleBracketLeft,
            AngleBracketAngleBracketLeft,
            AngleBracketRight,
            AngleBracketAngleBracketRight,
            Caret,
            Period,
            Period2,
            Minus,
            Slash,
            Ampersand,
            Hash,
            LineComment,
            MultiLineComment,
            MultiLineCommentAsterisk,
            Zero,
            IntegerLiteralOct,
            IntegerLiteralBinary,
            IntegerLiteralHex,
            IntegerLiteral,
            IntegerSuffix,
            IntegerSuffixU,
            IntegerSuffixL,
            IntegerSuffixLL,
            IntegerSuffixUL,
            FloatFraction,
            FloatFractionHex,
            FloatExponent,
            FloatExponentDigits,
            FloatSuffix,
        } = .Start;
        // Whether the escape sequence being scanned belongs to a string
        // literal (true) or a character literal (false).
        var string = false;
        // Digit counter shared by the escape-sequence and exponent states.
        var counter: u32 = 0;

        // Convention used below: `self.index -= 1` before falling out of a
        // prong makes the loop's increment re-read the current byte in the
        // newly selected state; `break` without `self.index += 1` leaves the
        // current byte for the next token.
        while (self.index < self.source.buffer.len) : (self.index += 1) {
            const c = self.source.buffer[self.index];
            switch (state) {
                .Start => switch (c) {
                    '\n' => {
                        if (!self.pp_directive) {
                            result.start = self.index + 1;
                            continue;
                        }
                        self.pp_directive = false;
                        result.id = .Nl;
                        self.index += 1;
                        break;
                    },
                    '\r' => {
                        if (!self.pp_directive) {
                            result.start = self.index + 1;
                            continue;
                        }
                        state = .Cr;
                    },
                    '"' => {
                        result.id = .{ .StringLiteral = .None };
                        state = .StringLiteral;
                    },
                    '\'' => {
                        result.id = .{ .CharLiteral = .None };
                        state = .CharLiteralStart;
                    },
                    // These letters may introduce a prefixed literal.
                    'u' => state = .u,
                    'U' => state = .U,
                    'L' => state = .L,
                    'a'...'t', 'v'...'z', 'A'...'K', 'M'...'T', 'V'...'Z', '_' => state = .Identifier,
                    '=' => state = .Equal,
                    '!' => state = .Bang,
                    '|' => state = .Pipe,
                    '(' => {
                        result.id = .LParen;
                        self.index += 1;
                        break;
                    },
                    ')' => {
                        result.id = .RParen;
                        self.index += 1;
                        break;
                    },
                    '[' => {
                        result.id = .LBracket;
                        self.index += 1;
                        break;
                    },
                    ']' => {
                        result.id = .RBracket;
                        self.index += 1;
                        break;
                    },
                    ';' => {
                        result.id = .Semicolon;
                        self.index += 1;
                        break;
                    },
                    ',' => {
                        result.id = .Comma;
                        self.index += 1;
                        break;
                    },
                    '?' => {
                        result.id = .QuestionMark;
                        self.index += 1;
                        break;
                    },
                    ':' => {
                        result.id = .Colon;
                        self.index += 1;
                        break;
                    },
                    '%' => state = .Percent,
                    '*' => state = .Asterisk,
                    '+' => state = .Plus,
                    '<' => {
                        // Directly after `include` a `<` opens a header name.
                        if (self.prev_tok_id == .Keyword_include) {
                            state = .MacroString;
                        } else {
                            state = .AngleBracketLeft;
                        }
                    },
                    '>' => state = .AngleBracketRight,
                    '^' => state = .Caret,
                    '{' => {
                        result.id = .LBrace;
                        self.index += 1;
                        break;
                    },
                    '}' => {
                        result.id = .RBrace;
                        self.index += 1;
                        break;
                    },
                    '~' => {
                        result.id = .Tilde;
                        self.index += 1;
                        break;
                    },
                    '.' => state = .Period,
                    '-' => state = .Minus,
                    '/' => state = .Slash,
                    '&' => state = .Ampersand,
                    '#' => state = .Hash,
                    '0' => state = .Zero,
                    '1'...'9' => state = .IntegerLiteral,
                    else => {
                        // Anything else (whitespace included) is skipped.
                        result.start = self.index + 1;
                    },
                },
                .Cr => switch (c) {
                    '\n' => {
                        self.pp_directive = false;
                        result.id = .Nl;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Invalid;
                        break;
                    },
                },
                .u => switch (c) {
                    '8' => state = .u8,
                    '\'' => {
                        result.id = .{ .CharLiteral = .Utf16 };
                        state = .CharLiteralStart;
                    },
                    '\"' => {
                        result.id = .{ .StringLiteral = .Utf16 };
                        state = .StringLiteral;
                    },
                    else => {
                        // Not a literal prefix: the `u` starts an identifier
                        // and this byte must be checked by the identifier
                        // state rather than swallowed.
                        self.index -= 1;
                        state = .Identifier;
                    },
                },
                .u8 => switch (c) {
                    '\"' => {
                        result.id = .{ .StringLiteral = .Utf8 };
                        state = .StringLiteral;
                    },
                    else => {
                        self.index -= 1;
                        state = .Identifier;
                    },
                },
                .U => switch (c) {
                    '\'' => {
                        result.id = .{ .CharLiteral = .Utf32 };
                        state = .CharLiteralStart;
                    },
                    '\"' => {
                        result.id = .{ .StringLiteral = .Utf32 };
                        state = .StringLiteral;
                    },
                    else => {
                        self.index -= 1;
                        state = .Identifier;
                    },
                },
                .L => switch (c) {
                    '\'' => {
                        result.id = .{ .CharLiteral = .Wide };
                        state = .CharLiteralStart;
                    },
                    '\"' => {
                        result.id = .{ .StringLiteral = .Wide };
                        state = .StringLiteral;
                    },
                    else => {
                        self.index -= 1;
                        state = .Identifier;
                    },
                },
                .StringLiteral => switch (c) {
                    '\\' => {
                        string = true;
                        state = .EscapeSequence;
                    },
                    '"' => {
                        self.index += 1;
                        break;
                    },
                    '\n', '\r' => {
                        result.id = .Invalid;
                        break;
                    },
                    else => {},
                },
                .CharLiteralStart => switch (c) {
                    '\\' => {
                        string = false;
                        state = .EscapeSequence;
                    },
                    // An empty literal (`''`) or a line break is malformed.
                    '\'', '\n' => {
                        result.id = .Invalid;
                        break;
                    },
                    else => state = .CharLiteral,
                },
                .CharLiteral => switch (c) {
                    '\\' => {
                        string = false;
                        state = .EscapeSequence;
                    },
                    '\'' => {
                        // Closing quote; `result.id` was set when the literal opened.
                        self.index += 1;
                        break;
                    },
                    '\n' => {
                        result.id = .Invalid;
                        break;
                    },
                    else => {},
                },
                .EscapeSequence => switch (c) {
                    '\'', '"', '?', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v' => {
                        // Single-character escape: back to the enclosing literal.
                        state = if (string) .StringLiteral else .CharLiteral;
                    },
                    '0'...'7' => {
                        counter = 1;
                        state = .OctalEscape;
                    },
                    'x' => state = .HexEscape,
                    'u' => {
                        // `\u` takes exactly four hex digits.
                        counter = 4;
                        state = .UnicodeEscape;
                    },
                    'U' => {
                        // `\U` takes exactly eight hex digits.
                        counter = 8;
                        state = .UnicodeEscape;
                    },
                    else => {
                        result.id = .Invalid;
                        break;
                    },
                },
                .OctalEscape => switch (c) {
                    '0'...'7' => {
                        // At most three octal digits per escape.
                        counter += 1;
                        if (counter == 3) {
                            state = if (string) .StringLiteral else .CharLiteral;
                        }
                    },
                    else => {
                        // This byte belongs to the literal (it may be the closing quote).
                        self.index -= 1;
                        state = if (string) .StringLiteral else .CharLiteral;
                    },
                },
                .HexEscape => switch (c) {
                    '0'...'9', 'a'...'f', 'A'...'F' => {},
                    else => {
                        self.index -= 1;
                        state = if (string) .StringLiteral else .CharLiteral;
                    },
                },
                .UnicodeEscape => switch (c) {
                    '0'...'9', 'a'...'f', 'A'...'F' => {
                        counter -= 1;
                        if (counter == 0) {
                            state = if (string) .StringLiteral else .CharLiteral;
                        }
                    },
                    else => {
                        // Fewer hex digits than the escape requires.
                        result.id = .Invalid;
                        break;
                    },
                },
                .Identifier => switch (c) {
                    'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
                    else => {
                        // Directive names only count directly after a `#`
                        // that is not already inside a directive.
                        result.id = Token.getKeyword(self.source.buffer[result.start..self.index], self.prev_tok_id == .Hash and !self.pp_directive) orelse .Identifier;
                        if (self.prev_tok_id == .Hash)
                            self.pp_directive = true;
                        break;
                    },
                },
                .Equal => switch (c) {
                    '=' => {
                        result.id = .EqualEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Equal;
                        break;
                    },
                },
                .Bang => switch (c) {
                    '=' => {
                        result.id = .BangEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Bang;
                        break;
                    },
                },
                .Pipe => switch (c) {
                    '=' => {
                        result.id = .PipeEqual;
                        self.index += 1;
                        break;
                    },
                    '|' => {
                        result.id = .PipePipe;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Pipe;
                        break;
                    },
                },
                .Percent => switch (c) {
                    '=' => {
                        result.id = .PercentEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Percent;
                        break;
                    },
                },
                .Asterisk => switch (c) {
                    '=' => {
                        result.id = .AsteriskEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Asterisk;
                        break;
                    },
                },
                .Plus => switch (c) {
                    '=' => {
                        result.id = .PlusEqual;
                        self.index += 1;
                        break;
                    },
                    '+' => {
                        result.id = .PlusPlus;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Plus;
                        break;
                    },
                },
                .MacroString => switch (c) {
                    '>' => {
                        result.id = .MacroString;
                        self.index += 1;
                        break;
                    },
                    else => {},
                },
                .AngleBracketLeft => switch (c) {
                    '<' => state = .AngleBracketAngleBracketLeft,
                    '=' => {
                        result.id = .AngleBracketLeftEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .AngleBracketLeft;
                        break;
                    },
                },
                .AngleBracketAngleBracketLeft => switch (c) {
                    '=' => {
                        result.id = .AngleBracketAngleBracketLeftEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .AngleBracketAngleBracketLeft;
                        break;
                    },
                },
                .AngleBracketRight => switch (c) {
                    '>' => state = .AngleBracketAngleBracketRight,
                    '=' => {
                        result.id = .AngleBracketRightEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .AngleBracketRight;
                        break;
                    },
                },
                .AngleBracketAngleBracketRight => switch (c) {
                    '=' => {
                        result.id = .AngleBracketAngleBracketRightEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .AngleBracketAngleBracketRight;
                        break;
                    },
                },
                .Caret => switch (c) {
                    '=' => {
                        result.id = .CaretEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Caret;
                        break;
                    },
                },
                .Period => switch (c) {
                    '.' => state = .Period2,
                    '0'...'9' => state = .FloatFraction,
                    else => {
                        result.id = .Period;
                        break;
                    },
                },
                .Period2 => switch (c) {
                    '.' => {
                        result.id = .Ellipsis;
                        self.index += 1;
                        break;
                    },
                    else => {
                        // Only two dots: emit the first as `.Period` and
                        // leave the second one for the next call.
                        result.id = .Period;
                        self.index -= 1;
                        break;
                    },
                },
                .Minus => switch (c) {
                    '>' => {
                        result.id = .Arrow;
                        self.index += 1;
                        break;
                    },
                    '=' => {
                        result.id = .MinusEqual;
                        self.index += 1;
                        break;
                    },
                    '-' => {
                        result.id = .MinusMinus;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Minus;
                        break;
                    },
                },
                .Slash => switch (c) {
                    '/' => state = .LineComment,
                    '*' => state = .MultiLineComment,
                    '=' => {
                        result.id = .SlashEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Slash;
                        break;
                    },
                },
                .Ampersand => switch (c) {
                    '&' => {
                        result.id = .AmpersandAmpersand;
                        self.index += 1;
                        break;
                    },
                    '=' => {
                        result.id = .AmpersandEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Ampersand;
                        break;
                    },
                },
                .Hash => switch (c) {
                    '#' => {
                        result.id = .HashHash;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Hash;
                        break;
                    },
                },
                .LineComment => switch (c) {
                    '\n' => {
                        result.id = .LineComment;
                        self.index += 1;
                        break;
                    },
                    else => {},
                },
                .MultiLineComment => switch (c) {
                    '*' => state = .MultiLineCommentAsterisk,
                    else => {},
                },
                .MultiLineCommentAsterisk => switch (c) {
                    '/' => {
                        result.id = .MultiLineComment;
                        self.index += 1;
                        break;
                    },
                    else => state = .MultiLineComment,
                },
                .Zero => switch (c) {
                    '0'...'9' => state = .IntegerLiteralOct,
                    'b', 'B' => state = .IntegerLiteralBinary,
                    'x', 'X' => state = .IntegerLiteralHex,
                    // `0.5` and `0e1` are floating constants.
                    '.' => state = .FloatFraction,
                    'e', 'E' => state = .FloatExponent,
                    else => {
                        state = .IntegerSuffix;
                        self.index -= 1;
                    },
                },
                .IntegerLiteralOct => switch (c) {
                    '0'...'7' => {},
                    else => {
                        state = .IntegerSuffix;
                        self.index -= 1;
                    },
                },
                .IntegerLiteralBinary => switch (c) {
                    '0', '1' => {},
                    else => {
                        state = .IntegerSuffix;
                        self.index -= 1;
                    },
                },
                .IntegerLiteralHex => switch (c) {
                    '0'...'9', 'a'...'f', 'A'...'F' => {},
                    '.' => state = .FloatFractionHex,
                    'p', 'P' => state = .FloatExponent,
                    else => {
                        state = .IntegerSuffix;
                        self.index -= 1;
                    },
                },
                .IntegerLiteral => switch (c) {
                    '0'...'9' => {},
                    '.' => state = .FloatFraction,
                    'e', 'E' => state = .FloatExponent,
                    else => {
                        state = .IntegerSuffix;
                        self.index -= 1;
                    },
                },
                .IntegerSuffix => switch (c) {
                    'u', 'U' => state = .IntegerSuffixU,
                    'l', 'L' => state = .IntegerSuffixL,
                    else => {
                        result.id = .{ .IntegerLiteral = .None };
                        break;
                    },
                },
                .IntegerSuffixU => switch (c) {
                    'l', 'L' => state = .IntegerSuffixUL,
                    else => {
                        result.id = .{ .IntegerLiteral = .U };
                        break;
                    },
                },
                .IntegerSuffixL => switch (c) {
                    'l', 'L' => state = .IntegerSuffixLL,
                    'u', 'U' => {
                        result.id = .{ .IntegerLiteral = .LU };
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .{ .IntegerLiteral = .L };
                        break;
                    },
                },
                .IntegerSuffixLL => switch (c) {
                    'u', 'U' => {
                        result.id = .{ .IntegerLiteral = .LLU };
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .{ .IntegerLiteral = .LL };
                        break;
                    },
                },
                .IntegerSuffixUL => switch (c) {
                    'l', 'L' => {
                        result.id = .{ .IntegerLiteral = .LLU };
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .{ .IntegerLiteral = .LU };
                        break;
                    },
                },
                .FloatFraction => switch (c) {
                    '0'...'9' => {},
                    'e', 'E' => state = .FloatExponent,
                    else => {
                        self.index -= 1;
                        state = .FloatSuffix;
                    },
                },
                .FloatFractionHex => switch (c) {
                    '0'...'9', 'a'...'f', 'A'...'F' => {},
                    'p', 'P' => state = .FloatExponent,
                    else => {
                        // A hex fraction without a `p` exponent is rejected.
                        result.id = .Invalid;
                        break;
                    },
                },
                .FloatExponent => switch (c) {
                    '+', '-' => state = .FloatExponentDigits,
                    else => {
                        self.index -= 1;
                        state = .FloatExponentDigits;
                    },
                },
                .FloatExponentDigits => switch (c) {
                    '0'...'9' => counter += 1,
                    else => {
                        if (counter == 0) {
                            result.id = .Invalid;
                            break;
                        }
                        // Let the suffix state look at this byte instead of
                        // silently swallowing it into the literal.
                        self.index -= 1;
                        state = .FloatSuffix;
                    },
                },
                .FloatSuffix => switch (c) {
                    'l', 'L' => {
                        result.id = .{ .FloatLiteral = .L };
                        self.index += 1;
                        break;
                    },
                    'f', 'F' => {
                        result.id = .{ .FloatLiteral = .F };
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .{ .FloatLiteral = .None };
                        break;
                    },
                },
            }
        } else if (self.index == self.source.buffer.len) {
            // The buffer ended in the middle of (or right after) a token:
            // finish whatever the current state allows.
            switch (state) {
                .Start => {},
                .u, .u8, .U, .L, .Identifier => {
                    result.id = Token.getKeyword(self.source.buffer[result.start..self.index], self.prev_tok_id == .Hash and !self.pp_directive) orelse .Identifier;
                },

                .Cr,
                .Period2,
                .StringLiteral,
                .CharLiteralStart,
                .CharLiteral,
                .EscapeSequence,
                .OctalEscape,
                .HexEscape,
                .UnicodeEscape,
                .MultiLineComment,
                .MultiLineCommentAsterisk,
                .FloatFractionHex,
                .FloatExponent,
                .MacroString,
                => result.id = .Invalid,

                .IntegerLiteralOct,
                .IntegerLiteralBinary,
                .IntegerLiteralHex,
                .IntegerLiteral,
                .IntegerSuffix,
                .Zero,
                => result.id = .{ .IntegerLiteral = .None },
                .IntegerSuffixU => result.id = .{ .IntegerLiteral = .U },
                .IntegerSuffixL => result.id = .{ .IntegerLiteral = .L },
                .IntegerSuffixLL => result.id = .{ .IntegerLiteral = .LL },
                .IntegerSuffixUL => result.id = .{ .IntegerLiteral = .LU },

                // A decimal fraction is complete as it stands; an exponent
                // needs at least one digit.
                .FloatFraction, .FloatSuffix => result.id = .{ .FloatLiteral = .None },
                .FloatExponentDigits => {
                    if (counter == 0) {
                        result.id = .Invalid;
                    } else {
                        result.id = .{ .FloatLiteral = .None };
                    }
                },

                .Equal => result.id = .Equal,
                .Bang => result.id = .Bang,
                .Minus => result.id = .Minus,
                .Slash => result.id = .Slash,
                .Ampersand => result.id = .Ampersand,
                .Hash => result.id = .Hash,
                .Period => result.id = .Period,
                .Pipe => result.id = .Pipe,
                .AngleBracketAngleBracketRight => result.id = .AngleBracketAngleBracketRight,
                .AngleBracketRight => result.id = .AngleBracketRight,
                .AngleBracketAngleBracketLeft => result.id = .AngleBracketAngleBracketLeft,
                .AngleBracketLeft => result.id = .AngleBracketLeft,
                .Plus => result.id = .Plus,
                .Percent => result.id = .Percent,
                .Caret => result.id = .Caret,
                .Asterisk => result.id = .Asterisk,
                .LineComment => result.id = .LineComment,
            }
        }

        self.prev_tok_id = result.id;
        result.end = self.index;
        return result;
    }
};
|
||
|
||
// Every operator and punctuator spelling, separated by whitespace, must
// come back as its own token in source order.
test "operators" {
    const expected = [_]Token.Id{
        .Bang,
        .BangEqual,
        .Pipe,
        .PipePipe,
        .PipeEqual,
        .Equal,
        .EqualEqual,
        .LParen,
        .RParen,
        .LBrace,
        .RBrace,
        .LBracket,
        .RBracket,
        .Period,
        // `..` is not a token of its own: it scans as two periods.
        .Period,
        .Period,
        .Ellipsis,
        .Caret,
        .CaretEqual,
        .Plus,
        .PlusPlus,
        .PlusEqual,
        .Minus,
        .MinusMinus,
        .MinusEqual,
        .Asterisk,
        .AsteriskEqual,
        .Percent,
        .PercentEqual,
        .Arrow,
        .Colon,
        .Semicolon,
        .Slash,
        .SlashEqual,
        .Comma,
        .Ampersand,
        .AmpersandAmpersand,
        .AmpersandEqual,
        .QuestionMark,
        .AngleBracketLeft,
        .AngleBracketLeftEqual,
        .AngleBracketAngleBracketLeft,
        .AngleBracketAngleBracketLeftEqual,
        .AngleBracketRight,
        .AngleBracketRightEqual,
        .AngleBracketAngleBracketRight,
        .AngleBracketAngleBracketRightEqual,
        .Tilde,
        .Hash,
        .HashHash,
    };
    expectTokens(
        \\ ! != | || |= = ==
        \\ ( ) { } [ ] . .. ...
        \\ ^ ^= + ++ += - -- -=
        \\ * *= % %= -> : ; / /=
        \\ , & && &= ? < <= <<
        \\ <<= > >= >> >>= ~ # ##
        \\
    , &expected);
}
|
||
|
||
// Each non-preprocessor keyword spelling must map to its keyword id.
test "keywords" {
    const expected = [_]Token.Id{
        .Keyword_auto,
        .Keyword_break,
        .Keyword_case,
        .Keyword_char,
        .Keyword_const,
        .Keyword_continue,
        .Keyword_default,
        .Keyword_do,
        .Keyword_double,
        .Keyword_else,
        .Keyword_enum,
        .Keyword_extern,
        .Keyword_float,
        .Keyword_for,
        .Keyword_goto,
        .Keyword_if,
        .Keyword_int,
        .Keyword_long,
        .Keyword_register,
        .Keyword_return,
        .Keyword_short,
        .Keyword_signed,
        .Keyword_sizeof,
        .Keyword_static,
        .Keyword_struct,
        .Keyword_switch,
        .Keyword_typedef,
        .Keyword_union,
        .Keyword_unsigned,
        .Keyword_void,
        .Keyword_volatile,
        .Keyword_while,
        .Keyword_bool,
        .Keyword_complex,
        .Keyword_imaginary,
        .Keyword_inline,
        .Keyword_restrict,
        .Keyword_alignas,
        .Keyword_alignof,
        .Keyword_atomic,
        .Keyword_generic,
        .Keyword_noreturn,
        .Keyword_static_assert,
        .Keyword_thread_local,
    };
    expectTokens(
        \\auto break case char const continue default do
        \\double else enum extern float for goto if int
        \\long register return short signed sizeof static
        \\struct switch typedef union unsigned void volatile
        \\while _Bool _Complex _Imaginary inline restrict _Alignas
        \\_Alignof _Atomic _Generic _Noreturn _Static_assert _Thread_local
        \\
    , &expected);
}
|
||
|
||
// Directive names are keywords only directly after a `#` that starts a
// directive, `<...>` is a single token only after `include`, and line
// ends inside a directive are reported as `.Nl`.
test "preprocessor keywords" {
    const expected = [_]Token.Id{
        // #include <test>
        .Hash,
        .Keyword_include,
        .MacroString,
        .Nl,
        // #define #include <1
        .Hash,
        .Keyword_define,
        .Hash,
        .Identifier,
        .AngleBracketLeft,
        .{ .IntegerLiteral = .None },
        .Nl,
        // #ifdef
        .Hash,
        .Keyword_ifdef,
        .Nl,
        // #ifndef
        .Hash,
        .Keyword_ifndef,
        .Nl,
        // #error
        .Hash,
        .Keyword_error,
        .Nl,
        // #pragma
        .Hash,
        .Keyword_pragma,
        .Nl,
    };
    expectTokens(
        \\#include <test>
        \\#define #include <1
        \\#ifdef
        \\#ifndef
        \\#error
        \\#pragma
        \\
    , &expected);
}
|
||
|
||
/// Test helper: tokenizes `source` and panics at the first token whose id
/// differs from the corresponding entry of `expected_tokens`. After the
/// expected list is consumed, the next token must be `.Eof`.
fn expectTokens(source: []const u8, expected_tokens: []const Token.Id) void {
    var input = Source{
        .buffer = source,
        .file_name = undefined,
    };
    var tokenizer = Tokenizer{ .source = &input };

    for (expected_tokens) |want| {
        const got = tokenizer.next();
        if (std.meta.eql(got.id, want)) continue;
        std.debug.panic("expected {}, found {}\n", .{ @tagName(want), @tagName(got.id) });
    }

    const trailing = tokenizer.next();
    std.testing.expect(trailing.id == .Eof);
}
|