//! Tokenizer that splits a source buffer into tokens.
const std = @import("std");
|
|
const expect = std.testing.expect;
|
|
|
|
/// A chunk of input text together with the name it was loaded under.
pub const Source = struct {
    /// Raw bytes of the input.
    buffer: []const u8,
    /// Name associated with `buffer`.
    file_name: []const u8,
};
|
|
|
|
/// One lexical element located inside a `Source`.
pub const Token = struct {
    /// Kind of token.
    id: Id,
    /// Suffix attached to a numeric literal; `.None` unless one was recorded.
    num_suffix: NumSuffix = .None,
    /// Offset into `source.buffer` of the token's first byte.
    start: usize,
    /// Offset into `source.buffer` one past the token's last byte.
    end: usize,
    /// Source the offsets refer to.
    source: *Source,

    /// Every kind of token. The declaration order is significant for the
    /// enum's integer values and must not be rearranged.
    pub const Id = enum {
        // Special tokens.
        Invalid,
        Eof,
        Nl,

        // Names and literals.
        Identifier,
        StringLiteral,
        CharLiteral,
        IntegerLiteral,
        FloatLiteral,

        // Punctuation and operators.
        Bang,
        BangEqual,
        Pipe,
        PipePipe,
        PipeEqual,
        Equal,
        EqualEqual,
        EqualAngleBracketRight,
        LParen,
        RParen,
        LBrace,
        RBrace,
        LBracket,
        RBracket,
        Period,
        PeriodAsterisk,
        Ellipsis,
        Caret,
        CaretEqual,
        Plus,
        PlusPlus,
        PlusEqual,
        Minus,
        MinusMinus,
        MinusEqual,
        Asterisk,
        AsteriskEqual,
        Percent,
        PercentEqual,
        Arrow,
        Colon,
        Semicolon,
        Slash,
        SlashEqual,
        Comma,
        Ampersand,
        AmpersandAmpersand,
        AmpersandEqual,
        QuestionMark,
        AngleBracketLeft,
        AngleBracketLeftEqual,
        AngleBracketAngleBracketLeft,
        AngleBracketAngleBracketLeftEqual,
        AngleBracketRight,
        AngleBracketRightEqual,
        AngleBracketAngleBracketRight,
        AngleBracketAngleBracketRightEqual,
        Tilde,

        // Comments.
        LineComment,
        MultiLineComment,

        // Hash tokens.
        Hash,
        HashHash,
    };

    /// Suffix letters that may trail a numeric literal.
    pub const NumSuffix = enum {
        /// No suffix.
        None,
        F,
        /// `l` / `L`
        L,
        /// `u` / `U`
        U,
        /// One `l` combined with a `u`, in either order.
        LU,
        /// Two `l`s.
        LL,
        /// Two `l`s combined with a `u`.
        LLU,
    };
};
|
|
|
|
/// Splits the bytes of `source.buffer` into `Token`s, one per call to `next`.
pub const Tokenizer = struct {
    /// Source being scanned; a pointer to it is stored in every returned token.
    source: *Source,
    /// Offset into `source.buffer` of the next byte to examine.
    index: usize = 0,

    /// Scans and returns the next token, leaving `index` just past it.
    ///
    /// * Spaces and tabs in front of a token are skipped and excluded from
    ///   its `start`.
    /// * `"\n"` and `"\r\n"` produce `.Nl`; a `'\r'` not followed by `'\n'`
    ///   produces `.Invalid`.
    /// * A byte that cannot start any token produces a one-byte `.Invalid`.
    /// * String and character literals that hit a newline or the end of the
    ///   buffer before their closing quote produce `.Invalid`.
    /// * `//` comments produce `.LineComment` and stop before the newline.
    ///   `/* */` comments and `#` are not recognised by this function.
    /// * Numeric literals are scanned leniently: digits are not validated
    ///   against the radix prefix and an exponent may lack digits.
    /// * Once the buffer is exhausted every call returns `.Eof`.
    pub fn next(self: *Tokenizer) Token {
        var result = Token{
            .id = .Eof,
            .start = self.index,
            .end = undefined,
            .source = self.source,
        };
        var state: enum {
            Start,
            Cr,
            StringLiteral,
            StringEscape,
            CharLiteral,
            CharEscape,
            Identifier,
            Equal,
            Bang,
            Pipe,
            Percent,
            Asterisk,
            Plus,
            AngleBracketLeft,
            AngleBracketAngleBracketLeft,
            AngleBracketRight,
            AngleBracketAngleBracketRight,
            Caret,
            Period,
            Period2,
            Minus,
            Slash,
            LineComment,
            Ampersand,
            Zero,
            IntegerLiteralOct,
            IntegerLiteralBinary,
            IntegerLiteralHex,
            IntegerLiteral,
            IntegerSuffix,
            IntegerSuffixU,
            IntegerSuffixL,
            IntegerSuffixLL,
            IntegerSuffixUL,
            FloatFraction,
            FloatFractionHex,
            FloatExponentUnsigned,
            FloatExponentUnsignedHex,
            FloatExponentNumber,
            FloatSuffix,
        } = .Start;

        // Convention used throughout the loop:
        //  * `self.index += 1; break;` -> the current byte belongs to the token.
        //  * plain `break;`            -> the current byte starts the next token.
        //  * `self.index -= 1;` without `break` -> re-examine the current byte
        //    in the newly selected state (the loop increment undoes the step).
        while (self.index < self.source.buffer.len) : (self.index += 1) {
            const c = self.source.buffer[self.index];
            switch (state) {
                .Start => switch (c) {
                    '\n' => {
                        result.id = .Nl;
                        self.index += 1;
                        break;
                    },
                    '\r' => {
                        state = .Cr;
                    },
                    ' ', '\t' => {
                        result.start = self.index + 1;
                    },
                    '"' => {
                        state = .StringLiteral;
                        result.id = .StringLiteral;
                    },
                    '\'' => {
                        state = .CharLiteral;
                    },
                    'a'...'z', 'A'...'Z', '_' => {
                        state = .Identifier;
                        result.id = .Identifier;
                    },
                    '=' => {
                        state = .Equal;
                    },
                    '!' => {
                        state = .Bang;
                    },
                    '|' => {
                        state = .Pipe;
                    },
                    '(' => {
                        result.id = .LParen;
                        self.index += 1;
                        break;
                    },
                    ')' => {
                        result.id = .RParen;
                        self.index += 1;
                        break;
                    },
                    '[' => {
                        result.id = .LBracket;
                        self.index += 1;
                        break;
                    },
                    ']' => {
                        result.id = .RBracket;
                        self.index += 1;
                        break;
                    },
                    ';' => {
                        result.id = .Semicolon;
                        self.index += 1;
                        break;
                    },
                    ',' => {
                        result.id = .Comma;
                        self.index += 1;
                        break;
                    },
                    '?' => {
                        result.id = .QuestionMark;
                        self.index += 1;
                        break;
                    },
                    ':' => {
                        result.id = .Colon;
                        self.index += 1;
                        break;
                    },
                    '%' => {
                        state = .Percent;
                    },
                    '*' => {
                        state = .Asterisk;
                    },
                    '+' => {
                        state = .Plus;
                    },
                    '<' => {
                        state = .AngleBracketLeft;
                    },
                    '>' => {
                        state = .AngleBracketRight;
                    },
                    '^' => {
                        state = .Caret;
                    },
                    '{' => {
                        result.id = .LBrace;
                        self.index += 1;
                        break;
                    },
                    '}' => {
                        result.id = .RBrace;
                        self.index += 1;
                        break;
                    },
                    '~' => {
                        result.id = .Tilde;
                        self.index += 1;
                        break;
                    },
                    '.' => {
                        state = .Period;
                    },
                    '-' => {
                        state = .Minus;
                    },
                    '/' => {
                        state = .Slash;
                    },
                    '&' => {
                        state = .Ampersand;
                    },
                    '0' => {
                        state = .Zero;
                        result.id = .IntegerLiteral;
                    },
                    '1'...'9' => {
                        state = .IntegerLiteral;
                        result.id = .IntegerLiteral;
                    },
                    else => {
                        result.id = .Invalid;
                        self.index += 1;
                        break;
                    },
                },
                .Cr => switch (c) {
                    '\n' => {
                        result.id = .Nl;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Invalid;
                        break;
                    },
                },
                .StringLiteral => switch (c) {
                    '\\' => {
                        state = .StringEscape;
                    },
                    '"' => {
                        self.index += 1;
                        break;
                    },
                    '\n' => {
                        // Unterminated literal; the newline becomes the next token.
                        result.id = .Invalid;
                        break;
                    },
                    else => {},
                },
                // The byte after a backslash never terminates the literal.
                .StringEscape => {
                    state = .StringLiteral;
                },
                .CharLiteral => switch (c) {
                    '\\' => {
                        state = .CharEscape;
                    },
                    '\'' => {
                        result.id = .CharLiteral;
                        self.index += 1;
                        break;
                    },
                    '\n' => {
                        // Unterminated literal; the newline becomes the next token.
                        result.id = .Invalid;
                        break;
                    },
                    else => {},
                },
                .CharEscape => {
                    state = .CharLiteral;
                },
                .Identifier => switch (c) {
                    'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
                    else => {
                        result.id = .Identifier;
                        break;
                    },
                },
                .Equal => switch (c) {
                    '=' => {
                        result.id = .EqualEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Equal;
                        break;
                    },
                },
                .Bang => switch (c) {
                    '=' => {
                        result.id = .BangEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Bang;
                        break;
                    },
                },
                .Pipe => switch (c) {
                    '=' => {
                        result.id = .PipeEqual;
                        self.index += 1;
                        break;
                    },
                    '|' => {
                        result.id = .PipePipe;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Pipe;
                        break;
                    },
                },
                .Percent => switch (c) {
                    '=' => {
                        result.id = .PercentEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Percent;
                        break;
                    },
                },
                .Asterisk => switch (c) {
                    '=' => {
                        result.id = .AsteriskEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Asterisk;
                        break;
                    },
                },
                .Plus => switch (c) {
                    '=' => {
                        result.id = .PlusEqual;
                        self.index += 1;
                        break;
                    },
                    '+' => {
                        result.id = .PlusPlus;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Plus;
                        break;
                    },
                },
                .AngleBracketLeft => switch (c) {
                    '<' => {
                        state = .AngleBracketAngleBracketLeft;
                    },
                    '=' => {
                        result.id = .AngleBracketLeftEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .AngleBracketLeft;
                        break;
                    },
                },
                .AngleBracketAngleBracketLeft => switch (c) {
                    '=' => {
                        result.id = .AngleBracketAngleBracketLeftEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .AngleBracketAngleBracketLeft;
                        break;
                    },
                },
                .AngleBracketRight => switch (c) {
                    '>' => {
                        state = .AngleBracketAngleBracketRight;
                    },
                    '=' => {
                        result.id = .AngleBracketRightEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .AngleBracketRight;
                        break;
                    },
                },
                .AngleBracketAngleBracketRight => switch (c) {
                    '=' => {
                        result.id = .AngleBracketAngleBracketRightEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .AngleBracketAngleBracketRight;
                        break;
                    },
                },
                .Caret => switch (c) {
                    '=' => {
                        result.id = .CaretEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Caret;
                        break;
                    },
                },
                .Period => switch (c) {
                    '.' => {
                        state = .Period2;
                    },
                    '0'...'9' => {
                        state = .FloatFraction;
                    },
                    else => {
                        result.id = .Period;
                        break;
                    },
                },
                .Period2 => switch (c) {
                    '.' => {
                        result.id = .Ellipsis;
                        self.index += 1;
                        break;
                    },
                    else => {
                        // Only the first '.' is a token; step back so the
                        // second one starts the next token.
                        result.id = .Period;
                        self.index -= 1;
                        break;
                    },
                },
                .Minus => switch (c) {
                    '>' => {
                        result.id = .Arrow;
                        self.index += 1;
                        break;
                    },
                    '=' => {
                        result.id = .MinusEqual;
                        self.index += 1;
                        break;
                    },
                    '-' => {
                        result.id = .MinusMinus;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Minus;
                        break;
                    },
                },
                .Slash => switch (c) {
                    '/' => {
                        state = .LineComment;
                        result.id = .LineComment;
                    },
                    '=' => {
                        result.id = .SlashEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Slash;
                        break;
                    },
                },
                .LineComment => switch (c) {
                    // The newline is not part of the comment token.
                    '\n' => break,
                    else => {},
                },
                .Ampersand => switch (c) {
                    '&' => {
                        result.id = .AmpersandAmpersand;
                        self.index += 1;
                        break;
                    },
                    '=' => {
                        result.id = .AmpersandEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Ampersand;
                        break;
                    },
                },
                .Zero => switch (c) {
                    '0'...'9' => {
                        state = .IntegerLiteralOct;
                    },
                    'b', 'B' => {
                        state = .IntegerLiteralBinary;
                    },
                    'x', 'X' => {
                        state = .IntegerLiteralHex;
                    },
                    // "0.5" and "0e1" are floats, same as for other leading digits.
                    '.' => {
                        state = .FloatFraction;
                    },
                    'e', 'E' => {
                        state = .FloatExponentUnsigned;
                    },
                    else => {
                        state = .IntegerSuffix;
                        self.index -= 1;
                    },
                },
                .IntegerLiteralOct => switch (c) {
                    '0'...'7' => {},
                    else => {
                        state = .IntegerSuffix;
                        self.index -= 1;
                    },
                },
                .IntegerLiteralBinary => switch (c) {
                    '0', '1' => {},
                    else => {
                        state = .IntegerSuffix;
                        self.index -= 1;
                    },
                },
                .IntegerLiteralHex => switch (c) {
                    '0'...'9', 'a'...'f', 'A'...'F' => {},
                    '.' => {
                        state = .FloatFractionHex;
                    },
                    'p', 'P' => {
                        state = .FloatExponentUnsignedHex;
                    },
                    else => {
                        state = .IntegerSuffix;
                        self.index -= 1;
                    },
                },
                .IntegerLiteral => switch (c) {
                    '0'...'9' => {},
                    '.' => {
                        state = .FloatFraction;
                    },
                    'e', 'E' => {
                        state = .FloatExponentUnsigned;
                    },
                    else => {
                        state = .IntegerSuffix;
                        self.index -= 1;
                    },
                },
                .IntegerSuffix => switch (c) {
                    'u', 'U' => {
                        state = .IntegerSuffixU;
                    },
                    'l', 'L' => {
                        state = .IntegerSuffixL;
                    },
                    else => {
                        result.id = .IntegerLiteral;
                        break;
                    },
                },
                .IntegerSuffixU => switch (c) {
                    'l', 'L' => {
                        state = .IntegerSuffixUL;
                    },
                    else => {
                        result.id = .IntegerLiteral;
                        result.num_suffix = .U;
                        break;
                    },
                },
                .IntegerSuffixL => switch (c) {
                    'l', 'L' => {
                        state = .IntegerSuffixLL;
                    },
                    'u', 'U' => {
                        result.id = .IntegerLiteral;
                        result.num_suffix = .LU;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .IntegerLiteral;
                        result.num_suffix = .L;
                        break;
                    },
                },
                .IntegerSuffixLL => switch (c) {
                    'u', 'U' => {
                        result.id = .IntegerLiteral;
                        result.num_suffix = .LLU;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .IntegerLiteral;
                        result.num_suffix = .LL;
                        break;
                    },
                },
                .IntegerSuffixUL => switch (c) {
                    'l', 'L' => {
                        result.id = .IntegerLiteral;
                        result.num_suffix = .LLU;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .IntegerLiteral;
                        result.num_suffix = .LU;
                        break;
                    },
                },
                .FloatFraction => switch (c) {
                    '0'...'9' => {},
                    'e', 'E' => {
                        state = .FloatExponentUnsigned;
                    },
                    else => {
                        state = .FloatSuffix;
                        self.index -= 1;
                    },
                },
                .FloatFractionHex => switch (c) {
                    '0'...'9', 'a'...'f', 'A'...'F' => {},
                    'p', 'P' => {
                        state = .FloatExponentUnsignedHex;
                    },
                    else => {
                        state = .FloatSuffix;
                        self.index -= 1;
                    },
                },
                // Directly after the exponent marker: an optional sign, then
                // decimal digits for both the decimal and the hex form.
                .FloatExponentUnsigned, .FloatExponentUnsignedHex => switch (c) {
                    '+', '-' => {
                        state = .FloatExponentNumber;
                    },
                    else => {
                        state = .FloatExponentNumber;
                        self.index -= 1;
                    },
                },
                .FloatExponentNumber => switch (c) {
                    '0'...'9' => {},
                    else => {
                        state = .FloatSuffix;
                        self.index -= 1;
                    },
                },
                .FloatSuffix => switch (c) {
                    'f', 'F' => {
                        result.id = .FloatLiteral;
                        result.num_suffix = .F;
                        self.index += 1;
                        break;
                    },
                    'l', 'L' => {
                        result.id = .FloatLiteral;
                        result.num_suffix = .L;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .FloatLiteral;
                        break;
                    },
                },
            }
        } else {
            // The buffer ended while a token was still being scanned (or
            // before any token started): finish it based on the state reached.
            switch (state) {
                .Start => {},
                .Cr,
                .StringLiteral,
                .StringEscape,
                .CharLiteral,
                .CharEscape,
                .Period2,
                => result.id = .Invalid,
                .Identifier => result.id = .Identifier,
                .LineComment => result.id = .LineComment,
                .Zero,
                .IntegerLiteralOct,
                .IntegerLiteralBinary,
                .IntegerLiteralHex,
                .IntegerLiteral,
                .IntegerSuffix,
                => result.id = .IntegerLiteral,
                .IntegerSuffixU => {
                    result.id = .IntegerLiteral;
                    result.num_suffix = .U;
                },
                .IntegerSuffixL => {
                    result.id = .IntegerLiteral;
                    result.num_suffix = .L;
                },
                .IntegerSuffixLL => {
                    result.id = .IntegerLiteral;
                    result.num_suffix = .LL;
                },
                .IntegerSuffixUL => {
                    result.id = .IntegerLiteral;
                    result.num_suffix = .LU;
                },
                .FloatFraction,
                .FloatFractionHex,
                .FloatExponentUnsigned,
                .FloatExponentUnsignedHex,
                .FloatExponentNumber,
                .FloatSuffix,
                => result.id = .FloatLiteral,
                .Equal => result.id = .Equal,
                .Bang => result.id = .Bang,
                .Minus => result.id = .Minus,
                .Slash => result.id = .Slash,
                .Ampersand => result.id = .Ampersand,
                .Period => result.id = .Period,
                .Pipe => result.id = .Pipe,
                .AngleBracketAngleBracketRight => result.id = .AngleBracketAngleBracketRight,
                .AngleBracketRight => result.id = .AngleBracketRight,
                .AngleBracketAngleBracketLeft => result.id = .AngleBracketAngleBracketLeft,
                .AngleBracketLeft => result.id = .AngleBracketLeft,
                .Plus => result.id = .Plus,
                .Percent => result.id = .Percent,
                .Caret => result.id = .Caret,
                .Asterisk => result.id = .Asterisk,
            }
        }

        result.end = self.index;
        return result;
    }
};
|
|
|
|
/// Tokenizes `source` and panics unless the ids of the produced tokens match
/// `expected_tokens` one for one and the token after them is `.Eof`.
fn expectTokens(source: []const u8, expected_tokens: []const Token.Id) void {
    // `Tokenizer.source` is a `*Source`, so the value needs an addressable
    // home; a struct literal cannot be assigned to the pointer field directly.
    var source_file = Source{
        .buffer = source,
        // Left undefined: the tokenizer only reads `buffer`.
        .file_name = undefined,
    };
    var tokenizer = Tokenizer{ .source = &source_file };

    for (expected_tokens) |expected_token_id| {
        const token = tokenizer.next();
        if (token.id != expected_token_id) {
            std.debug.panic("expected {s}, found {s}\n", .{ @tagName(expected_token_id), @tagName(token.id) });
        }
    }

    // Nothing may follow the expected tokens. Reported the same way as a
    // mismatch above so the function can keep its `void` return type.
    const last_token = tokenizer.next();
    if (last_token.id != .Eof) {
        std.debug.panic("expected {s}, found {s}\n", .{ @tagName(Token.Id.Eof), @tagName(last_token.id) });
    }
}
|