simple tokenization

master
Andrew Kelley 2017-12-04 23:09:03 -05:00
parent 31d9dc3539
commit 798dbe487b
2 changed files with 153 additions and 23 deletions

View File

@ -3,18 +3,134 @@ const io = @import("std").io;
const os = @import("std").os;
const heap = @import("std").heap;
const warn = @import("std").debug.warn;
const assert = @import("std").debug.assert;
const mem = @import("std").mem;
// A single lexed token: its tag (`id`) plus the [start, end) byte range it
// covers in the source buffer being tokenized.
const Token = struct {
id: Id,
start: usize,
end: usize,
// Every language keyword. The @"..." quoting lets reserved words be used
// verbatim as enum member names, so the member name IS the keyword text.
const Keyword = enum {
@"align",
@"and",
@"asm",
@"break",
@"coldcc",
@"comptime",
@"const",
@"continue",
@"defer",
@"else",
@"enum",
@"error",
@"export",
@"extern",
@"false",
@"fn",
@"for",
@"goto",
@"if",
@"inline",
@"nakedcc",
@"noalias",
@"null",
@"or",
@"packed",
@"pub",
@"return",
@"stdcallcc",
@"struct",
@"switch",
@"test",
@"this",
@"true",
@"undefined",
@"union",
@"unreachable",
@"use",
@"var",
@"while",
};
// Linear scan over the Keyword members, comparing each member's name
// against `bytes`; returns the matching Keyword or null if `bytes` is not
// a keyword. The loop is unrolled at comptime via `inline while`.
// NOTE(review): `@memberCount`/`@memberName` and the `Keyword(i)`
// int-to-enum cast form are pre-1.0 builtins/syntax — presumably valid for
// the 2017-era compiler this commit targets; verify against that version.
fn getKeyword(bytes: []const u8) -> ?Keyword {
comptime var i = 0;
inline while (i < @memberCount(Keyword)) : (i += 1) {
if (mem.eql(u8, @memberName(Keyword, i), bytes)) {
return Keyword(i);
}
}
return null;
}
// Token tag. Keyword carries which keyword matched; the other variants
// have no payload.
const Id = union(enum) {
Invalid,
Identifier,
Keyword: Keyword,
Eof,
};
};
// Streaming tokenizer over an in-memory source buffer. `index` is the byte
// position of the next unconsumed character.
//
// Fixes in this revision:
//  1. Removed a stray, body-less duplicate declaration `pub fn next() -> Token {`
//     that sat immediately before `dump` and made the struct syntactically
//     invalid; the real `next` below is the one that was always intended.
//  2. An identifier terminated by end-of-buffer (e.g. a file ending in
//     `return` with no trailing newline) previously skipped the keyword
//     lookup — it only ran in the in-loop `else` arm — and was mis-tagged
//     Identifier. A post-loop check now handles that case.
const Tokenizer = struct {
    buffer: []const u8,
    index: usize,

    // Debugging aid: print the token's tag name and the exact source slice
    // it covers.
    pub fn dump(self: &Tokenizer, token: &const Token) {
        warn("{} \"{}\"\n", @tagName(token.id), self.buffer[token.start..token.end]);
    }

    // Start tokenizing at the beginning of `buffer`. The tokenizer does not
    // own the buffer; caller keeps it alive for the tokenizer's lifetime.
    pub fn init(buffer: []const u8) -> Tokenizer {
        return Tokenizer {
            .buffer = buffer,
            .index = 0,
        };
    }

    const State = enum {
        Start,
        Identifier,
    };

    // Return the next token. Once the buffer is exhausted this returns an
    // Eof token (with start == end == buffer.len) on every subsequent call.
    pub fn next(self: &Tokenizer) -> Token {
        var state = State.Start;
        var result = Token {
            .id = Token.Id { .Eof = {} },
            .start = self.index,
            .end = undefined,
        };
        while (self.index < self.buffer.len) : (self.index += 1) {
            const c = self.buffer[self.index];
            switch (state) {
                State.Start => switch (c) {
                    // Whitespace: skip it and restart the token just past it.
                    ' ', '\n' => {
                        result.start = self.index + 1;
                    },
                    'a'...'z', 'A'...'Z', '_' => {
                        state = State.Identifier;
                        result.id = Token.Id { .Identifier = {} };
                    },
                    else => {
                        // Unrecognized byte: emit a one-byte Invalid token.
                        result.id = Token.Id { .Invalid = {} };
                        self.index += 1;
                        break;
                    },
                },
                State.Identifier => switch (c) {
                    'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
                    else => {
                        // Identifier ended mid-buffer; upgrade to a Keyword
                        // token if the text matches one, leaving `index` on
                        // the terminating byte for the next call.
                        if (Token.getKeyword(self.buffer[result.start..self.index])) |keyword_id| {
                            result.id = Token.Id { .Keyword = keyword_id };
                        }
                        break;
                    },
                },
            }
        }
        // Bug fix: an identifier that runs to end-of-buffer exits the loop
        // without passing through the `else` arm above, so the keyword check
        // must also happen here. The `index == buffer.len` guard ensures we
        // do not re-check an identifier that already broke out mid-buffer.
        if (state == State.Identifier and self.index == self.buffer.len) {
            if (Token.getKeyword(self.buffer[result.start..self.index])) |keyword_id| {
                result.id = Token.Id { .Keyword = keyword_id };
            }
        }
        result.end = self.index;
        return result;
    }
};
@ -36,4 +152,13 @@ pub fn main2() -> %void {
const target_file_buf = %return io.readFileAlloc(target_file, allocator);
warn("{}", target_file_buf);
var tokenizer = Tokenizer.init(target_file_buf);
while (true) {
const token = tokenizer.next();
tokenizer.dump(token);
if (@TagType(Token.Id)(token.id) == Token.Id.Eof) {
break;
}
}
}

View File

@ -8422,16 +8422,6 @@ static IrInstruction *ir_analyze_int_to_enum(IrAnalyze *ira, IrInstruction *sour
if (type_is_invalid(wanted_type))
return ira->codegen->invalid_instruction;
if (actual_type != wanted_type->data.enumeration.tag_int_type) {
ir_add_error(ira, source_instr,
buf_sprintf("integer to enum cast from '%s' instead of its tag type, '%s'",
buf_ptr(&actual_type->name),
buf_ptr(&wanted_type->data.enumeration.tag_int_type->name)));
return ira->codegen->invalid_instruction;
}
assert(actual_type->id == TypeTableEntryIdInt);
if (instr_is_comptime(target)) {
ConstExprValue *val = ir_resolve_const(ira, target, UndefBad);
if (!val)
@ -8453,6 +8443,17 @@ static IrInstruction *ir_analyze_int_to_enum(IrAnalyze *ira, IrInstruction *sour
return result;
}
if (actual_type != wanted_type->data.enumeration.tag_int_type) {
ir_add_error(ira, source_instr,
buf_sprintf("integer to enum cast from '%s' instead of its tag type, '%s'",
buf_ptr(&actual_type->name),
buf_ptr(&wanted_type->data.enumeration.tag_int_type->name)));
return ira->codegen->invalid_instruction;
}
assert(actual_type->id == TypeTableEntryIdInt);
IrInstruction *result = ir_build_int_to_enum(&ira->new_irb, source_instr->scope,
source_instr->source_node, target);
result->value.type = wanted_type;
@ -8822,6 +8823,20 @@ static IrInstruction *ir_analyze_cast(IrAnalyze *ira, IrInstruction *source_inst
}
}
// explicit cast from integer to enum type with no payload
if ((actual_type->id == TypeTableEntryIdInt || actual_type->id == TypeTableEntryIdNumLitInt) &&
wanted_type->id == TypeTableEntryIdEnum)
{
return ir_analyze_int_to_enum(ira, source_instr, value, wanted_type);
}
// explicit cast from enum type with no payload to integer
if ((wanted_type->id == TypeTableEntryIdInt || wanted_type->id == TypeTableEntryIdNumLitInt) &&
actual_type->id == TypeTableEntryIdEnum)
{
return ir_analyze_enum_to_int(ira, source_instr, value, wanted_type);
}
// explicit cast from number literal to another type
// explicit cast from number literal to &const integer
if (actual_type->id == TypeTableEntryIdNumLitFloat ||
@ -8886,16 +8901,6 @@ static IrInstruction *ir_analyze_cast(IrAnalyze *ira, IrInstruction *source_inst
return ir_analyze_int_to_err(ira, source_instr, value);
}
// explicit cast from integer to enum type with no payload
if (actual_type->id == TypeTableEntryIdInt && wanted_type->id == TypeTableEntryIdEnum) {
return ir_analyze_int_to_enum(ira, source_instr, value, wanted_type);
}
// explicit cast from enum type with no payload to integer
if (wanted_type->id == TypeTableEntryIdInt && actual_type->id == TypeTableEntryIdEnum) {
return ir_analyze_enum_to_int(ira, source_instr, value, wanted_type);
}
// explicit cast from union to the enum type of the union
if (actual_type->id == TypeTableEntryIdUnion && wanted_type->id == TypeTableEntryIdEnum) {
type_ensure_zero_bits_known(ira->codegen, actual_type);