simple tokenization
parent
31d9dc3539
commit
798dbe487b
|
@ -3,18 +3,134 @@ const io = @import("std").io;
|
|||
const os = @import("std").os;
|
||||
const heap = @import("std").heap;
|
||||
const warn = @import("std").debug.warn;
|
||||
|
||||
const assert = @import("std").debug.assert;
|
||||
const mem = @import("std").mem;
|
||||
|
||||
/// A lexical token: its classification plus the byte range it covers in the
/// source buffer.
const Token = struct {
    /// Classification of the token.
    id: Id,
    /// Offset of the token's first byte in the source buffer.
    start: usize,
    /// Offset one past the token's last byte (used as an exclusive slice end).
    end: usize,

    /// Token classification. Only `Keyword` carries a payload.
    const Id = union(enum) {
        Invalid,
        Identifier,
        Keyword: Keyword,
        Eof,
    };

    /// Reserved words. Each member is named after the keyword's spelling.
    /// `getKeyword` casts a member index to `Keyword`, so it relies on the
    /// i-th member having tag value i; keep the members free of explicit
    /// values.
    const Keyword = enum {
        @"align",
        @"and",
        @"asm",
        @"break",
        @"coldcc",
        @"comptime",
        @"const",
        @"continue",
        @"defer",
        @"else",
        @"enum",
        @"error",
        @"export",
        @"extern",
        @"false",
        @"fn",
        @"for",
        @"goto",
        @"if",
        @"inline",
        @"nakedcc",
        @"noalias",
        @"null",
        @"or",
        @"packed",
        @"pub",
        @"return",
        @"stdcallcc",
        @"struct",
        @"switch",
        @"test",
        @"this",
        @"true",
        @"undefined",
        @"union",
        @"unreachable",
        @"use",
        @"var",
        @"volatile",
        @"while",
    };

    /// Returns the `Keyword` whose member name equals `bytes` exactly, or
    /// null when `bytes` is not a keyword.
    fn getKeyword(bytes: []const u8) -> ?Keyword {
        // The loop is unrolled at compile time: one comparison per member.
        comptime var member_index = 0;
        inline while (member_index < @memberCount(Keyword)) : (member_index += 1) {
            if (mem.eql(u8, @memberName(Keyword, member_index), bytes)) {
                return Keyword(member_index);
            }
        }
        return null;
    }
};
|
||||
|
||||
/// Scans a source buffer and hands out one `Token` per call to `next`.
const Tokenizer = struct {
    /// Source text being scanned.
    buffer: []const u8,
    /// Offset into `buffer` of the next byte `next` will examine.
    index: usize,

    /// Prints the token's tag name followed by the quoted source text it
    /// spans, using `warn`.
    pub fn dump(self: &Tokenizer, token: &const Token) {
        warn("{} \"{}\"\n", @tagName(token.id), self.buffer[token.start..token.end]);
    }

    /// Creates a tokenizer positioned at the start of `buffer`.
    pub fn init(buffer: []const u8) -> Tokenizer {
        return Tokenizer {
            .buffer = buffer,
            .index = 0,
        };
    }

    /// Scanner states used by `next`.
    const State = enum {
        Start,
        Identifier,
    };

    /// Returns the next token and advances `index` past it.
    ///
    /// Spaces and newlines before a token are skipped. A byte that cannot
    /// start a token yields a one-byte `Invalid` token. Once the buffer is
    /// exhausted every call returns an `Eof` token with `start == end`.
    pub fn next(self: &Tokenizer) -> Token {
        var state = State.Start;
        var result = Token {
            .id = Token.Id { .Eof = {} },
            .start = self.index,
            .end = undefined,
        };
        while (self.index < self.buffer.len) : (self.index += 1) {
            const c = self.buffer[self.index];
            switch (state) {
                State.Start => switch (c) {
                    ' ', '\n' => {
                        // Still skipping whitespace: the token starts later.
                        result.start = self.index + 1;
                    },
                    'a'...'z', 'A'...'Z', '_' => {
                        state = State.Identifier;
                        result.id = Token.Id { .Identifier = {} };
                    },
                    else => {
                        result.id = Token.Id { .Invalid = {} };
                        // `break` skips the loop's continue expression, so
                        // consume the offending byte here.
                        self.index += 1;
                        break;
                    },
                },
                State.Identifier => switch (c) {
                    'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
                    else => {
                        // Leave `index` on the terminating byte so the next
                        // call starts with it.
                        break;
                    },
                },
            }
        }
        result.end = self.index;
        // Classify after the loop so that an identifier ending exactly at the
        // end of the buffer is still checked against the keyword table.
        if (state == State.Identifier) {
            if (Token.getKeyword(self.buffer[result.start..result.end])) |keyword_id| {
                result.id = Token.Id { .Keyword = keyword_id };
            }
        }
        return result;
    }
};
|
||||
|
||||
|
||||
|
@ -36,4 +152,13 @@ pub fn main2() -> %void {
|
|||
const target_file_buf = %return io.readFileAlloc(target_file, allocator);
|
||||
|
||||
warn("{}", target_file_buf);
|
||||
|
||||
var tokenizer = Tokenizer.init(target_file_buf);
|
||||
while (true) {
|
||||
const token = tokenizer.next();
|
||||
tokenizer.dump(token);
|
||||
if (@TagType(Token.Id)(token.id) == Token.Id.Eof) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
45
src/ir.cpp
45
src/ir.cpp
|
@ -8422,16 +8422,6 @@ static IrInstruction *ir_analyze_int_to_enum(IrAnalyze *ira, IrInstruction *sour
|
|||
if (type_is_invalid(wanted_type))
|
||||
return ira->codegen->invalid_instruction;
|
||||
|
||||
if (actual_type != wanted_type->data.enumeration.tag_int_type) {
|
||||
ir_add_error(ira, source_instr,
|
||||
buf_sprintf("integer to enum cast from '%s' instead of its tag type, '%s'",
|
||||
buf_ptr(&actual_type->name),
|
||||
buf_ptr(&wanted_type->data.enumeration.tag_int_type->name)));
|
||||
return ira->codegen->invalid_instruction;
|
||||
}
|
||||
|
||||
assert(actual_type->id == TypeTableEntryIdInt);
|
||||
|
||||
if (instr_is_comptime(target)) {
|
||||
ConstExprValue *val = ir_resolve_const(ira, target, UndefBad);
|
||||
if (!val)
|
||||
|
@ -8453,6 +8443,17 @@ static IrInstruction *ir_analyze_int_to_enum(IrAnalyze *ira, IrInstruction *sour
|
|||
return result;
|
||||
}
|
||||
|
||||
if (actual_type != wanted_type->data.enumeration.tag_int_type) {
|
||||
ir_add_error(ira, source_instr,
|
||||
buf_sprintf("integer to enum cast from '%s' instead of its tag type, '%s'",
|
||||
buf_ptr(&actual_type->name),
|
||||
buf_ptr(&wanted_type->data.enumeration.tag_int_type->name)));
|
||||
return ira->codegen->invalid_instruction;
|
||||
}
|
||||
|
||||
assert(actual_type->id == TypeTableEntryIdInt);
|
||||
|
||||
|
||||
IrInstruction *result = ir_build_int_to_enum(&ira->new_irb, source_instr->scope,
|
||||
source_instr->source_node, target);
|
||||
result->value.type = wanted_type;
|
||||
|
@ -8822,6 +8823,20 @@ static IrInstruction *ir_analyze_cast(IrAnalyze *ira, IrInstruction *source_inst
|
|||
}
|
||||
}
|
||||
|
||||
// explicit cast from integer to enum type with no payload
|
||||
if ((actual_type->id == TypeTableEntryIdInt || actual_type->id == TypeTableEntryIdNumLitInt) &&
|
||||
wanted_type->id == TypeTableEntryIdEnum)
|
||||
{
|
||||
return ir_analyze_int_to_enum(ira, source_instr, value, wanted_type);
|
||||
}
|
||||
|
||||
// explicit cast from enum type with no payload to integer
|
||||
if ((wanted_type->id == TypeTableEntryIdInt || wanted_type->id == TypeTableEntryIdNumLitInt) &&
|
||||
actual_type->id == TypeTableEntryIdEnum)
|
||||
{
|
||||
return ir_analyze_enum_to_int(ira, source_instr, value, wanted_type);
|
||||
}
|
||||
|
||||
// explicit cast from number literal to another type
|
||||
// explicit cast from number literal to &const integer
|
||||
if (actual_type->id == TypeTableEntryIdNumLitFloat ||
|
||||
|
@ -8886,16 +8901,6 @@ static IrInstruction *ir_analyze_cast(IrAnalyze *ira, IrInstruction *source_inst
|
|||
return ir_analyze_int_to_err(ira, source_instr, value);
|
||||
}
|
||||
|
||||
// explicit cast from integer to enum type with no payload
|
||||
if (actual_type->id == TypeTableEntryIdInt && wanted_type->id == TypeTableEntryIdEnum) {
|
||||
return ir_analyze_int_to_enum(ira, source_instr, value, wanted_type);
|
||||
}
|
||||
|
||||
// explicit cast from enum type with no payload to integer
|
||||
if (wanted_type->id == TypeTableEntryIdInt && actual_type->id == TypeTableEntryIdEnum) {
|
||||
return ir_analyze_enum_to_int(ira, source_instr, value, wanted_type);
|
||||
}
|
||||
|
||||
// explicit cast from union to the enum type of the union
|
||||
if (actual_type->id == TypeTableEntryIdUnion && wanted_type->id == TypeTableEntryIdEnum) {
|
||||
type_ensure_zero_bits_known(ira->codegen, actual_type);
|
||||
|
|
Loading…
Reference in New Issue