simple tokenization
commit 798dbe487b
parent 31d9dc3539
@@ -3,18 +3,134 @@ const io = @import("std").io;
 const os = @import("std").os;
 const heap = @import("std").heap;
 const warn = @import("std").debug.warn;
+const assert = @import("std").debug.assert;
+const mem = @import("std").mem;

 const Token = struct {
+    id: Id,
+    start: usize,
+    end: usize,
+
+    const Keyword = enum {
+        @"align",
+        @"and",
+        @"asm",
+        @"break",
+        @"coldcc",
+        @"comptime",
+        @"const",
+        @"continue",
+        @"defer",
+        @"else",
+        @"enum",
+        @"error",
+        @"export",
+        @"extern",
+        @"false",
+        @"fn",
+        @"for",
+        @"goto",
+        @"if",
+        @"inline",
+        @"nakedcc",
+        @"noalias",
+        @"null",
+        @"or",
+        @"packed",
+        @"pub",
+        @"return",
+        @"stdcallcc",
+        @"struct",
+        @"switch",
+        @"test",
+        @"this",
+        @"true",
+        @"undefined",
+        @"union",
+        @"unreachable",
+        @"use",
+        @"var",
+        @"volatile",
+        @"while",
+    };
+
+    fn getKeyword(bytes: []const u8) -> ?Keyword {
+        comptime var i = 0;
+        inline while (i < @memberCount(Keyword)) : (i += 1) {
+            if (mem.eql(u8, @memberName(Keyword, i), bytes)) {
+                return Keyword(i);
+            }
+        }
+        return null;
+    }
+
+
+    const Id = union(enum) {
+        Invalid,
+        Identifier,
+        Keyword: Keyword,
+        Eof,
+    };
 };

 const Tokenizer = struct {
+    buffer: []const u8,
+    index: usize,
+
-    pub fn next() -> Token {
+    pub fn dump(self: &Tokenizer, token: &const Token) {
+        warn("{} \"{}\"\n", @tagName(token.id), self.buffer[token.start..token.end]);
     }
+
+    pub fn init(buffer: []const u8) -> Tokenizer {
+        return Tokenizer {
+            .buffer = buffer,
+            .index = 0,
+        };
+    }
+
+    const State = enum {
+        Start,
+        Identifier,
+    };
+
+    pub fn next(self: &Tokenizer) -> Token {
+        var state = State.Start;
+        var result = Token {
+            .id = Token.Id { .Eof = {} },
+            .start = self.index,
+            .end = undefined,
+        };
+        while (self.index < self.buffer.len) : (self.index += 1) {
+            const c = self.buffer[self.index];
+            switch (state) {
+                State.Start => switch (c) {
+                    ' ', '\n' => {
+                        result.start = self.index + 1;
+                    },
+                    'a'...'z', 'A'...'Z', '_' => {
+                        state = State.Identifier;
+                        result.id = Token.Id { .Identifier = {} };
+                    },
+                    else => {
+                        result.id = Token.Id { .Invalid = {} };
+                        self.index += 1;
+                        break;
+                    },
+                },
+                State.Identifier => switch (c) {
+                    'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
+                    else => {
+                        if (Token.getKeyword(self.buffer[result.start..self.index])) |keyword_id| {
+                            result.id = Token.Id { .Keyword = keyword_id };
+                        }
+                        break;
+                    },
+                },
+            }
+        }
+        result.end = self.index;
+        return result;
+    }
 };

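Not part of the commit, only an illustration of the lookup above: getKeyword's inline while over @memberCount and @memberName unrolls at comptime into one string comparison per Keyword member. A hand-written equivalent (hypothetical name getKeywordUnrolled, assuming it sat next to getKeyword inside Token, in the same era of Zig syntax) would look roughly like this:

    fn getKeywordUnrolled(bytes: []const u8) -> ?Keyword {
        // one mem.eql check per enum member, in declaration order
        if (mem.eql(u8, "align", bytes)) return Keyword.@"align";
        if (mem.eql(u8, "and", bytes)) return Keyword.@"and";
        // ... one comparison for each remaining keyword ...
        if (mem.eql(u8, "while", bytes)) return Keyword.@"while";
        return null;
    }
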
@@ -36,4 +152,13 @@ pub fn main2() -> %void {
     const target_file_buf = %return io.readFileAlloc(target_file, allocator);

     warn("{}", target_file_buf);
+
+    var tokenizer = Tokenizer.init(target_file_buf);
+    while (true) {
+        const token = tokenizer.next();
+        tokenizer.dump(token);
+        if (@TagType(Token.Id)(token.id) == Token.Id.Eof) {
+            break;
+        }
+    }
 }
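For a quick illustration (hypothetical input, not from the commit): if target_file_buf contained only "const x!", the loop above should print one line per token, in dump's format of tag name followed by the quoted source slice, roughly:

    Keyword "const"
    Identifier "x"
    Invalid "!"
    Eof ""
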
src/ir.cpp (45 lines changed)
@@ -8422,16 +8422,6 @@ static IrInstruction *ir_analyze_int_to_enum(IrAnalyze *ira, IrInstruction *sour
     if (type_is_invalid(wanted_type))
         return ira->codegen->invalid_instruction;

-    if (actual_type != wanted_type->data.enumeration.tag_int_type) {
-        ir_add_error(ira, source_instr,
-            buf_sprintf("integer to enum cast from '%s' instead of its tag type, '%s'",
-                buf_ptr(&actual_type->name),
-                buf_ptr(&wanted_type->data.enumeration.tag_int_type->name)));
-        return ira->codegen->invalid_instruction;
-    }
-
-    assert(actual_type->id == TypeTableEntryIdInt);
-
     if (instr_is_comptime(target)) {
         ConstExprValue *val = ir_resolve_const(ira, target, UndefBad);
         if (!val)
@@ -8453,6 +8443,17 @@ static IrInstruction *ir_analyze_int_to_enum(IrAnalyze *ira, IrInstruction *sour
         return result;
     }

+    if (actual_type != wanted_type->data.enumeration.tag_int_type) {
+        ir_add_error(ira, source_instr,
+            buf_sprintf("integer to enum cast from '%s' instead of its tag type, '%s'",
+                buf_ptr(&actual_type->name),
+                buf_ptr(&wanted_type->data.enumeration.tag_int_type->name)));
+        return ira->codegen->invalid_instruction;
+    }
+
+    assert(actual_type->id == TypeTableEntryIdInt);
+
+
     IrInstruction *result = ir_build_int_to_enum(&ira->new_irb, source_instr->scope,
             source_instr->source_node, target);
     result->value.type = wanted_type;
@@ -8822,6 +8823,20 @@ static IrInstruction *ir_analyze_cast(IrAnalyze *ira, IrInstruction *source_inst
         }
     }

+    // explicit cast from integer to enum type with no payload
+    if ((actual_type->id == TypeTableEntryIdInt || actual_type->id == TypeTableEntryIdNumLitInt) &&
+        wanted_type->id == TypeTableEntryIdEnum)
+    {
+        return ir_analyze_int_to_enum(ira, source_instr, value, wanted_type);
+    }
+
+    // explicit cast from enum type with no payload to integer
+    if ((wanted_type->id == TypeTableEntryIdInt || wanted_type->id == TypeTableEntryIdNumLitInt) &&
+        actual_type->id == TypeTableEntryIdEnum)
+    {
+        return ir_analyze_enum_to_int(ira, source_instr, value, wanted_type);
+    }
+
     // explicit cast from number literal to another type
     // explicit cast from number literal to &const integer
     if (actual_type->id == TypeTableEntryIdNumLitFloat ||
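Illustration only, with assumed syntax for this era of Zig (not taken from the commit or its tests): hoisting these two branches ahead of the number-literal handling, and accepting NumLitInt, routes explicit casts between integers and payload-free enums through ir_analyze_int_to_enum and ir_analyze_enum_to_int, including comptime integer literals. A hypothetical Color enum would be cast roughly like this:

    const Color = enum { Red, Green, Blue };
    const g = Color(1);          // integer literal to enum; presumably Color.Green
    const one = u8(Color.Green); // enum back to an integer type, subject to the
                                 // checks inside ir_analyze_enum_to_int
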
@@ -8886,16 +8901,6 @@ static IrInstruction *ir_analyze_cast(IrAnalyze *ira, IrInstruction *source_inst
         return ir_analyze_int_to_err(ira, source_instr, value);
     }

-    // explicit cast from integer to enum type with no payload
-    if (actual_type->id == TypeTableEntryIdInt && wanted_type->id == TypeTableEntryIdEnum) {
-        return ir_analyze_int_to_enum(ira, source_instr, value, wanted_type);
-    }
-
-    // explicit cast from enum type with no payload to integer
-    if (wanted_type->id == TypeTableEntryIdInt && actual_type->id == TypeTableEntryIdEnum) {
-        return ir_analyze_enum_to_int(ira, source_instr, value, wanted_type);
-    }
-
     // explicit cast from union to the enum type of the union
     if (actual_type->id == TypeTableEntryIdUnion && wanted_type->id == TypeTableEntryIdEnum) {
         type_ensure_zero_bits_known(ira->codegen, actual_type);
|
Loading…
x
Reference in New Issue
Block a user