const std = @import("std");
const mem = std.mem;
const Allocator = std.mem.Allocator;
const Value = @import("value.zig").Value;
const assert = std.debug.assert;

/// Common header shared by every IR instruction. Concrete instruction
/// structs embed an `Inst` as their `base` field with the matching `tag`.
pub const Inst = struct {
    tag: Tag,

    /// Every concrete instruction type, in the same order as `Tag`.
    pub const all_types = .{
        Constant,
        PtrToInt,
        FieldPtr,
        Deref,
        Assembly,
        Unreach,
    };

    /// Discriminates which concrete instruction an `Inst` header belongs to.
    pub const Tag = enum {
        constant,
        ptrtoint,
        fieldptr,
        deref,
        @"asm",
        unreach,
    };

    /// This struct owns the `Value` memory. When the struct is deallocated,
    /// so is the `Value`. The value of a constant must be copied into
    /// a memory location for the value to survive after a const instruction.
    pub const Constant = struct {
        base: Inst = Inst{ .tag = .constant },
        value: *Value,
    };

    // The remaining instruction types currently carry only their tag.

    pub const PtrToInt = struct {
        base: Inst = Inst{ .tag = .ptrtoint },
    };

    pub const FieldPtr = struct {
        base: Inst = Inst{ .tag = .fieldptr },
    };

    pub const Deref = struct {
        base: Inst = Inst{ .tag = .deref },
    };

    pub const Assembly = struct {
        base: Inst = Inst{ .tag = .@"asm" },
    };

    pub const Unreach = struct {
        base: Inst = Inst{ .tag = .unreach },
    };
};

/// A diagnostic produced while parsing.
pub const ErrorMsg = struct {
    /// Offset into the source text at which the problem was detected.
    byte_offset: usize,
    /// Formatted message, allocated with the allocator given to `parse`.
    msg: []const u8,
};

/// Result of `parse`: the top-level declarations in source order plus any
/// diagnostics that were recorded.
pub const Tree = struct {
    decls: std.ArrayList(*Inst),
    errors: std.ArrayList(ErrorMsg),
};

/// Mutable cursor state threaded through all of the parsing helpers.
const ParseContext = struct {
    allocator: *Allocator,
    /// Index of the next unread byte of `source`.
    i: usize,
    source: []const u8,
    /// Destination for diagnostics; points at the `errors` list of the tree being built.
    errors: *std.ArrayList(ErrorMsg),
};

/// Parses `source` into a `Tree`.
///
/// Syntax problems do not produce an error return: they are recorded in
/// `Tree.errors` and the partially built tree is returned. Only allocation
/// failure is reported through the error union.
///
/// Nothing in this file frees individual instructions or messages; `main`
/// runs the parser on an arena allocator.
pub fn parse(allocator: *Allocator, source: []const u8) Allocator.Error!Tree {
    var tree: Tree = .{
        .decls = std.ArrayList(*Inst).init(allocator),
        .errors = std.ArrayList(ErrorMsg).init(allocator),
    };
    var ctx: ParseContext = .{
        .allocator = allocator,
        .i = 0,
        .source = source,
        .errors = &tree.errors,
    };
    parseRoot(&ctx, &tree) catch |err| switch (err) {
        error.ParseFailure => {
            // A parse failure is always accompanied by a recorded message.
            assert(tree.errors.items.len != 0);
        },
        else => |e| return e,
    };
    return tree;
}

/// Parses top-level items until the end of the source.
///
/// Recognized items are `;` comments running to the end of the line,
/// `@name = <instruction>` declarations (optionally `@name :<type> = ...`),
/// and spaces / newlines between them. Each parsed declaration is appended
/// to `tree.decls`.
pub fn parseRoot(ctx: *ParseContext, tree: *Tree) !void {
    // The IR format is designed so that it can be tokenized and parsed at the same time.
    // Keys are slices of `ctx.source`, so only the table itself needs freeing.
    var global_name_map = std.StringHashMap(usize).init(ctx.allocator);
    defer global_name_map.deinit();

    // Every branch advances the cursor itself. A loop-level `ctx.i += 1`
    // would also run after the comment and declaration branches, which have
    // already consumed their input, and would silently drop the next byte.
    while (ctx.i < ctx.source.len) switch (ctx.source[ctx.i]) {
        ';' => _ = try skipToAndOver(ctx, '\n'),
        '@' => {
            // The identifier includes the leading '@'.
            const ident = try skipToAndOver(ctx, ' ');
            if (eatByte(ctx, ':')) {
                // The type annotation is parsed but not yet attached to the declaration.
                _ = try parseType(ctx);
                skipSpace(ctx);
            }
            try requireEatBytes(ctx, "= ");
            const inst = try parseInstruction(ctx);
            const ident_index = tree.decls.items.len;
            if (try global_name_map.put(ident, ident_index)) |_| {
                return parseError(ctx, "redefinition of identifier '{}'", .{ident});
            }
            try tree.decls.append(inst);
        },
        ' ', '\n' => ctx.i += 1,
        else => |byte| return parseError(ctx, "unexpected byte: '{c}'", .{byte}),
    };
}

/// Consumes the next byte if it equals `byte`. Returns whether it did.
fn eatByte(ctx: *ParseContext, byte: u8) bool {
    if (ctx.i >= ctx.source.len) return false;
    if (ctx.source[ctx.i] != byte) return false;
    ctx.i += 1;
    return true;
}

/// Advances the cursor past any run of space characters.
fn skipSpace(ctx: *ParseContext) void {
    while (ctx.i < ctx.source.len and ctx.source[ctx.i] == ' ') : (ctx.i += 1) {}
}

/// Consumes exactly `bytes` at the cursor, or records a parse error if the
/// source ends too early or the text differs.
fn requireEatBytes(ctx: *ParseContext, bytes: []const u8) !void {
    if (ctx.i + bytes.len > ctx.source.len)
        return parseError(ctx, "unexpected EOF", .{});
    if (!mem.eql(u8, ctx.source[ctx.i..][0..bytes.len], bytes))
        return parseError(ctx, "expected '{}'", .{bytes});
    ctx.i += bytes.len;
}

/// Scans forward to the next occurrence of `byte`. Returns the text between
/// the starting cursor position and that byte (exclusive) and leaves the
/// cursor just after it. Records "unexpected EOF" if `byte` never occurs.
fn skipToAndOver(ctx: *ParseContext, byte: u8) ![]const u8 {
    const start_i = ctx.i;
    while (ctx.i < ctx.source.len) : (ctx.i += 1) {
        if (ctx.source[ctx.i] == byte) {
            const result = ctx.source[start_i..ctx.i];
            ctx.i += 1;
            return result;
        }
    }
    return parseError(ctx, "unexpected EOF", .{});
}

/// Formats a diagnostic, appends it to `ctx.errors` tagged with the current
/// cursor position, and returns `error.ParseFailure`. This function always
/// returns an error; it is `error.OutOfMemory` when the message could not be
/// allocated or stored.
fn parseError(ctx: *ParseContext, comptime format: []const u8, args: var) error{ ParseFailure, OutOfMemory } {
    const msg = try std.fmt.allocPrint(ctx.allocator, format, args);
    (try ctx.errors.addOne()).* = .{
        .byte_offset = ctx.i,
        .msg = msg,
    };
    return error.ParseFailure;
}

/// Not implemented yet: always records a "TODO" parse error.
fn parseType(ctx: *ParseContext) !*Value {
    return parseError(ctx, "TODO parse type", .{});
}

/// Parses the right-hand side of a declaration. A leading `"` starts a
/// string literal constant and a leading digit an integer literal constant;
/// anything else is treated as `name(` of a not-yet-supported instruction.
fn parseInstruction(ctx: *ParseContext) !*Inst {
    // The declaration may end right after "= "; do not index past the end.
    if (ctx.i >= ctx.source.len) return parseError(ctx, "unexpected EOF", .{});

    switch (ctx.source[ctx.i]) {
        '"' => return parseStringLiteralConst(ctx),
        '0'...'9' => return parseIntegerLiteralConst(ctx),
        else => {},
    }
    // Propagate a failed scan instead of formatting the error value into a
    // second, misleading diagnostic.
    const fn_name = try skipToAndOver(ctx, '(');
    return parseError(ctx, "TODO parse instruction '{}'", .{fn_name});
}

/// Parses a double-quoted string literal starting at the cursor and returns
/// a `Constant` instruction holding its decoded bytes.
fn parseStringLiteralConst(ctx: *ParseContext) !*Inst {
    const start = ctx.i;
    ctx.i += 1; // skip over '"'

    while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) {
        '"' => {
            ctx.i += 1;
            // The span includes both quotes, as `parseStringLiteral` is given the full literal.
            const span = ctx.source[start..ctx.i];
            var bad_index: usize = undefined;
            const parsed = std.zig.parseStringLiteral(ctx.allocator, span, &bad_index) catch |err| switch (err) {
                error.InvalidCharacter => {
                    // Point the diagnostic at the offending byte.
                    ctx.i = start + bad_index;
                    const bad_byte = ctx.source[ctx.i];
                    return parseError(ctx, "invalid string literal character: '{c}'", .{bad_byte});
                },
                else => |e| return e,
            };
            const bytes_val = try ctx.allocator.create(Value.Bytes);
            bytes_val.* = .{ .data = parsed };
            const const_inst = try ctx.allocator.create(Inst.Constant);
            const_inst.* = .{ .value = &bytes_val.base };
            return &const_inst.base;
        },
        '\\' => {
            // Step onto the escaped byte; the loop increment then skips it,
            // so an escaped quote does not terminate the literal.
            ctx.i += 1;
            if (ctx.i >= ctx.source.len) break;
            continue;
        },
        else => continue,
    };
    return parseError(ctx, "unexpected EOF in string literal", .{});
}

/// Not implemented yet: always records a "TODO" parse error.
fn parseIntegerLiteralConst(ctx: *ParseContext) !*Inst {
    return parseError(ctx, "TODO parse integer literal", .{});
}

/// Command-line driver: parses the file named by the first argument and
/// prints any diagnostics as `path:line:column: error: message`.
pub fn main() anyerror!void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = &arena.allocator;

    const args = try std.process.argsAlloc(allocator);
    if (args.len < 2) {
        std.debug.warn("error: expected a source file path argument\n", .{});
        std.process.exit(1);
    }

    const src_path = args[1];
    // When set, a parse failure is returned from `main` as an error instead
    // of exiting with status 1.
    const debug_error_trace = true;

    const source = try std.fs.cwd().readFileAlloc(allocator, src_path, std.math.maxInt(u32));

    const tree = try parse(allocator, source);
    if (tree.errors.items.len != 0) {
        for (tree.errors.items) |err_msg| {
            const loc = findLineColumn(source, err_msg.byte_offset);
            std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg });
        }
        if (debug_error_trace) return error.ParseFailure;
        std.process.exit(1);
    }
}

/// Converts a byte offset into a zero-based line and column by counting the
/// newlines that precede it. Offsets past the end of `source` are treated as
/// pointing at the end.
fn findLineColumn(source: []const u8, byte_offset: usize) struct { line: usize, column: usize } {
    var line: usize = 0;
    var column: usize = 0;
    const end = std.math.min(byte_offset, source.len);
    for (source[0..end]) |byte| {
        switch (byte) {
            '\n' => {
                line += 1;
                column = 0;
            },
            else => {
                column += 1;
            },
        }
    }
    return .{ .line = line, .column = column };
}