zig/src-self-hosted/ir.zig

const std = @import("std");
const mem = std.mem;
const Allocator = std.mem.Allocator;
const Value = @import("value.zig").Value;
const assert = std.debug.assert;

/// Common header shared by every IR instruction. Each concrete instruction
/// struct declared below embeds an `Inst` as its `base` field, with `tag`
/// defaulted to the enum value that corresponds to that struct.
pub const Inst = struct {
    /// Identifies which concrete instruction struct this header belongs to.
    tag: Tag,

    /// The concrete instruction structs, listed in the same order as the
    /// values of `Tag`. Keep the two lists in sync when adding an instruction.
    pub const all_types = .{
        Constant,
        PtrToInt,
        FieldPtr,
        Deref,
        Assembly,
        Unreach,
    };

    /// One value per concrete instruction struct. `asm` is a keyword, hence
    /// the `@"asm"` spelling.
    pub const Tag = enum {
        constant,
        ptrtoint,
        fieldptr,
        deref,
        @"asm",
        unreach,
    };

    /// This struct owns the `Value` memory. When the struct is deallocated,
    /// so is the `Value`. The value of a constant must be copied into
    /// a memory location for the value to survive after a const instruction.
    pub const Constant = struct {
        base: Inst = Inst{ .tag = .constant },
        /// The constant's value.
        value: *Value,
    };

    /// Instruction tagged `.ptrtoint`. Currently declares no fields beyond
    /// the base header.
    pub const PtrToInt = struct {
        base: Inst = Inst{ .tag = .ptrtoint },
    };

    /// Instruction tagged `.fieldptr`. Currently declares no fields beyond
    /// the base header.
    pub const FieldPtr = struct {
        base: Inst = Inst{ .tag = .fieldptr },
    };

    /// Instruction tagged `.deref`. Currently declares no fields beyond the
    /// base header.
    pub const Deref = struct {
        base: Inst = Inst{ .tag = .deref },
    };

    /// Instruction tagged `.asm`. Currently declares no fields beyond the
    /// base header.
    pub const Assembly = struct {
        base: Inst = Inst{ .tag = .@"asm" },
    };

    /// Instruction tagged `.unreach`. Currently declares no fields beyond
    /// the base header.
    pub const Unreach = struct {
        base: Inst = Inst{ .tag = .unreach },
    };
};

/// A single diagnostic produced while parsing IR source text.
pub const ErrorMsg = struct {
    /// Index into the source bytes that the diagnostic refers to
    /// (`parseError` records the parser position at the time of the error).
    byte_offset: usize,
    /// Human-readable message text. `parseError` allocates it with the
    /// parse allocator.
    msg: []const u8,
};

/// Result of parsing an IR source file: the declarations that were parsed
/// plus any diagnostics that were recorded along the way.
pub const Tree = struct {
    /// One instruction per top-level declaration, appended by `parseRoot`
    /// in the order the declarations appear in the source.
    decls: std.ArrayList(*Inst),
    /// Diagnostics recorded during parsing; empty when parsing succeeded.
    errors: std.ArrayList(ErrorMsg),
};

/// Mutable state threaded through all of the parsing helpers.
const ParseContext = struct {
    /// Allocator used for instructions, values and error messages.
    allocator: *Allocator,
    /// Current read position: index of the next unconsumed byte of `source`.
    i: usize,
    /// The complete IR source text being parsed.
    source: []const u8,
    /// Destination for diagnostics; `parse` points this at the `errors`
    /// list of the `Tree` it is building.
    errors: *std.ArrayList(ErrorMsg),
};

/// Parses IR `source` text into a `Tree`.
///
/// Malformed input is not reported through the error union: a parse failure
/// leaves at least one entry in the returned tree's `errors` list and the
/// tree is still returned. Only allocation failure is returned as an error.
/// Everything the tree references is allocated with `allocator`.
pub fn parse(allocator: *Allocator, source: []const u8) Allocator.Error!Tree {
    var result = Tree{
        .decls = std.ArrayList(*Inst).init(allocator),
        .errors = std.ArrayList(ErrorMsg).init(allocator),
    };
    var parse_ctx = ParseContext{
        .allocator = allocator,
        .i = 0,
        .source = source,
        .errors = &result.errors,
    };

    if (parseRoot(&parse_ctx, &result)) |_| {
        // Whole source consumed without a diagnostic.
    } else |err| switch (err) {
        // A parse failure must have been recorded before it was raised.
        error.ParseFailure => assert(result.errors.items.len != 0),
        else => |other| return other,
    }
    return result;
}

/// Parses every top-level item in `ctx.source`, starting at `ctx.i`, and
/// appends one instruction per declaration to `tree.decls` in source order.
///
/// Recognised top-level items:
///   * `;` starts a comment that runs to the end of the line (or to the end
///     of the source if the last line has no trailing newline);
///   * `@name [: type] = instruction` is a declaration;
///   * spaces and newlines are skipped.
///
/// Returns `error.ParseFailure` after recording a diagnostic in `ctx.errors`
/// when the input is malformed or an identifier is declared twice, and
/// `error.OutOfMemory` when an allocation fails.
pub fn parseRoot(ctx: *ParseContext, tree: *Tree) !void {
    // The IR format is designed so that it can be tokenized and parsed at the same time.

    // Maps each declared identifier to its index in `tree.decls`. The keys
    // are slices of `ctx.source`. The map is only needed while parsing, so
    // release it on every exit path.
    var global_name_map = std.StringHashMap(usize).init(ctx.allocator);
    defer global_name_map.deinit();

    // Each prong advances `ctx.i` past exactly what it consumed. The loop
    // deliberately has no continue expression: the helpers already leave
    // `ctx.i` on the first unconsumed byte, so an extra increment here would
    // silently drop the byte that follows a comment or a declaration.
    while (ctx.i < ctx.source.len) switch (ctx.source[ctx.i]) {
        ';' => {
            // Stop on the newline (handled by the whitespace prong) or at
            // end of input; a final comment need not be newline-terminated.
            while (ctx.i < ctx.source.len and ctx.source[ctx.i] != '\n') : (ctx.i += 1) {}
        },
        '@' => {
            const at_start = ctx.i;
            // `ident` includes the leading '@'.
            const ident = try skipToAndOver(ctx, ' ');
            // NOTE: the optional type annotation is parsed but not yet
            // attached to the declaration.
            var ty: ?*Value = null;
            if (eatByte(ctx, ':')) {
                ty = try parseType(ctx);
                skipSpace(ctx);
            }
            try requireEatBytes(ctx, "= ");
            const inst = try parseInstruction(ctx);
            const ident_index = tree.decls.items.len;
            if (try global_name_map.put(ident, ident_index)) |_| {
                // Point the diagnostic at the offending identifier rather
                // than at whatever follows the instruction.
                ctx.i = at_start;
                return parseError(ctx, "redefinition of identifier '{}'", .{ident});
            }
            try tree.decls.append(inst);
        },
        ' ', '\n' => ctx.i += 1,
        else => |byte| return parseError(ctx, "unexpected byte: '{c}'", .{byte}),
    };
}

/// Consumes the next source byte if, and only if, it equals `byte`.
/// Returns whether a byte was consumed; at end of input nothing is consumed
/// and the result is `false`.
fn eatByte(ctx: *ParseContext, byte: u8) bool {
    const matches = ctx.i < ctx.source.len and ctx.source[ctx.i] == byte;
    if (matches) ctx.i += 1;
    return matches;
}

/// Advances `ctx.i` past any run of space characters. Only ' ' is skipped;
/// newlines and other bytes stop the scan.
fn skipSpace(ctx: *ParseContext) void {
    while (ctx.i < ctx.source.len) : (ctx.i += 1) {
        if (ctx.source[ctx.i] != ' ') break;
    }
}

/// Consumes exactly `bytes` from the current position.
/// Records a diagnostic and returns `error.ParseFailure` when fewer than
/// `bytes.len` bytes remain ("unexpected EOF") or when the upcoming bytes
/// differ from `bytes`; in both cases `ctx.i` is left unchanged.
fn requireEatBytes(ctx: *ParseContext, bytes: []const u8) !void {
    const end = ctx.i + bytes.len;
    if (end > ctx.source.len) {
        return parseError(ctx, "unexpected EOF", .{});
    }
    const upcoming = ctx.source[ctx.i..end];
    if (!mem.eql(u8, upcoming, bytes)) {
        return parseError(ctx, "expected '{}'", .{bytes});
    }
    ctx.i = end;
}

/// Scans forward from the current position to the first occurrence of
/// `byte`. Returns the source bytes before it (starting at the position on
/// entry) and leaves `ctx.i` just past it. If `byte` never occurs, the scan
/// stops at the end of the source and an "unexpected EOF" diagnostic is
/// recorded there.
fn skipToAndOver(ctx: *ParseContext, byte: u8) ![]const u8 {
    const begin = ctx.i;
    while (ctx.i < ctx.source.len) {
        const here = ctx.i;
        ctx.i += 1;
        if (ctx.source[here] == byte) return ctx.source[begin..here];
    }
    return parseError(ctx, "unexpected EOF", .{});
}

/// Formats a diagnostic, appends it to `ctx.errors` tagged with the current
/// parser position, and yields `error.ParseFailure` for the caller to return.
/// Yields `error.OutOfMemory` instead if the message or the list slot cannot
/// be allocated. This function never succeeds: its result is always an error.
fn parseError(ctx: *ParseContext, comptime format: []const u8, args: var) error{ ParseFailure, OutOfMemory } {
    const text = try std.fmt.allocPrint(ctx.allocator, format, args);
    const slot = try ctx.errors.addOne();
    slot.* = .{
        .byte_offset = ctx.i,
        .msg = text,
    };
    return error.ParseFailure;
}

/// Placeholder for parsing a type annotation. Not implemented yet: it always
/// records a "TODO parse type" diagnostic at the current position and fails
/// with `error.ParseFailure`.
fn parseType(ctx: *ParseContext) !*Value {
    const not_implemented = parseError(ctx, "TODO parse type", .{});
    return not_implemented;
}

/// Parses the instruction that starts at the current position and returns a
/// pointer to its base `Inst`.
///
/// A leading '"' dispatches to the string-literal constant parser and a
/// leading decimal digit to the integer-literal constant parser. Any other
/// input is treated as a named instruction of the form `name(`, which is not
/// implemented yet and always produces a diagnostic.
///
/// Fails with `error.ParseFailure` (after recording a diagnostic) on
/// malformed or unsupported input, including end of input, and with
/// `error.OutOfMemory` on allocation failure.
fn parseInstruction(ctx: *ParseContext) !*Inst {
    // The caller may have consumed the final bytes of the source (e.g. the
    // input ends right after "= "), so check before indexing.
    if (ctx.i >= ctx.source.len) {
        return parseError(ctx, "unexpected EOF", .{});
    }
    switch (ctx.source[ctx.i]) {
        '"' => return parseStringLiteralConst(ctx),
        '0'...'9' => return parseIntegerLiteralConst(ctx),
        else => {},
    }
    // `try` is required here: without it `fn_name` would be an error union,
    // a missing '(' would go unnoticed, and the message below would format
    // the error union instead of the instruction name.
    const fn_name = try skipToAndOver(ctx, '(');
    return parseError(ctx, "TODO parse instruction '{}'", .{fn_name});
}

/// Parses a double-quoted string literal starting at the current position
/// (which must be the opening '"') and returns a newly allocated
/// `Inst.Constant` whose value is a `Value.Bytes` holding the unescaped
/// bytes. On success `ctx.i` is left just past the closing quote.
///
/// Fails with `error.ParseFailure` (after recording a diagnostic) when the
/// literal is unterminated or contains an invalid character; in the latter
/// case `ctx.i` is moved to the offending byte so the diagnostic points at
/// it. Fails with `error.OutOfMemory` on allocation failure, releasing any
/// allocations this function had already made.
fn parseStringLiteralConst(ctx: *ParseContext) !*Inst {
    const start = ctx.i;
    ctx.i += 1; // skip over '"'

    while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) {
        '"' => {
            ctx.i += 1;
            // The span handed to the std parser includes both quotes.
            const span = ctx.source[start..ctx.i];
            var bad_index: usize = undefined;
            const parsed = std.zig.parseStringLiteral(ctx.allocator, span, &bad_index) catch |err| switch (err) {
                error.InvalidCharacter => {
                    // `bad_index` is relative to `span`, which begins at `start`.
                    ctx.i = start + bad_index;
                    const bad_byte = ctx.source[ctx.i];
                    // No trailing newline: whoever prints the message adds its own.
                    return parseError(ctx, "invalid string literal character: '{c}'", .{bad_byte});
                },
                else => |e| return e,
            };
            // Do not leak the earlier allocations if a later one fails.
            errdefer ctx.allocator.free(parsed);
            const bytes_val = try ctx.allocator.create(Value.Bytes);
            errdefer ctx.allocator.destroy(bytes_val);
            bytes_val.* = .{ .data = parsed };
            const const_inst = try ctx.allocator.create(Inst.Constant);
            const_inst.* = .{ .value = &bytes_val.base };
            return &const_inst.base;
        },
        '\\' => {
            // Step onto the escaped byte; the loop's continue expression then
            // steps past it, so an escaped '"' does not end the literal.
            ctx.i += 1;
            if (ctx.i >= ctx.source.len) break;
            continue;
        },
        else => continue,
    };
    return parseError(ctx, "unexpected EOF in string literal", .{});
}

/// Placeholder for parsing an integer literal constant. Not implemented yet:
/// it always records a "TODO parse integer literal" diagnostic at the current
/// position and fails with `error.ParseFailure`.
fn parseIntegerLiteralConst(ctx: *ParseContext) !*Inst {
    const not_implemented = parseError(ctx, "TODO parse integer literal", .{});
    return not_implemented;
}

/// Command-line driver: reads the IR file named by the first argument,
/// parses it, and prints every diagnostic to stderr as
/// `path:line:column: error: message` (line and column are 1-based).
///
/// All memory comes from a single arena that is released on exit. When
/// diagnostics were produced, the process either returns
/// `error.ParseFailure` (while `debug_error_trace` is true) or exits with
/// status 1.
pub fn main() anyerror!void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = &arena.allocator;

    const args = try std.process.argsAlloc(allocator);

    // args[0] is the program itself; the source path must follow it.
    // Checking here turns a missing argument into a usage message instead of
    // an out-of-bounds index.
    if (args.len < 2) {
        std.debug.warn("expected one argument: path to the IR source file\n", .{});
        std.process.exit(1);
    }

    const src_path = args[1];
    // While true, a failed parse is surfaced as a returned error rather than
    // a plain exit(1).
    const debug_error_trace = true;

    const source = try std.fs.cwd().readFileAlloc(allocator, src_path, std.math.maxInt(u32));

    const tree = try parse(allocator, source);
    if (tree.errors.items.len != 0) {
        for (tree.errors.items) |err_msg| {
            const loc = findLineColumn(source, err_msg.byte_offset);
            std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg });
        }
        if (debug_error_trace) return error.ParseFailure;
        std.process.exit(1);
    }
}

/// Converts a byte offset into `source` to a zero-based line and column.
/// The line is the number of '\n' bytes before `byte_offset`; the column is
/// the number of bytes since the most recent '\n' (or since the start of
/// `source`). `byte_offset` must not exceed `source.len`.
fn findLineColumn(source: []const u8, byte_offset: usize) struct { line: usize, column: usize } {
    const preceding = source[0..byte_offset];
    var newline_count: usize = 0;
    var bytes_on_line: usize = 0;
    for (preceding) |c| {
        if (c == '\n') {
            newline_count += 1;
            bytes_on_line = 0;
        } else {
            bytes_on_line += 1;
        }
    }
    return .{ .line = newline_count, .column = bytes_on_line };
}
self-hosted: generate zig IR for simple function no tests for this yet. I think the quickest path to testing will be creating the .o files and linking with libc, executing, and then comparing output. 2018-07-12 12:08:40 -07:00			`const std = @import("std");`
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`const mem = std.mem;`
self-hosted: generate zig IR for simple function no tests for this yet. I think the quickest path to testing will be creating the .o files and linking with libc, executing, and then comparing output. 2018-07-12 12:08:40 -07:00			`const Allocator = std.mem.Allocator;`
			`const Value = @import("value.zig").Value;`
			`const assert = std.debug.assert;`
self-hosted: build against zig_llvm and embedded LLD Now the self-hosted compiler re-uses the same C++ code for interfacing with LLVM as the C++ code. It also links against the same LLD library files. 2017-12-26 16:44:08 -08:00
New Zig formal grammar (#1685) Reverted #1628 and changed the grammar+parser of the language to not allow certain expr where types are expected 2018-11-13 05:08:37 -08:00			`pub const Inst = struct {`
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`tag: Tag,`

			`pub const all_types = .{`
			`Constant,`
			`PtrToInt,`
			`FieldPtr,`
			`Deref,`
			`Assembly,`
			`Unreach,`
			`};`

			`pub const Tag = enum {`
			`constant,`
			`ptrtoint,`
			`fieldptr,`
			`deref,`
			`@"asm",`
			`unreach,`
			`};`

			/// This struct owns the `Value` memory. When the struct is deallocated,
			/// so is the `Value`. The value of a constant must be copied into
			`/// a memory location for the value to survive after a const instruction.`
			`pub const Constant = struct {`
			`base: Inst = Inst{ .tag = .constant },`
			`value: *Value,`
self-hosted: generate zig IR for simple function no tests for this yet. I think the quickest path to testing will be creating the .o files and linking with libc, executing, and then comparing output. 2018-07-12 12:08:40 -07:00			`};`

beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`pub const PtrToInt = struct {`
			`base: Inst = Inst{ .tag = .ptrtoint },`
self-hosted: generate zig IR for simple function no tests for this yet. I think the quickest path to testing will be creating the .o files and linking with libc, executing, and then comparing output. 2018-07-12 12:08:40 -07:00			`};`

beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`pub const FieldPtr = struct {`
			`base: Inst = Inst{ .tag = .fieldptr },`
self-hosted: generate zig IR for simple function no tests for this yet. I think the quickest path to testing will be creating the .o files and linking with libc, executing, and then comparing output. 2018-07-12 12:08:40 -07:00			`};`

beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`pub const Deref = struct {`
			`base: Inst = Inst{ .tag = .deref },`
self-hosted can compile libc hello world 2018-07-22 20:27:58 -07:00			`};`

beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`pub const Assembly = struct {`
			`base: Inst = Inst{ .tag = .@"asm" },`
self-hosted: function calling another function 2018-07-24 17:24:05 -07:00			`};`

beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`pub const Unreach = struct {`
			`base: Inst = Inst{ .tag = .unreach },`
self-hosted: function calling another function 2018-07-24 17:24:05 -07:00			`};`
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`};`
self-hosted: function calling another function 2018-07-24 17:24:05 -07:00
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`pub const ErrorMsg = struct {`
			`byte_offset: usize,`
			`msg: []const u8,`
			`};`
self-hosted can compile libc hello world 2018-07-22 20:27:58 -07:00
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`pub const Tree = struct {`
			`decls: std.ArrayList(*Inst),`
			`errors: std.ArrayList(ErrorMsg),`
			`};`
self-hosted can compile libc hello world 2018-07-22 20:27:58 -07:00
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`const ParseContext = struct {`
			`allocator: *Allocator,`
			`i: usize,`
			`source: []const u8,`
			`errors: *std.ArrayList(ErrorMsg),`
			`};`
self-hosted can compile libc hello world 2018-07-22 20:27:58 -07:00
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`pub fn parse(allocator: *Allocator, source: []const u8) Allocator.Error!Tree {`
			`var tree: Tree = .{`
			`.decls = std.ArrayList(*Inst).init(allocator),`
			`.errors = std.ArrayList(ErrorMsg).init(allocator),`
self-hosted can compile libc hello world 2018-07-22 20:27:58 -07:00			`};`
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`var ctx: ParseContext = .{`
			`.allocator = allocator,`
			`.i = 0,`
			`.source = source,`
			`.errors = &tree.errors,`
self-hosted: generate zig IR for simple function no tests for this yet. I think the quickest path to testing will be creating the .o files and linking with libc, executing, and then comparing output. 2018-07-12 12:08:40 -07:00			`};`
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`parseRoot(&ctx, &tree) catch \|err\| switch (err) {`
			`error.ParseFailure => {`
			`assert(tree.errors.items.len != 0);`
			`},`
			`else => \|e\| return e,`
self-hosted: generate zig IR for simple function no tests for this yet. I think the quickest path to testing will be creating the .o files and linking with libc, executing, and then comparing output. 2018-07-12 12:08:40 -07:00			`};`
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`return tree;`
			`}`
self-hosted: generate zig IR for simple function no tests for this yet. I think the quickest path to testing will be creating the .o files and linking with libc, executing, and then comparing output. 2018-07-12 12:08:40 -07:00
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`pub fn parseRoot(ctx: ParseContext, tree: Tree) !void {`
			`// The IR format is designed so that it can be tokenized and parsed at the same time.`
			`var global_name_map = std.StringHashMap(usize).init(ctx.allocator);`
			`while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) {`
			`';' => _ = try skipToAndOver(ctx, '\n'),`
			`'@' => {`
			`const at_start = ctx.i;`
			`const ident = try skipToAndOver(ctx, ' ');`
			`var ty: ?*Value = null;`
			`if (eatByte(ctx, ':')) {`
			`ty = try parseType(ctx);`
			`skipSpace(ctx);`
			`}`
			`try requireEatBytes(ctx, "= ");`
			`const inst = try parseInstruction(ctx);`
			`const ident_index = tree.decls.items.len;`
			`if (try global_name_map.put(ident, ident_index)) \|_\| {`
			`return parseError(ctx, "redefinition of identifier '{}'", .{ident});`
			`}`
			`try tree.decls.append(inst);`
			`continue;`
			`},`
			`' ', '\n' => continue,`
			`else => \|byte\| return parseError(ctx, "unexpected byte: '{c}'", .{byte}),`
self-hosted: generate zig IR for simple function no tests for this yet. I think the quickest path to testing will be creating the .o files and linking with libc, executing, and then comparing output. 2018-07-12 12:08:40 -07:00			`};`
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`}`
self-hosted: generate zig IR for simple function no tests for this yet. I think the quickest path to testing will be creating the .o files and linking with libc, executing, and then comparing output. 2018-07-12 12:08:40 -07:00
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`fn eatByte(ctx: *ParseContext, byte: u8) bool {`
			`if (ctx.i >= ctx.source.len) return false;`
			`if (ctx.source[ctx.i] != byte) return false;`
			`ctx.i += 1;`
			`return true;`
			`}`
self-hosted: basic IR pass2 2018-07-13 18:56:38 -07:00
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`fn skipSpace(ctx: *ParseContext) void {`
			`while (ctx.i < ctx.source.len and ctx.source[ctx.i] == ' ') : (ctx.i += 1) {}`
			`}`
self-hosted: generate zig IR for simple function no tests for this yet. I think the quickest path to testing will be creating the .o files and linking with libc, executing, and then comparing output. 2018-07-12 12:08:40 -07:00
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`fn requireEatBytes(ctx: *ParseContext, bytes: []const u8) !void {`
			`if (ctx.i + bytes.len > ctx.source.len)`
			`return parseError(ctx, "unexpected EOF", .{});`
			`if (!mem.eql(u8, ctx.source[ctx.i..][0..bytes.len], bytes))`
			`return parseError(ctx, "expected '{}'", .{bytes});`
			`ctx.i += bytes.len;`
			`}`
self-hosted: basic IR pass2 2018-07-13 18:56:38 -07:00
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`fn skipToAndOver(ctx: *ParseContext, byte: u8) ![]const u8 {`
			`const start_i = ctx.i;`
			`while (ctx.i < ctx.source.len) : (ctx.i += 1) {`
			`if (ctx.source[ctx.i] == byte) {`
			`const result = ctx.source[start_i..ctx.i];`
			`ctx.i += 1;`
			`return result;`
self-hosted: basic IR pass2 2018-07-13 18:56:38 -07:00			`}`
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`}`
			`return parseError(ctx, "unexpected EOF", .{});`
			`}`
self-hosted: basic IR pass2 2018-07-13 18:56:38 -07:00
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`fn parseError(ctx: *ParseContext, comptime format: []const u8, args: var) error{ ParseFailure, OutOfMemory } {`
			`const msg = try std.fmt.allocPrint(ctx.allocator, format, args);`
			`(try ctx.errors.addOne()).* = .{`
			`.byte_offset = ctx.i,`
			`.msg = msg,`
self-hosted: compile errors for return in wrong place * outside fn definition * inside defer expression 2018-07-18 14:40:59 -07:00			`};`
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`return error.ParseFailure;`
			`}`
self-hosted: compile errors for return in wrong place * outside fn definition * inside defer expression 2018-07-18 14:40:59 -07:00
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`fn parseType(ctx: ParseContext) !Value {`
			`return parseError(ctx, "TODO parse type", .{});`
			`}`
self-hosted: compile errors for return in wrong place * outside fn definition * inside defer expression 2018-07-18 14:40:59 -07:00
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`fn parseInstruction(ctx: ParseContext) !Inst {`
			`switch (ctx.source[ctx.i]) {`
			`'"' => return parseStringLiteralConst(ctx),`
			`'0'...'9' => return parseIntegerLiteralConst(ctx),`
			`else => {},`
			`}`
			`const fn_name = skipToAndOver(ctx, '(');`
			`return parseError(ctx, "TODO parse instruction '{}'", .{fn_name});`
			`}`
self-hosted: compile errors for return in wrong place * outside fn definition * inside defer expression 2018-07-18 14:40:59 -07:00
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`fn parseStringLiteralConst(ctx: ParseContext) !Inst {`
			`const start = ctx.i;`
			`ctx.i += 1; // skip over '"'`

			`while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) {`
			`'"' => {`
			`ctx.i += 1;`
			`const span = ctx.source[start..ctx.i];`
			`var bad_index: usize = undefined;`
			`const parsed = std.zig.parseStringLiteral(ctx.allocator, span, &bad_index) catch \|err\| switch (err) {`
			`error.InvalidCharacter => {`
			`ctx.i = start + bad_index;`
			`const bad_byte = ctx.source[ctx.i];`
			`return parseError(ctx, "invalid string literal character: '{c}'\n", .{bad_byte});`
			`},`
			`else => \|e\| return e,`
			`};`
			`const bytes_val = try ctx.allocator.create(Value.Bytes);`
			`bytes_val.* = .{ .data = parsed };`
			`const const_inst = try ctx.allocator.create(Inst.Constant);`
			`const_inst.* = .{ .value = &bytes_val.base };`
			`return &const_inst.base;`
			`},`
			`'\\' => {`
			`ctx.i += 1;`
			`if (ctx.i >= ctx.source.len) break;`
			`continue;`
			`},`
			`else => continue,`
self-hosted: basic IR pass2 2018-07-13 18:56:38 -07:00			`};`
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`return parseError(ctx, "unexpected EOF in string literal", .{});`
			`}`
self-hosted: basic IR pass2 2018-07-13 18:56:38 -07:00
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`fn parseIntegerLiteralConst(ctx: ParseContext) !Inst {`
			`return parseError(ctx, "TODO parse integer literal", .{});`
			`}`
self-hosted: compile errors for return in wrong place * outside fn definition * inside defer expression 2018-07-18 14:40:59 -07:00
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`pub fn main() anyerror!void {`
			`var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);`
			`defer arena.deinit();`
			`const allocator = &arena.allocator;`
self-hosted: compile errors for return in wrong place * outside fn definition * inside defer expression 2018-07-18 14:40:59 -07:00
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`const args = try std.process.argsAlloc(allocator);`
self-hosted: compile errors for return in wrong place * outside fn definition * inside defer expression 2018-07-18 14:40:59 -07:00
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`const src_path = args[1];`
			`const debug_error_trace = true;`
self-hosted: compile errors for return in wrong place * outside fn definition * inside defer expression 2018-07-18 14:40:59 -07:00
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`const source = try std.fs.cwd().readFileAlloc(allocator, src_path, std.math.maxInt(u32));`
self-hosted: compile errors for return in wrong place * outside fn definition * inside defer expression 2018-07-18 14:40:59 -07:00
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`const tree = try parse(allocator, source);`
			`if (tree.errors.items.len != 0) {`
			`for (tree.errors.items) \|err_msg\| {`
			`const loc = findLineColumn(source, err_msg.byte_offset);`
			`std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg });`
self-hosted: compile errors for return in wrong place * outside fn definition * inside defer expression 2018-07-18 14:40:59 -07:00			`}`
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`if (debug_error_trace) return error.ParseFailure;`
			`std.process.exit(1);`
			`}`
			`}`
self-hosted: compile errors for return in wrong place * outside fn definition * inside defer expression 2018-07-18 14:40:59 -07:00
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`fn findLineColumn(source: []const u8, byte_offset: usize) struct { line: usize, column: usize } {`
			`var line: usize = 0;`
			`var column: usize = 0;`
			`for (source[0..byte_offset]) \|byte\| {`
			`switch (byte) {`
			`'\n' => {`
			`line += 1;`
			`column = 0;`
			`},`
			`else => {`
			`column += 1;`
			`},`
self-hosted: compile errors for return in wrong place * outside fn definition * inside defer expression 2018-07-18 14:40:59 -07:00			`}`
			`}`
beginnings of zig ir parser 2020-04-17 21:09:43 -07:00			`return .{ .line = line, .column = column };`
self-hosted: basic IR pass2 2018-07-13 18:56:38 -07:00			`}`