zig/src-self-hosted/ir.zig

439 lines
16 KiB
Zig
Raw Normal View History

const std = @import("std");
2020-04-17 21:09:43 -07:00
const mem = std.mem;
const Allocator = std.mem.Allocator;
const Value = @import("value.zig").Value;
2020-04-18 16:41:45 -07:00
const Type = @import("type.zig").Type;
const assert = std.debug.assert;
const text = @import("ir/text.zig");
/// An in-memory, analyzed instruction. The representation of instructions that
/// correspond to the ZIR text format lives in `text.Inst`.
/// This struct owns the `Value` and `Type` memory. When the struct is deallocated,
/// so are the `Value` and `Type`. The value of a constant must be copied into
/// a memory location for the value to survive after a const instruction.
pub const Inst = struct {
    /// Selects which concrete instruction struct embeds this base (see `cast`).
    tag: Tag,
    ty: Type,
    /// Byte offset into the source.
    src: usize,

    pub const Tag = enum {
        unreach,
        constant,
        assembly,
    };

    /// Instruction whose value is carried directly in `val`.
    pub const Constant = struct {
        pub const base_tag = Tag.constant;
        base: Inst,

        val: Value,
    };

    /// Inline assembly instruction; `value()` reports it as runtime-known.
    pub const Assembly = struct {
        pub const base_tag = Tag.assembly;
        base: Inst,

        asm_source: []const u8,
        is_volatile: bool,
        output: []const u8,
        inputs: []const []const u8,
        clobbers: []const []const u8,
        args: []const []const u8,
    };

    /// Downcasts `base` to the concrete instruction type `T`, or returns `null`
    /// when `base.tag` differs from `T.base_tag`. `T` must embed the
    /// instruction in a field named `base`.
    pub fn cast(base: *Inst, comptime T: type) ?*T {
        return if (base.tag == T.base_tag)
            @fieldParentPtr(T, "base", base)
        else
            null;
    }

    /// Returns `null` if runtime-known.
    pub fn value(base: *Inst) ?Value {
        switch (base.tag) {
            .constant => return base.cast(Constant).?.val,
            .unreach => return Value.initTag(.noreturn_value),
            .assembly => return null,
        }
    }
};
2020-04-20 21:56:30 -07:00
/// A `Value` bundled with the `Type` it belongs to.
const TypedValue = struct {
    /// The type of `val`.
    ty: Type,
    /// The value itself.
    val: Value,
};
2020-04-20 21:56:30 -07:00
/// The product of `analyze`: exported symbols, analyzed functions and any
/// semantic errors that were recorded along the way.
/// The `exports`, `errors` and `fns` slices, as well as every successfully
/// analyzed function body, are owned by the allocator that was passed to
/// `analyze`; everything else lives in `arena`.
pub const Module = struct {
    exports: []Export,
    errors: []ErrorMsg,
    arena: std.heap.ArenaAllocator,
    fns: []Fn,

    /// A symbol made visible under `name`.
    pub const Export = struct {
        name: []const u8,
        typed_value: TypedValue,
    };

    /// An analyzed function. `body` is only initialized when
    /// `analysis_status` is `.success`.
    pub const Fn = struct {
        analysis_status: enum { in_progress, failure, success },
        body: []*Inst,
    };

    /// Releases all memory owned by the module. `allocator` must be the one
    /// that was passed to `analyze`. The module is invalid afterwards.
    pub fn deinit(self: *Module, allocator: *Allocator) void {
        allocator.free(self.exports);
        allocator.free(self.errors);
        for (self.fns) |func| {
            // `body` is left undefined unless analysis of the function
            // completed, so only free it in that case.
            if (func.analysis_status == .success) allocator.free(func.body);
        }
        allocator.free(self.fns);
        self.arena.deinit();
        self.* = undefined;
    }

    /// Not implemented yet; always fails with `error.TodoImplementEmitToZIR`.
    pub fn emit_zir(self: Module, allocator: *Allocator) !text.Module {
        return error.TodoImplementEmitToZIR;
    }
};
2020-04-18 17:04:37 -07:00
2020-04-20 21:56:30 -07:00
/// A single diagnostic produced during analysis.
pub const ErrorMsg = struct {
    /// Byte offset into the source that the message refers to.
    byte_offset: usize,
    /// The rendered message text.
    msg: []const u8,
};
/// Performs semantic analysis of `old_module` and returns the analyzed module.
/// Semantic errors do not make this function fail: they are collected in the
/// `errors` field of the result. Any other error (such as `error.OutOfMemory`)
/// is returned, in which case everything allocated so far is released.
/// On success the caller owns the result and must call `Module.deinit` with
/// the same `allocator`.
pub fn analyze(allocator: *Allocator, old_module: text.Module) !Module {
    var ctx = Analyze{
        .allocator = allocator,
        .arena = std.heap.ArenaAllocator.init(allocator),
        .old_module = &old_module,
        .errors = std.ArrayList(ErrorMsg).init(allocator),
        .decl_table = std.AutoHashMap(*text.Inst, Analyze.NewDecl).init(allocator),
        .exports = std.ArrayList(Module.Export).init(allocator),
        .fns = std.ArrayList(Module.Fn).init(allocator),
    };
    // The arena is handed over to the returned Module on success, so it must
    // only be torn down here when we bail out with an error.
    errdefer ctx.arena.deinit();
    defer ctx.errors.deinit();
    defer ctx.decl_table.deinit();
    defer ctx.exports.deinit();
    defer ctx.fns.deinit();
    // Bodies of fully analyzed functions are owned slices that the list's
    // deinit does not know about; release them when bailing out. Bodies of
    // unfinished or failed functions are undefined and must not be touched.
    errdefer {
        for (ctx.fns.items) |func| {
            if (func.analysis_status == .success) allocator.free(func.body);
        }
    }

    ctx.analyzeRoot() catch |err| switch (err) {
        error.AnalysisFail => {
            // A semantic failure must always have recorded a message.
            assert(ctx.errors.items.len != 0);
        },
        else => |e| return e,
    };

    return Module{
        .exports = ctx.exports.toOwnedSlice(),
        .errors = ctx.errors.toOwnedSlice(),
        .fns = ctx.fns.toOwnedSlice(),
        .arena = ctx.arena,
    };
}
2020-04-20 21:56:30 -07:00
const Analyze = struct {
allocator: *Allocator,
arena: std.heap.ArenaAllocator,
old_module: *const text.Module,
errors: std.ArrayList(ErrorMsg),
2020-04-21 13:06:15 -07:00
decl_table: std.AutoHashMap(*text.Inst, NewDecl),
2020-04-20 21:56:30 -07:00
exports: std.ArrayList(Module.Export),
2020-04-21 13:06:15 -07:00
fns: std.ArrayList(Module.Fn),
2020-04-20 21:56:30 -07:00
2020-04-21 13:06:15 -07:00
/// Entry of `decl_table`: the analysis outcome for one top-level instruction.
const NewDecl = struct {
    /// null means a semantic analysis error happened
    ptr: ?*Inst,
};
2020-04-21 13:06:15 -07:00
/// Entry of a function's `inst_table`: the analyzed instruction that
/// corresponds to one instruction of the function body.
const NewInst = struct {
    ptr: *Inst,
};
/// Working state kept while the body of a single function is analyzed.
const Fn = struct {
    /// Analyzed instructions that make up the function body.
    body: std.ArrayList(*Inst),
    /// Maps each text instruction of the body to its analyzed counterpart.
    inst_table: std.AutoHashMap(*text.Inst, NewInst),
    /// Index into Module fns array
    fn_index: usize,
};
2020-04-20 21:56:30 -07:00
/// Errors used by the mutually recursive analysis functions.
/// `AnalysisFail` signals a semantic error that was reported through `fail`.
const InnerError = error{
    OutOfMemory,
    AnalysisFail,
};
/// Entry point of the analysis: walks the top-level declarations of the old
/// module and analyzes every export instruction. Declarations of any other
/// kind are skipped here.
fn analyzeRoot(self: *Analyze) !void {
    for (self.old_module.decls) |decl| {
        const export_inst = decl.cast(text.Inst.Export) orelse continue;
        try self.analyzeExport(null, export_inst);
    }
}
2018-07-13 18:56:38 -07:00
2020-04-21 13:06:15 -07:00
/// Returns the analyzed instruction that corresponds to `old_inst`.
///
/// When a function context is given, the function's own instruction table is
/// consulted first. Instructions that are not part of that function are then
/// looked up as top-level declarations, which are analyzed on first use and
/// cached in `decl_table` (a `null` entry remembers a failed analysis).
///
/// Returns `error.AnalysisFail` if analysis of the declaration failed now or
/// on an earlier attempt; in both cases the error message has already been
/// recorded by the failing analysis.
fn resolveInst(self: *Analyze, opt_func: ?*Fn, old_inst: *text.Inst) InnerError!*Inst {
    if (opt_func) |func| {
        if (func.inst_table.get(old_inst)) |kv| {
            return kv.value.ptr;
        }
        // Not a local instruction: fall through so that a function body can
        // refer to top-level declarations. Bailing out here would produce
        // `error.AnalysisFail` without any recorded error message.
    }

    if (self.decl_table.get(old_inst)) |kv| {
        return kv.value.ptr orelse return error.AnalysisFail;
    }

    const new_inst = self.analyzeInst(old_inst, null) catch |err| switch (err) {
        error.AnalysisFail => {
            // Remember the failure so the declaration is not analyzed (and
            // reported) a second time.
            try self.decl_table.putNoClobber(old_inst, .{ .ptr = null });
            return error.AnalysisFail;
        },
        else => |e| return e,
    };
    try self.decl_table.putNoClobber(old_inst, .{ .ptr = new_inst });
    return new_inst;
}
2020-04-17 23:55:28 -07:00
2020-04-21 13:06:15 -07:00
/// Resolves `old_inst` and returns its type together with its value.
/// Fails (see `resolveConstValue`) when the instruction has no known value.
fn resolveInstConst(self: *Analyze, func: ?*Fn, old_inst: *text.Inst) InnerError!TypedValue {
    const resolved = try self.resolveInst(func, old_inst);
    return TypedValue{
        .ty = resolved.ty,
        .val = try self.resolveConstValue(resolved),
    };
}
/// Returns the value of `base`, or reports an error at `base.src` when the
/// instruction is runtime-known.
fn resolveConstValue(self: *Analyze, base: *Inst) !Value {
    if (base.value()) |known| {
        return known;
    }
    return self.fail(base.src, "unable to resolve comptime value", .{});
}
2020-04-21 13:06:15 -07:00
/// Resolves `old_inst`, coerces it to the `const_slice_u8` type and returns
/// the bytes of the resulting value. The bytes are obtained through
/// `Value.toAllocatedBytes` using the analysis arena's allocator.
fn resolveConstString(self: *Analyze, func: ?*Fn, old_inst: *text.Inst) ![]u8 {
    const resolved = try self.resolveInst(func, old_inst);
    const as_slice = try self.coerce(Type.initTag(.const_slice_u8), resolved);
    const str_val = try self.resolveConstValue(as_slice);
    return str_val.toAllocatedBytes(&self.arena.allocator);
}
2020-04-21 13:06:15 -07:00
/// Resolves `old_inst`, coerces it to the `type` type and converts the
/// resulting value into a `Type`.
fn resolveType(self: *Analyze, func: ?*Fn, old_inst: *text.Inst) !Type {
    const resolved = try self.resolveInst(func, old_inst);
    const as_type = try self.coerce(Type.initTag(.@"type"), resolved);
    const type_val = try self.resolveConstValue(as_type);
    return type_val.toType();
}
/// Analyzes an export instruction and records it in `self.exports`.
/// The symbol name must resolve to a constant string and the exported value
/// must be of function type; anything else is reported as an error at the
/// source position of the value operand.
fn analyzeExport(self: *Analyze, func: ?*Fn, export_inst: *text.Inst.Export) !void {
    const symbol_name = try self.resolveConstString(func, export_inst.positionals.symbol_name);
    const typed_value = try self.resolveInstConst(func, export_inst.positionals.value);

    // Only functions can be exported so far.
    if (typed_value.ty.zigTypeTag() != .Fn) {
        return self.fail(
            export_inst.positionals.value.src,
            "unable to export type '{}'",
            .{typed_value.ty},
        );
    }

    try self.exports.append(Module.Export{
        .name = symbol_name,
        .typed_value = typed_value,
    });
}
2020-04-21 10:50:04 -07:00
/// Creates an `Inst.Constant` in the arena that carries `typed_value` and is
/// attributed to source offset `src`. Returns a pointer to its base.
fn constInst(self: *Analyze, src: usize, typed_value: TypedValue) !*Inst {
    const node = try self.arena.allocator.create(Inst.Constant);
    node.* = Inst.Constant{
        .base = Inst{
            .tag = Inst.Constant.base_tag,
            .ty = typed_value.ty,
            .src = src,
        },
        .val = typed_value.val,
    };
    return &node.base;
}
/// Creates a constant instruction for the string `str`. Its type is a single
/// const pointer to an `Array_u8_Sentinel0` of `str.len` elements, and its
/// value is a `Bytes` payload that references `str` without copying it.
/// All payloads are allocated in the arena.
fn constStr(self: *Analyze, src: usize, str: []const u8) !*Inst {
    const arena_allocator = &self.arena.allocator;

    // Pointee type: the array.
    const array_ty = try arena_allocator.create(Type.Payload.Array_u8_Sentinel0);
    array_ty.* = .{ .len = str.len };

    // Instruction type: const pointer to that array.
    const ptr_ty = try arena_allocator.create(Type.Payload.SingleConstPointer);
    ptr_ty.* = .{ .pointee_type = Type.initPayload(&array_ty.base) };

    // Value: the bytes themselves.
    const bytes_val = try arena_allocator.create(Value.Payload.Bytes);
    bytes_val.* = .{ .data = str };

    const typed_value = TypedValue{
        .ty = Type.initPayload(&ptr_ty.base),
        .val = Value.initPayload(&bytes_val.base),
    };
    return self.constInst(src, typed_value);
}
2020-04-21 13:06:15 -07:00
/// Analyzes a single text instruction and returns the resulting analyzed
/// instruction. `opt_func` is the function whose body is being analyzed, or
/// `null` for a top-level declaration.
/// Only `str` and `fn` are implemented; every other tag reports a TODO error.
fn analyzeInst(self: *Analyze, old_inst: *text.Inst, opt_func: ?*Fn) InnerError!*Inst {
    switch (old_inst.tag) {
        .str => {
            // We can use this reference because Inst.Const's Value is arena-allocated.
            // The value would get copied to a MemoryCell before the `text.Inst.Str` lifetime ends.
            const str_inst = old_inst.cast(text.Inst.Str).?;
            return self.constStr(old_inst.src, str_inst.positionals.bytes);
        },
        .@"fn" => {
            const fn_inst = old_inst.cast(text.Inst.Fn).?;
            const fn_type = try self.resolveType(opt_func, fn_inst.positionals.fn_type);

            const fn_index = self.fns.items.len;
            var new_func = Fn{
                .body = std.ArrayList(*Inst).init(self.allocator),
                .inst_table = std.AutoHashMap(*text.Inst, NewInst).init(self.allocator),
                .fn_index = fn_index,
            };
            defer new_func.body.deinit();
            defer new_func.inst_table.deinit();

            // Reserve the slot in the function list up front. It is always addressed
            // through its index afterwards, never through a pointer, since the list
            // memory could become invalid while body instructions are analyzed.
            try self.fns.append(.{
                .analysis_status = .in_progress,
                .body = undefined,
            });

            for (fn_inst.positionals.body.instructions) |src_inst| {
                if (self.analyzeInst(src_inst, &new_func)) |analyzed| {
                    try new_func.inst_table.putNoClobber(src_inst, .{ .ptr = analyzed });
                } else |err| {
                    self.fns.items[fn_index].analysis_status = .failure;
                    return err;
                }
            }

            self.fns.items[fn_index] = Module.Fn{
                .analysis_status = .success,
                .body = new_func.body.toOwnedSlice(),
            };

            // The function value refers to the function by its index.
            const fn_payload = try self.arena.allocator.create(Value.Payload.Function);
            fn_payload.* = .{ .index = fn_index };
            return self.constInst(old_inst.src, .{
                .ty = fn_type,
                .val = Value.initPayload(&fn_payload.base),
            });
        },
        .int,
        .ptrtoint,
        .fieldptr,
        .deref,
        .as,
        .@"asm",
        .@"unreachable",
        .@"export",
        .primitive,
        .fntype,
        => return self.fail(old_inst.src, "TODO implement analyzing {}", .{@tagName(old_inst.tag)}),
    }
}
/// Coerces `inst` to `dest_type`. Two cases are handled: types that are
/// in-memory compatible (delegated to `bitcast`) and a single pointer to an
/// array being converted to a slice. Everything else reports a TODO error.
fn coerce(self: *Analyze, dest_type: Type, inst: *Inst) !*Inst {
    if (coerceInMemoryAllowed(dest_type, inst.ty) == .ok) {
        return self.bitcast(dest_type, inst);
    }

    // *[N]T to []T
    if (inst.ty.isSinglePointer() and dest_type.isSlice()) {
        // Constness may be added by the coercion but never removed.
        if (!inst.ty.pointerIsConst() or dest_type.pointerIsConst()) {
            const pointee_type = inst.ty.elemType();
            const slice_elem_type = dest_type.elemType();
            if (pointee_type.zigTypeTag() == .Array and
                coerceInMemoryAllowed(slice_elem_type, pointee_type.elemType()) == .ok)
            {
                return self.coerceArrayPtrToSlice(dest_type, inst);
            }
        }
    }

    return self.fail(inst.src, "TODO implement type coercion", .{});
}
/// Not implemented yet: always reports a TODO error at `inst.src`.
/// `dest_type` is currently unused.
fn bitcast(self: *Analyze, dest_type: Type, inst: *Inst) !*Inst {
    const src_offset = inst.src;
    return self.fail(src_offset, "TODO implement bitcast analysis", .{});
}
/// Converts a pointer-to-array instruction into a slice of type `dest_type`.
/// Only instructions with a known value are supported; runtime-known
/// instructions report a TODO error.
fn coerceArrayPtrToSlice(self: *Analyze, dest_type: Type, inst: *Inst) !*Inst {
    const val = inst.value() orelse
        return self.fail(inst.src, "TODO implement coerceArrayPtrToSlice runtime instruction", .{});
    // The comptime Value representation is compatible with both types.
    return self.constInst(inst.src, .{ .ty = dest_type, .val = val });
}
2020-04-21 10:50:04 -07:00
/// Records a formatted error message for source offset `src` and returns
/// `error.AnalysisFail`. The message text is allocated in the arena.
/// Returns `error.OutOfMemory` instead if the message cannot be stored.
fn fail(self: *Analyze, src: usize, comptime format: []const u8, args: var) InnerError {
    @setCold(true);
    const rendered = try std.fmt.allocPrint(&self.arena.allocator, format, args);
    try self.errors.append(.{
        .byte_offset = src,
        .msg = rendered,
    });
    return error.AnalysisFail;
}
2020-04-21 10:24:25 -07:00
/// Outcome of `coerceInMemoryAllowed`.
const InMemoryCoercionResult = enum {
    /// The in-memory coercion is allowed.
    ok,
    /// The in-memory coercion is not (yet) known to be allowed.
    no_match,
};
/// Decides whether a value of `src_type` may be reinterpreted as `dest_type`
/// in memory. So far only the shortcut of identical small tags / addresses is
/// recognized.
fn coerceInMemoryAllowed(dest_type: Type, src_type: Type) InMemoryCoercionResult {
    // As a shortcut, if the small tags / addresses match, we're done.
    const same_tag = dest_type.tag_if_small_enough == src_type.tag_if_small_enough;
    // TODO: implement more of this function
    return if (same_tag) InMemoryCoercionResult.ok else InMemoryCoercionResult.no_match;
}
2020-04-20 21:56:30 -07:00
};
2020-04-17 21:09:43 -07:00
/// Command line driver: parses the ZIR file named by the first argument,
/// analyzes it, and dumps the module produced by `emit_zir`.
/// Parse and analysis errors are printed as `path:line:column: error: msg`.
pub fn main() anyerror!void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();

    const allocator = if (std.builtin.link_libc) std.heap.c_allocator else &arena.allocator;

    const args = try std.process.argsAlloc(allocator);
    // Guard against running without arguments instead of indexing out of bounds.
    if (args.len < 2) {
        std.debug.warn("expected the path to a ZIR source file as the first argument\n", .{});
        return error.MissingSourcePath;
    }

    const src_path = args[1];
    // When set, failures are returned as errors (yielding an error return
    // trace) instead of exiting with status 1.
    const debug_error_trace = true;

    const source = try std.fs.cwd().readFileAllocOptions(allocator, src_path, std.math.maxInt(u32), 1, 0);

    var zir_module = try text.parse(allocator, source);
    defer zir_module.deinit(allocator);

    if (zir_module.errors.len != 0) {
        for (zir_module.errors) |err_msg| {
            const loc = findLineColumn(source, err_msg.byte_offset);
            std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg });
        }
        if (debug_error_trace) return error.ParseFailure;
        std.process.exit(1);
    }

    var analyzed_module = try analyze(allocator, zir_module);
    defer analyzed_module.deinit(allocator);

    if (analyzed_module.errors.len != 0) {
        for (analyzed_module.errors) |err_msg| {
            const loc = findLineColumn(source, err_msg.byte_offset);
            std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg });
        }
        if (debug_error_trace) return error.ParseFailure;
        std.process.exit(1);
    }

    var new_zir_module = try analyzed_module.emit_zir(allocator);
    defer new_zir_module.deinit(allocator);

    new_zir_module.dump();
}
2020-04-17 21:09:43 -07:00
/// Translates `byte_offset` into a zero-based line and column within `source`.
/// Every `'\n'` starts a new line; every other byte advances the column by one.
/// An offset past the end of `source` is treated as the end of `source`, so a
/// bad offset in a diagnostic cannot cause an out-of-bounds access here.
fn findLineColumn(source: []const u8, byte_offset: usize) struct { line: usize, column: usize } {
    const end = if (byte_offset > source.len) source.len else byte_offset;

    var line: usize = 0;
    var column: usize = 0;
    for (source[0..end]) |byte| {
        if (byte == '\n') {
            line += 1;
            column = 0;
        } else {
            column += 1;
        }
    }
    return .{ .line = line, .column = column };
}
2020-04-18 21:38:56 -07:00
// Performance optimization ideas:
// * when analyzing use a field in the Inst instead of HashMap to track corresponding instructions