simplify ZIR spec; separate parsing/rendering from analysis

Andrew Kelley 2020-04-20 19:19:35 -04:00
parent af4ccf34c1
commit cc1c2bd568
6 changed files with 906 additions and 682 deletions

View File

@@ -1012,25 +1012,27 @@ pub const Dir = struct {
/// On success, caller owns returned buffer.
/// If the file is larger than `max_bytes`, returns `error.FileTooBig`.
pub fn readFileAlloc(self: Dir, allocator: *mem.Allocator, file_path: []const u8, max_bytes: usize) ![]u8 {
return self.readFileAllocAligned(allocator, file_path, max_bytes, @alignOf(u8));
return self.readFileAllocOptions(allocator, file_path, max_bytes, @alignOf(u8), null);
}
/// On success, caller owns returned buffer.
/// If the file is larger than `max_bytes`, returns `error.FileTooBig`.
pub fn readFileAllocAligned(
/// Allows specifying alignment and a sentinel value.
pub fn readFileAllocOptions(
self: Dir,
allocator: *mem.Allocator,
file_path: []const u8,
max_bytes: usize,
comptime A: u29,
) ![]align(A) u8 {
comptime alignment: u29,
comptime optional_sentinel: ?u8,
) !(if (optional_sentinel) |s| [:s]align(alignment) u8 else []align(alignment) u8) {
var file = try self.openFile(file_path, .{});
defer file.close();
const size = math.cast(usize, try file.getEndPos()) catch math.maxInt(usize);
if (size > max_bytes) return error.FileTooBig;
const buf = try allocator.alignedAlloc(u8, A, size);
const buf = try allocator.allocWithOptions(u8, size, alignment, optional_sentinel);
errdefer allocator.free(buf);
try file.inStream().readNoEof(buf);

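As a reading aid, a minimal sketch of how a caller might use the new readFileAllocOptions; this is not part of the diff, and the loadSource name and path parameter are assumptions. It mirrors the call added to main() further down (alignment 1, sentinel 0), which yields a 0-terminated [:0]u8 buffer.

const std = @import("std");

/// Hedged sketch: load a ZIR source file as a 0-terminated buffer.
fn loadSource(allocator: *std.mem.Allocator, path: []const u8) ![:0]u8 {
    // Alignment 1 is the natural alignment of u8; sentinel 0 makes the result
    // [:0]u8, so a parser can scan for the 0 byte instead of checking a length.
    return std.fs.cwd().readFileAllocOptions(allocator, path, std.math.maxInt(u32), 1, 0);
}
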
View File

@@ -105,6 +105,31 @@ pub const Allocator = struct {
return self.alignedAlloc(T, null, n);
}
pub fn allocWithOptions(
self: *Allocator,
comptime Elem: type,
n: usize,
/// null means naturally aligned
comptime optional_alignment: ?u29,
comptime optional_sentinel: ?Elem,
) Error!AllocWithOptionsPayload(Elem, optional_alignment, optional_sentinel) {
if (optional_sentinel) |sentinel| {
const ptr = try self.alignedAlloc(Elem, optional_alignment, n + 1);
ptr[n] = sentinel;
return ptr[0..n :sentinel];
} else {
return self.alignedAlloc(Elem, optional_alignment, n);
}
}
fn AllocWithOptionsPayload(comptime Elem: type, comptime alignment: ?u29, comptime sentinel: ?Elem) type {
if (sentinel) |s| {
return [:s]align(alignment orelse @alignOf(Elem)) Elem;
} else {
return []align(alignment orelse @alignOf(Elem)) Elem;
}
}
/// Allocates an array of `n + 1` items of type `T` and sets the first `n`
/// items to `undefined` and the last item to `sentinel`. Depending on the
/// Allocator implementation, it may be required to call `free` once the
@@ -113,10 +138,10 @@ pub const Allocator = struct {
/// call `free` when done.
///
/// For allocating a single item, see `create`.
///
/// Deprecated; use `allocWithOptions`.
pub fn allocSentinel(self: *Allocator, comptime Elem: type, n: usize, comptime sentinel: Elem) Error![:sentinel]Elem {
var ptr = try self.alloc(Elem, n + 1);
ptr[n] = sentinel;
return ptr[0..n :sentinel];
return self.allocWithOptions(Elem, n, null, sentinel);
}
pub fn alignedAlloc(

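For reference, a hedged sketch of calling the new allocWithOptions; the test wrapper, allocator choice, and sizes are assumptions, not part of the diff. Passing a sentinel reproduces the old allocSentinel result type, while passing only an alignment behaves like alignedAlloc.

const std = @import("std");

test "allocWithOptions sketch" {
    const allocator = std.heap.page_allocator;
    // Sentinel with natural alignment: AllocWithOptionsPayload gives [:0]u8.
    const name_buf = try allocator.allocWithOptions(u8, 16, null, 0);
    defer allocator.free(name_buf);
    // Alignment with no sentinel: result type is []align(16) u8.
    const aligned = try allocator.allocWithOptions(u8, 64, 16, null);
    defer allocator.free(aligned);
}
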
View File

@@ -4,41 +4,26 @@ const Allocator = std.mem.Allocator;
const Value = @import("value.zig").Value;
const Type = @import("type.zig").Type;
const assert = std.debug.assert;
const text = @import("ir/text.zig");
/// These are in-memory, analyzed instructions. See `text.Inst` for the representation
/// of instructions that correspond to the ZIR text format.
pub const Inst = struct {
tag: Tag,
pub fn ty(base: *Inst) ?Type {
switch (base.tag) {
.constant => return base.cast(Constant).?.ty,
.@"asm" => return base.cast(Assembly).?.ty,
.@"fn" => return base.cast(Fn).?.ty,
/// These names are used for the IR text format.
pub const Tag = enum {
constant,
ptrtoint,
fieldptr,
deref,
@"asm",
@"unreachable",
@"fn",
@"export",
};
.ptrtoint => return Type.initTag(.@"usize"),
.@"unreachable" => return Type.initTag(.@"noreturn"),
.@"export" => return Type.initTag(.@"void"),
.fntype, .primitive => return Type.initTag(.@"type"),
pub fn TagToType(tag: Tag) type {
return switch (tag) {
.constant => Constant,
.ptrtoint => PtrToInt,
.fieldptr => FieldPtr,
.deref => Deref,
.@"asm" => Assembly,
.@"unreachable" => Unreachable,
.@"fn" => Fn,
.@"export" => Export,
};
}
pub fn cast(base: *Inst, comptime T: type) ?*T {
const expected_tag = std.meta.fieldInfo(T, "base").default_value.?.tag;
if (base.tag != expected_tag)
return null;
return @fieldParentPtr(T, "base", base);
.fieldptr,
.deref,
=> return null,
}
}
/// This struct owns the `Value` memory. When the struct is deallocated,
@@ -53,644 +38,70 @@ pub const Inst = struct {
},
kw_args: struct {},
};
pub const PtrToInt = struct {
base: Inst = Inst{ .tag = .ptrtoint },
positionals: struct {
ptr: *Inst,
},
kw_args: struct {},
};
pub const FieldPtr = struct {
base: Inst = Inst{ .tag = .fieldptr },
positionals: struct {
object_ptr: *Inst,
field_name: *Inst,
},
kw_args: struct {},
};
pub const Deref = struct {
base: Inst = Inst{ .tag = .deref },
positionals: struct {
ptr: *Inst,
},
kw_args: struct {},
};
pub const Assembly = struct {
base: Inst = Inst{ .tag = .@"asm" },
positionals: struct {
asm_source: *Inst,
},
kw_args: struct {
@"volatile": bool = false,
output: ?*Inst = null,
inputs: []*Inst = &[0]*Inst{},
clobbers: []*Inst = &[0]*Inst{},
args: []*Inst = &[0]*Inst{},
},
};
pub const Unreachable = struct {
base: Inst = Inst{ .tag = .@"unreachable" },
positionals: struct {},
kw_args: struct {},
};
pub const Fn = struct {
base: Inst = Inst{ .tag = .@"fn" },
positionals: struct {
body: Body,
},
kw_args: struct {
cc: std.builtin.CallingConvention = .Unspecified,
},
pub const Body = struct {
instructions: []*Inst,
};
};
pub const Export = struct {
base: Inst = Inst{ .tag = .@"export" },
positionals: struct {
symbol_name: *Inst,
value: *Inst,
},
kw_args: struct {},
};
};
pub const ErrorMsg = struct {
byte_offset: usize,
msg: []const u8,
};
pub const Tree = struct {
decls: []*Inst,
errors: []ErrorMsg,
pub fn deinit(self: *Tree) void {
// TODO resource deallocation
self.* = undefined;
}
/// This is a debugging utility for rendering the tree to stderr.
pub fn dump(self: Tree) void {
self.writeToStream(std.heap.page_allocator, std.io.getStdErr().outStream()) catch {};
}
const InstPtrTable = std.AutoHashMap(*Inst, struct { index: usize, fn_body: ?*Inst.Fn.Body });
pub fn writeToStream(self: Tree, allocator: *Allocator, stream: var) !void {
// First, build a map of *Inst to @ or % indexes
var inst_table = InstPtrTable.init(allocator);
defer inst_table.deinit();
try inst_table.ensureCapacity(self.decls.len);
for (self.decls) |decl, decl_i| {
try inst_table.putNoClobber(decl, .{ .index = decl_i, .fn_body = null });
if (decl.cast(Inst.Fn)) |fn_inst| {
for (fn_inst.positionals.body.instructions) |inst, inst_i| {
try inst_table.putNoClobber(inst, .{ .index = inst_i, .fn_body = &fn_inst.positionals.body });
}
}
}
for (self.decls) |decl, i| {
try stream.print("@{} ", .{i});
try self.writeInstToStream(stream, decl, &inst_table);
try stream.writeByte('\n');
}
}
fn writeInstToStream(
self: Tree,
stream: var,
decl: *Inst,
inst_table: *const InstPtrTable,
) @TypeOf(stream).Error!void {
// TODO I tried implementing this with an inline for loop and hit a compiler bug
switch (decl.tag) {
.constant => return self.writeInstToStreamGeneric(stream, .constant, decl, inst_table),
.ptrtoint => return self.writeInstToStreamGeneric(stream, .ptrtoint, decl, inst_table),
.fieldptr => return self.writeInstToStreamGeneric(stream, .fieldptr, decl, inst_table),
.deref => return self.writeInstToStreamGeneric(stream, .deref, decl, inst_table),
.@"asm" => return self.writeInstToStreamGeneric(stream, .@"asm", decl, inst_table),
.@"unreachable" => return self.writeInstToStreamGeneric(stream, .@"unreachable", decl, inst_table),
.@"fn" => return self.writeInstToStreamGeneric(stream, .@"fn", decl, inst_table),
.@"export" => return self.writeInstToStreamGeneric(stream, .@"export", decl, inst_table),
}
}
fn writeInstToStreamGeneric(
self: Tree,
stream: var,
comptime inst_tag: Inst.Tag,
base: *Inst,
inst_table: *const InstPtrTable,
) !void {
const SpecificInst = Inst.TagToType(inst_tag);
const inst = @fieldParentPtr(SpecificInst, "base", base);
if (@hasField(SpecificInst, "ty")) {
try stream.print(": {} ", .{inst.ty});
}
if (inst_tag == .constant) {
if (inst.positionals.value.cast(Value.Payload.Bytes)) |bytes_value| {
try stream.writeAll("= ");
return std.zig.renderStringLiteral(bytes_value.data, stream);
} else if (inst.positionals.value.cast(Value.Payload.Int_u64)) |v| {
return stream.print("= {}", .{v.int});
} else if (inst.positionals.value.cast(Value.Payload.Int_i64)) |v| {
return stream.print("= {}", .{v.int});
}
}
const Positionals = @TypeOf(inst.positionals);
try stream.writeAll("= " ++ @tagName(inst_tag) ++ "(");
const pos_fields = @typeInfo(Positionals).Struct.fields;
inline for (pos_fields) |arg_field, i| {
if (i != 0) {
try stream.writeAll(", ");
}
try self.writeParamToStream(stream, @field(inst.positionals, arg_field.name), inst_table);
}
comptime var need_comma = pos_fields.len != 0;
const KW_Args = @TypeOf(inst.kw_args);
inline for (@typeInfo(KW_Args).Struct.fields) |arg_field, i| {
if (need_comma) {
try stream.writeAll(",\n ");
}
if (@typeInfo(arg_field.field_type) == .Optional) {
if (@field(inst.kw_args, arg_field.name)) |non_optional| {
try stream.print("{}=", .{arg_field.name});
try self.writeParamToStream(stream, non_optional, inst_table);
need_comma = true;
}
} else {
try stream.print("{}=", .{arg_field.name});
try self.writeParamToStream(stream, @field(inst.kw_args, arg_field.name), inst_table);
need_comma = true;
}
}
try stream.writeByte(')');
}
fn writeParamToStream(self: Tree, stream: var, param: var, inst_table: *const InstPtrTable) !void {
if (@typeInfo(@TypeOf(param)) == .Enum) {
return stream.writeAll(@tagName(param));
}
switch (@TypeOf(param)) {
Value => {
try stream.print("{}", .{param});
},
*Inst => return self.writeInstParamToStream(stream, param, inst_table),
[]*Inst => {
try stream.writeByte('[');
for (param) |inst, i| {
if (i != 0) {
try stream.writeAll(", ");
}
try self.writeInstParamToStream(stream, inst, inst_table);
}
try stream.writeByte(']');
},
Inst.Fn.Body => {
try stream.writeAll("{\n");
for (param.instructions) |inst, i| {
try stream.print(" %{} ", .{i});
try self.writeInstToStream(stream, inst, inst_table);
try stream.writeByte('\n');
}
try stream.writeByte('}');
},
bool => return stream.writeByte("01"[@boolToInt(param)]),
else => |T| @compileError("unimplemented: rendering parameter of type " ++ @typeName(T)),
}
}
fn writeInstParamToStream(self: Tree, stream: var, inst: *Inst, inst_table: *const InstPtrTable) !void {
const info = inst_table.getValue(inst).?;
const prefix = if (info.fn_body == null) "@" else "%";
try stream.print("{}{}", .{ prefix, info.index });
}
};
const ParseContext = struct {
const Analyze = struct {
allocator: *Allocator,
i: usize,
source: []const u8,
old_tree: *const Module,
errors: std.ArrayList(ErrorMsg),
decls: std.ArrayList(*Inst),
global_name_map: *std.StringHashMap(usize),
const NewInst = struct {
ptr: *Inst,
};
};
pub fn parse(allocator: *Allocator, source: []const u8) Allocator.Error!Tree {
var global_name_map = std.StringHashMap(usize).init(allocator);
defer global_name_map.deinit();
var ctx: ParseContext = .{
pub fn analyze(allocator: *Allocator, old_tree: Module) !Module {
var ctx = Analyze{
.allocator = allocator,
.i = 0,
.source = source,
.old_tree = &old_tree,
.decls = std.ArrayList(*Inst).init(allocator),
.errors = std.ArrayList(ErrorMsg).init(allocator),
.global_name_map = &global_name_map,
.inst_table = std.HashMap(*Inst, Analyze.InstData).init(allocator),
};
parseRoot(&ctx) catch |err| switch (err) {
error.ParseFailure => {
defer ctx.decls.deinit();
defer ctx.errors.deinit();
defer inst_table.deinit();
analyzeRoot(&ctx) catch |err| switch (err) {
error.AnalyzeFailure => {
assert(ctx.errors.items.len != 0);
},
else => |e| return e,
};
return Tree{
return Module{
.decls = ctx.decls.toOwnedSlice(),
.errors = ctx.errors.toOwnedSlice(),
};
}
pub fn parseRoot(ctx: *ParseContext) !void {
// The IR format is designed so that it can be tokenized and parsed at the same time.
while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) {
';' => _ = try skipToAndOver(ctx, '\n'),
'@' => {
ctx.i += 1;
const ident = try skipToAndOver(ctx, ' ');
const opt_type = try parseOptionalType(ctx);
const inst = try parseInstruction(ctx, opt_type, null);
const ident_index = ctx.decls.items.len;
if (try ctx.global_name_map.put(ident, ident_index)) |_| {
return parseError(ctx, "redefinition of identifier '{}'", .{ident});
}
try ctx.decls.append(inst);
continue;
},
' ', '\n' => continue,
else => |byte| return parseError(ctx, "unexpected byte: '{c}'", .{byte}),
};
}
fn eatByte(ctx: *ParseContext, byte: u8) bool {
if (ctx.i >= ctx.source.len) return false;
if (ctx.source[ctx.i] != byte) return false;
ctx.i += 1;
return true;
}
fn skipSpace(ctx: *ParseContext) void {
while (ctx.i < ctx.source.len and (ctx.source[ctx.i] == ' ' or ctx.source[ctx.i] == '\n')) {
ctx.i += 1;
}
}
fn requireEatBytes(ctx: *ParseContext, bytes: []const u8) !void {
if (ctx.i + bytes.len > ctx.source.len)
return parseError(ctx, "unexpected EOF", .{});
if (!mem.eql(u8, ctx.source[ctx.i..][0..bytes.len], bytes))
return parseError(ctx, "expected '{}'", .{bytes});
ctx.i += bytes.len;
}
fn skipToAndOver(ctx: *ParseContext, byte: u8) ![]const u8 {
const start_i = ctx.i;
while (ctx.i < ctx.source.len) : (ctx.i += 1) {
if (ctx.source[ctx.i] == byte) {
const result = ctx.source[start_i..ctx.i];
ctx.i += 1;
return result;
fn analyzeRoot(ctx: *Analyze) !void {
for (ctx.old_tree.decls) |decl| {
if (decl.cast(Inst.Export)) |export_inst| {
try analyzeExport(ctx, export_inst);
}
}
return parseError(ctx, "unexpected EOF", .{});
}
fn parseError(ctx: *ParseContext, comptime format: []const u8, args: var) error{ ParseFailure, OutOfMemory } {
const msg = try std.fmt.allocPrint(ctx.allocator, format, args);
(try ctx.errors.addOne()).* = .{
.byte_offset = ctx.i,
.msg = msg,
};
return error.ParseFailure;
}
/// Regardless of whether a `Type` is returned, it skips past the '='.
fn parseOptionalType(ctx: *ParseContext) !?Type {
skipSpace(ctx);
if (eatByte(ctx, ':')) {
const type_text_untrimmed = try skipToAndOver(ctx, '=');
skipSpace(ctx);
const type_text = mem.trim(u8, type_text_untrimmed, " \n");
if (mem.eql(u8, type_text, "usize")) {
return Type.initTag(.int_usize);
} else if (mem.eql(u8, type_text, "noreturn")) {
return Type.initTag(.no_return);
} else {
return parseError(ctx, "TODO parse type '{}'", .{type_text});
}
} else {
skipSpace(ctx);
try requireEatBytes(ctx, "=");
skipSpace(ctx);
return null;
}
}
fn parseInstruction(
ctx: *ParseContext,
opt_type: ?Type,
body_ctx: ?*BodyContext,
) error{ OutOfMemory, ParseFailure }!*Inst {
switch (ctx.source[ctx.i]) {
'"' => return parseStringLiteralConst(ctx, opt_type),
'0'...'9' => return parseIntegerLiteralConst(ctx, opt_type),
else => {},
}
const fn_name = try skipToAndOver(ctx, '(');
inline for (@typeInfo(Inst.Tag).Enum.fields) |field| {
if (mem.eql(u8, field.name, fn_name)) {
const tag = @field(Inst.Tag, field.name);
return parseInstructionGeneric(ctx, field.name, Inst.TagToType(tag), opt_type, body_ctx);
}
}
return parseError(ctx, "unknown instruction '{}'", .{fn_name});
}
fn parseInstructionGeneric(
ctx: *ParseContext,
comptime fn_name: []const u8,
comptime InstType: type,
opt_type: ?Type,
body_ctx: ?*BodyContext,
) !*Inst {
const inst_specific = try ctx.allocator.create(InstType);
inst_specific.base = std.meta.fieldInfo(InstType, "base").default_value.?;
if (@hasField(InstType, "ty")) {
inst_specific.ty = opt_type orelse {
return parseError(ctx, "instruction '" ++ fn_name ++ "' requires type", .{});
};
}
const Positionals = @TypeOf(inst_specific.positionals);
inline for (@typeInfo(Positionals).Struct.fields) |arg_field| {
if (ctx.source[ctx.i] == ',') {
ctx.i += 1;
skipSpace(ctx);
} else if (ctx.source[ctx.i] == ')') {
return parseError(ctx, "expected positional parameter '{}'", .{arg_field.name});
}
@field(inst_specific.positionals, arg_field.name) = try parseParameterGeneric(
ctx,
arg_field.field_type,
body_ctx,
);
skipSpace(ctx);
}
const KW_Args = @TypeOf(inst_specific.kw_args);
inst_specific.kw_args = .{}; // assign defaults
skipSpace(ctx);
while (eatByte(ctx, ',')) {
skipSpace(ctx);
const name = try skipToAndOver(ctx, '=');
inline for (@typeInfo(KW_Args).Struct.fields) |arg_field| {
const field_name = arg_field.name;
if (mem.eql(u8, name, field_name)) {
const NonOptional = switch (@typeInfo(arg_field.field_type)) {
.Optional => |info| info.child,
else => arg_field.field_type,
};
@field(inst_specific.kw_args, field_name) = try parseParameterGeneric(ctx, NonOptional, body_ctx);
break;
}
} else {
return parseError(ctx, "unrecognized keyword parameter: '{}'", .{name});
}
skipSpace(ctx);
}
try requireEatBytes(ctx, ")");
return &inst_specific.base;
}
fn parseParameterGeneric(ctx: *ParseContext, comptime T: type, body_ctx: ?*BodyContext) !T {
if (@typeInfo(T) == .Enum) {
const start = ctx.i;
while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) {
' ', '\n', ',', ')' => {
const enum_name = ctx.source[start..ctx.i];
return std.meta.stringToEnum(T, enum_name) orelse {
return parseError(ctx, "tag '{}' not a member of enum '{}'", .{ enum_name, @typeName(T) });
};
},
else => continue,
};
return parseError(ctx, "unexpected EOF in enum parameter", .{});
}
switch (T) {
Inst.Fn.Body => return parseBody(ctx),
bool => {
const bool_value = switch (ctx.source[ctx.i]) {
'0' => false,
'1' => true,
else => |byte| return parseError(ctx, "expected '0' or '1' for boolean value, found {c}", .{byte}),
};
ctx.i += 1;
return bool_value;
},
[]*Inst => {
try requireEatBytes(ctx, "[");
skipSpace(ctx);
if (eatByte(ctx, ']')) return &[0]*Inst{};
var instructions = std.ArrayList(*Inst).init(ctx.allocator);
defer instructions.deinit();
while (true) {
skipSpace(ctx);
try instructions.append(try parseParameterInst(ctx, body_ctx));
skipSpace(ctx);
if (!eatByte(ctx, ',')) break;
}
try requireEatBytes(ctx, "]");
return instructions.toOwnedSlice();
},
*Inst => return parseParameterInst(ctx, body_ctx),
Value => return parseError(ctx, "TODO implement parseParameterGeneric for type Value", .{}),
else => @compileError("Unimplemented: ir parseParameterGeneric for type " ++ @typeName(T)),
}
return parseError(ctx, "TODO parse parameter {}", .{@typeName(T)});
}
fn parseParameterInst(ctx: *ParseContext, body_ctx: ?*BodyContext) !*Inst {
const local_ref = switch (ctx.source[ctx.i]) {
'@' => false,
'%' => true,
'"' => {
const str_lit_inst = try parseStringLiteralConst(ctx, null);
try ctx.decls.append(str_lit_inst);
return str_lit_inst;
},
else => |byte| return parseError(ctx, "unexpected byte: '{c}'", .{byte}),
};
const map = if (local_ref)
if (body_ctx) |bc|
&bc.name_map
else
return parseError(ctx, "referencing a % instruction in global scope", .{})
else
ctx.global_name_map;
ctx.i += 1;
const name_start = ctx.i;
while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) {
' ', '\n', ',', ')', ']' => break,
else => continue,
};
const ident = ctx.source[name_start..ctx.i];
const kv = map.get(ident) orelse {
const bad_name = ctx.source[name_start - 1 .. ctx.i];
ctx.i = name_start - 1;
return parseError(ctx, "unrecognized identifier: {}", .{bad_name});
};
if (local_ref) {
return body_ctx.?.instructions.items[kv.value];
} else {
return ctx.decls.items[kv.value];
}
}
const BodyContext = struct {
instructions: std.ArrayList(*Inst),
name_map: std.StringHashMap(usize),
};
fn parseBody(ctx: *ParseContext) !Inst.Fn.Body {
var body_context = BodyContext{
.instructions = std.ArrayList(*Inst).init(ctx.allocator),
.name_map = std.StringHashMap(usize).init(ctx.allocator),
};
defer body_context.instructions.deinit();
defer body_context.name_map.deinit();
try requireEatBytes(ctx, "{");
skipSpace(ctx);
while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) {
';' => _ = try skipToAndOver(ctx, '\n'),
'%' => {
ctx.i += 1;
const ident = try skipToAndOver(ctx, ' ');
const opt_type = try parseOptionalType(ctx);
const inst = try parseInstruction(ctx, opt_type, &body_context);
const ident_index = body_context.instructions.items.len;
if (try body_context.name_map.put(ident, ident_index)) |_| {
return parseError(ctx, "redefinition of identifier '{}'", .{ident});
}
try body_context.instructions.append(inst);
continue;
},
' ', '\n' => continue,
'}' => {
ctx.i += 1;
break;
},
else => |byte| return parseError(ctx, "unexpected byte: '{c}'", .{byte}),
fn analyzeExport(ctx: *Analyze, export_inst: *Inst.Export) !void {
const old_decl = export_inst.positionals.value;
const new_info = ctx.inst_table.get(old_decl) orelse blk: {
const new_decl = try analyzeDecl(ctx, old_decl);
const new_info: Analyze.NewInst = .{ .ptr = new_decl };
try ctx.inst_table.put(old_decl, new_info);
break :blk new_info;
};
return Inst.Fn.Body{
.instructions = body_context.instructions.toOwnedSlice(),
};
}
fn parseStringLiteralConst(ctx: *ParseContext, opt_type: ?Type) !*Inst {
const start = ctx.i;
ctx.i += 1; // skip over '"'
while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) {
'"' => {
ctx.i += 1;
const span = ctx.source[start..ctx.i];
var bad_index: usize = undefined;
const parsed = std.zig.parseStringLiteral(ctx.allocator, span, &bad_index) catch |err| switch (err) {
error.InvalidCharacter => {
ctx.i = start + bad_index;
const bad_byte = ctx.source[ctx.i];
return parseError(ctx, "invalid string literal character: '{c}'\n", .{bad_byte});
},
else => |e| return e,
};
const const_inst = try ctx.allocator.create(Inst.Constant);
errdefer ctx.allocator.destroy(const_inst);
const bytes_payload = try ctx.allocator.create(Value.Payload.Bytes);
errdefer ctx.allocator.destroy(bytes_payload);
bytes_payload.* = .{ .data = parsed };
const ty = opt_type orelse blk: {
const array_payload = try ctx.allocator.create(Type.Payload.Array_u8_Sentinel0);
errdefer ctx.allocator.destroy(array_payload);
array_payload.* = .{ .len = parsed.len };
const ty_payload = try ctx.allocator.create(Type.Payload.SingleConstPointer);
errdefer ctx.allocator.destroy(ty_payload);
ty_payload.* = .{ .pointee_type = Type.initPayload(&array_payload.base) };
break :blk Type.initPayload(&ty_payload.base);
};
const_inst.* = .{
.ty = ty,
.positionals = .{ .value = Value.initPayload(&bytes_payload.base) },
.kw_args = .{},
};
return &const_inst.base;
},
'\\' => {
ctx.i += 1;
if (ctx.i >= ctx.source.len) break;
continue;
},
else => continue,
};
return parseError(ctx, "unexpected EOF in string literal", .{});
}
fn parseIntegerLiteralConst(ctx: *ParseContext, opt_type: ?Type) !*Inst {
const start = ctx.i;
while (ctx.i < ctx.source.len) : (ctx.i += 1) switch (ctx.source[ctx.i]) {
'0'...'9' => continue,
else => break,
};
const number_text = ctx.source[start..ctx.i];
const number = std.fmt.parseInt(u64, number_text, 10) catch |err| switch (err) {
error.Overflow => return parseError(ctx, "TODO handle big integers", .{}),
error.InvalidCharacter => return parseError(ctx, "invalid integer literal", .{}),
};
const int_payload = try ctx.allocator.create(Value.Payload.Int_u64);
errdefer ctx.allocator.destroy(int_payload);
int_payload.* = .{ .int = number };
const const_inst = try ctx.allocator.create(Inst.Constant);
errdefer ctx.allocator.destroy(const_inst);
const_inst.* = .{
.ty = opt_type orelse Type.initTag(.int_comptime),
.positionals = .{ .value = Value.initPayload(&int_payload.base) },
.kw_args = .{},
};
return &const_inst.base;
//const exp_type = new_info.ptr.ty();
//switch (exp_type.zigTypeTag()) {
// .Fn => {
// if () |kv| {
// kv.value
// }
// return analyzeExportFn(ctx, exp_target.cast(Inst.,
// },
// else => return ctx.fail("unable to export type '{}'", .{exp_type}),
//}
}
pub fn main() anyerror!void {
@@ -703,9 +114,9 @@ pub fn main() anyerror!void {
const src_path = args[1];
const debug_error_trace = true;
const source = try std.fs.cwd().readFileAlloc(allocator, src_path, std.math.maxInt(u32));
const source = try std.fs.cwd().readFileAllocOptions(allocator, src_path, std.math.maxInt(u32), 1, 0);
var tree = try parse(allocator, source);
var tree = try text.parse(allocator, source);
defer tree.deinit();
if (tree.errors.len != 0) {
@@ -719,8 +130,19 @@ pub fn main() anyerror!void {
tree.dump();
//const new_tree = try semanticallyAnalyze(tree);
//const new_tree = try analyze(allocator, tree);
//defer new_tree.deinit();
//if (new_tree.errors.len != 0) {
// for (new_tree.errors) |err_msg| {
// const loc = findLineColumn(source, err_msg.byte_offset);
// std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg });
// }
// if (debug_error_trace) return error.ParseFailure;
// std.process.exit(1);
//}
//new_tree.dump();
}
fn findLineColumn(source: []const u8, byte_offset: usize) struct { line: usize, column: usize } {
@@ -741,4 +163,4 @@ fn findLineColumn(source: []const u8, byte_offset: usize) struct { line: usize,
}
// Performance optimization ideas:
// * make the source code sentinel-terminated, so that all the checks against the length can be skipped
// * when analyzing use a field in the Inst instead of HashMap to track corresponding instructions
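
The new Parser in ir/text.zig already leans on the first idea: main() now reads the file with sentinel 0, so the source is [:0]const u8 and the scanning loops stop on the 0 byte rather than re-checking the index against source.len. A minimal illustration with a hypothetical helper name, not code from the commit:

/// Hypothetical example of sentinel-terminated scanning: source[source.len]
/// is guaranteed to be 0, so the loop condition needs no bounds check.
fn skipSpaceExample(source: [:0]const u8, start: usize) usize {
    var i = start;
    while (source[i] == ' ' or source[i] == '\n') : (i += 1) {}
    return i;
}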

src-self-hosted/ir/text.zig (new file, 712 lines added)
View File

@@ -0,0 +1,712 @@
//! This file has to do with parsing and rendering the ZIR text format.
const std = @import("std");
const mem = std.mem;
const Allocator = std.mem.Allocator;
const Value = @import("../value.zig").Value;
const assert = std.debug.assert;
const ir = @import("../ir.zig");
const BigInt = std.math.big.Int;
/// These are instructions that correspond to the ZIR text format. See `ir.Inst` for
/// in-memory, analyzed instructions with types and values.
pub const Inst = struct {
tag: Tag,
/// These names are used directly as the instruction names in the text format.
pub const Tag = enum {
str,
int,
ptrtoint,
fieldptr,
deref,
as,
@"asm",
@"unreachable",
@"fn",
@"export",
primitive,
fntype,
};
pub fn TagToType(tag: Tag) type {
return switch (tag) {
.str => Str,
.int => Int,
.ptrtoint => PtrToInt,
.fieldptr => FieldPtr,
.deref => Deref,
.as => As,
.@"asm" => Assembly,
.@"unreachable" => Unreachable,
.@"fn" => Fn,
.@"export" => Export,
.primitive => Primitive,
.fntype => FnType,
};
}
pub fn cast(base: *Inst, comptime T: type) ?*T {
const expected_tag = std.meta.fieldInfo(T, "base").default_value.?.tag;
if (base.tag != expected_tag)
return null;
return @fieldParentPtr(T, "base", base);
}
pub const Str = struct {
base: Inst = Inst{ .tag = .str },
positionals: struct {
bytes: []u8,
},
kw_args: struct {},
};
pub const Int = struct {
base: Inst = Inst{ .tag = .int },
positionals: struct {
int: BigInt,
},
kw_args: struct {},
};
pub const PtrToInt = struct {
base: Inst = Inst{ .tag = .ptrtoint },
positionals: struct {
ptr: *Inst,
},
kw_args: struct {},
};
pub const FieldPtr = struct {
base: Inst = Inst{ .tag = .fieldptr },
positionals: struct {
object_ptr: *Inst,
field_name: *Inst,
},
kw_args: struct {},
};
pub const Deref = struct {
base: Inst = Inst{ .tag = .deref },
positionals: struct {
ptr: *Inst,
},
kw_args: struct {},
};
pub const As = struct {
base: Inst = Inst{ .tag = .as },
positionals: struct {
dest_type: *Inst,
value: *Inst,
},
kw_args: struct {},
};
pub const Assembly = struct {
base: Inst = Inst{ .tag = .@"asm" },
positionals: struct {
asm_source: *Inst,
return_type: *Inst,
},
kw_args: struct {
@"volatile": bool = false,
output: ?*Inst = null,
inputs: []*Inst = &[0]*Inst{},
clobbers: []*Inst = &[0]*Inst{},
args: []*Inst = &[0]*Inst{},
},
};
pub const Unreachable = struct {
base: Inst = Inst{ .tag = .@"unreachable" },
positionals: struct {},
kw_args: struct {},
};
pub const Fn = struct {
base: Inst = Inst{ .tag = .@"fn" },
positionals: struct {
fn_type: *Inst,
body: Body,
},
kw_args: struct {},
pub const Body = struct {
instructions: []*Inst,
};
};
pub const Export = struct {
base: Inst = Inst{ .tag = .@"export" },
positionals: struct {
symbol_name: *Inst,
value: *Inst,
},
kw_args: struct {},
};
pub const Primitive = struct {
base: Inst = Inst{ .tag = .primitive },
positionals: struct {
tag: BuiltinType,
},
kw_args: struct {},
pub const BuiltinType = enum {
@"isize",
@"usize",
@"c_short",
@"c_ushort",
@"c_int",
@"c_uint",
@"c_long",
@"c_ulong",
@"c_longlong",
@"c_ulonglong",
@"c_longdouble",
@"c_void",
@"f16",
@"f32",
@"f64",
@"f128",
@"bool",
@"void",
@"noreturn",
@"type",
@"anyerror",
@"comptime_int",
@"comptime_float",
};
};
pub const FnType = struct {
base: Inst = Inst{ .tag = .fntype },
positionals: struct {
param_types: []*Inst,
return_type: *Inst,
},
kw_args: struct {
cc: std.builtin.CallingConvention = .Unspecified,
},
};
};
pub const ErrorMsg = struct {
byte_offset: usize,
msg: []const u8,
};
pub const Module = struct {
decls: []*Inst,
errors: []ErrorMsg,
pub fn deinit(self: *Module) void {
// TODO resource deallocation
self.* = undefined;
}
/// This is a debugging utility for rendering the tree to stderr.
pub fn dump(self: Module) void {
self.writeToStream(std.heap.page_allocator, std.io.getStdErr().outStream()) catch {};
}
const InstPtrTable = std.AutoHashMap(*Inst, struct { index: usize, fn_body: ?*Inst.Fn.Body });
pub fn writeToStream(self: Module, allocator: *Allocator, stream: var) !void {
// First, build a map of *Inst to @ or % indexes
var inst_table = InstPtrTable.init(allocator);
defer inst_table.deinit();
try inst_table.ensureCapacity(self.decls.len);
for (self.decls) |decl, decl_i| {
try inst_table.putNoClobber(decl, .{ .index = decl_i, .fn_body = null });
if (decl.cast(Inst.Fn)) |fn_inst| {
for (fn_inst.positionals.body.instructions) |inst, inst_i| {
try inst_table.putNoClobber(inst, .{ .index = inst_i, .fn_body = &fn_inst.positionals.body });
}
}
}
for (self.decls) |decl, i| {
try stream.print("@{} ", .{i});
try self.writeInstToStream(stream, decl, &inst_table);
try stream.writeByte('\n');
}
}
fn writeInstToStream(
self: Module,
stream: var,
decl: *Inst,
inst_table: *const InstPtrTable,
) @TypeOf(stream).Error!void {
// TODO I tried implementing this with an inline for loop and hit a compiler bug
switch (decl.tag) {
.str => return self.writeInstToStreamGeneric(stream, .str, decl, inst_table),
.int => return self.writeInstToStreamGeneric(stream, .int, decl, inst_table),
.ptrtoint => return self.writeInstToStreamGeneric(stream, .ptrtoint, decl, inst_table),
.fieldptr => return self.writeInstToStreamGeneric(stream, .fieldptr, decl, inst_table),
.deref => return self.writeInstToStreamGeneric(stream, .deref, decl, inst_table),
.as => return self.writeInstToStreamGeneric(stream, .as, decl, inst_table),
.@"asm" => return self.writeInstToStreamGeneric(stream, .@"asm", decl, inst_table),
.@"unreachable" => return self.writeInstToStreamGeneric(stream, .@"unreachable", decl, inst_table),
.@"fn" => return self.writeInstToStreamGeneric(stream, .@"fn", decl, inst_table),
.@"export" => return self.writeInstToStreamGeneric(stream, .@"export", decl, inst_table),
.primitive => return self.writeInstToStreamGeneric(stream, .primitive, decl, inst_table),
.fntype => return self.writeInstToStreamGeneric(stream, .fntype, decl, inst_table),
}
}
fn writeInstToStreamGeneric(
self: Module,
stream: var,
comptime inst_tag: Inst.Tag,
base: *Inst,
inst_table: *const InstPtrTable,
) !void {
const SpecificInst = Inst.TagToType(inst_tag);
const inst = @fieldParentPtr(SpecificInst, "base", base);
const Positionals = @TypeOf(inst.positionals);
try stream.writeAll("= " ++ @tagName(inst_tag) ++ "(");
const pos_fields = @typeInfo(Positionals).Struct.fields;
inline for (pos_fields) |arg_field, i| {
if (i != 0) {
try stream.writeAll(", ");
}
try self.writeParamToStream(stream, @field(inst.positionals, arg_field.name), inst_table);
}
comptime var need_comma = pos_fields.len != 0;
const KW_Args = @TypeOf(inst.kw_args);
inline for (@typeInfo(KW_Args).Struct.fields) |arg_field, i| {
if (need_comma) {
try stream.writeAll(", ");
}
if (@typeInfo(arg_field.field_type) == .Optional) {
if (@field(inst.kw_args, arg_field.name)) |non_optional| {
try stream.print("{}=", .{arg_field.name});
try self.writeParamToStream(stream, non_optional, inst_table);
need_comma = true;
}
} else {
try stream.print("{}=", .{arg_field.name});
try self.writeParamToStream(stream, @field(inst.kw_args, arg_field.name), inst_table);
need_comma = true;
}
}
try stream.writeByte(')');
}
fn writeParamToStream(self: Module, stream: var, param: var, inst_table: *const InstPtrTable) !void {
if (@typeInfo(@TypeOf(param)) == .Enum) {
return stream.writeAll(@tagName(param));
}
switch (@TypeOf(param)) {
Value => return stream.print("{}", .{param}),
*Inst => return self.writeInstParamToStream(stream, param, inst_table),
[]*Inst => {
try stream.writeByte('[');
for (param) |inst, i| {
if (i != 0) {
try stream.writeAll(", ");
}
try self.writeInstParamToStream(stream, inst, inst_table);
}
try stream.writeByte(']');
},
Inst.Fn.Body => {
try stream.writeAll("{\n");
for (param.instructions) |inst, i| {
try stream.print(" %{} ", .{i});
try self.writeInstToStream(stream, inst, inst_table);
try stream.writeByte('\n');
}
try stream.writeByte('}');
},
bool => return stream.writeByte("01"[@boolToInt(param)]),
[]u8 => return std.zig.renderStringLiteral(param, stream),
BigInt => return stream.print("{}", .{param}),
else => |T| @compileError("unimplemented: rendering parameter of type " ++ @typeName(T)),
}
}
fn writeInstParamToStream(self: Module, stream: var, inst: *Inst, inst_table: *const InstPtrTable) !void {
const info = inst_table.getValue(inst).?;
const prefix = if (info.fn_body == null) "@" else "%";
try stream.print("{}{}", .{ prefix, info.index });
}
};
pub fn parse(allocator: *Allocator, source: [:0]const u8) Allocator.Error!Module {
var global_name_map = std.StringHashMap(usize).init(allocator);
defer global_name_map.deinit();
var parser: Parser = .{
.allocator = allocator,
.i = 0,
.source = source,
.decls = std.ArrayList(*Inst).init(allocator),
.errors = std.ArrayList(ErrorMsg).init(allocator),
.global_name_map = &global_name_map,
};
parser.parseRoot() catch |err| switch (err) {
error.ParseFailure => {
assert(parser.errors.items.len != 0);
},
else => |e| return e,
};
return Module{
.decls = parser.decls.toOwnedSlice(),
.errors = parser.errors.toOwnedSlice(),
};
}
const Parser = struct {
allocator: *Allocator,
i: usize,
source: [:0]const u8,
errors: std.ArrayList(ErrorMsg),
decls: std.ArrayList(*Inst),
global_name_map: *std.StringHashMap(usize),
const Body = struct {
instructions: std.ArrayList(*Inst),
name_map: std.StringHashMap(usize),
};
fn parseBody(self: *Parser) !Inst.Fn.Body {
var body_context = Body{
.instructions = std.ArrayList(*Inst).init(self.allocator),
.name_map = std.StringHashMap(usize).init(self.allocator),
};
defer body_context.instructions.deinit();
defer body_context.name_map.deinit();
try requireEatBytes(self, "{");
skipSpace(self);
while (true) : (self.i += 1) switch (self.source[self.i]) {
';' => _ = try skipToAndOver(self, '\n'),
'%' => {
self.i += 1;
const ident = try skipToAndOver(self, ' ');
skipSpace(self);
try requireEatBytes(self, "=");
skipSpace(self);
const inst = try parseInstruction(self, &body_context);
const ident_index = body_context.instructions.items.len;
if (try body_context.name_map.put(ident, ident_index)) |_| {
return self.fail("redefinition of identifier '{}'", .{ident});
}
try body_context.instructions.append(inst);
continue;
},
' ', '\n' => continue,
'}' => {
self.i += 1;
break;
},
else => |byte| return self.failByte(byte),
};
return Inst.Fn.Body{
.instructions = body_context.instructions.toOwnedSlice(),
};
}
fn parseStringLiteral(self: *Parser) ![]u8 {
const start = self.i;
try self.requireEatBytes("\"");
while (true) : (self.i += 1) switch (self.source[self.i]) {
'"' => {
self.i += 1;
const span = self.source[start..self.i];
var bad_index: usize = undefined;
const parsed = std.zig.parseStringLiteral(self.allocator, span, &bad_index) catch |err| switch (err) {
error.InvalidCharacter => {
self.i = start + bad_index;
const bad_byte = self.source[self.i];
return self.fail("invalid string literal character: '{c}'\n", .{bad_byte});
},
else => |e| return e,
};
return parsed;
},
'\\' => {
self.i += 1;
continue;
},
0 => return self.failByte(0),
else => continue,
};
}
fn parseIntegerLiteral(self: *Parser) !BigInt {
const start = self.i;
if (self.source[self.i] == '-') self.i += 1;
while (true) : (self.i += 1) switch (self.source[self.i]) {
'0'...'9' => continue,
else => break,
};
const number_text = self.source[start..self.i];
var result = try BigInt.init(self.allocator);
result.setString(10, number_text) catch |err| {
self.i = start;
switch (err) {
error.InvalidBase => unreachable,
error.InvalidCharForDigit => return self.fail("invalid digit in integer literal", .{}),
error.DigitTooLargeForBase => return self.fail("digit too large in integer literal", .{}),
else => |e| return e,
}
};
return result;
}
fn parseRoot(self: *Parser) !void {
// The IR format is designed so that it can be tokenized and parsed at the same time.
while (true) : (self.i += 1) switch (self.source[self.i]) {
';' => _ = try skipToAndOver(self, '\n'),
'@' => {
self.i += 1;
const ident = try skipToAndOver(self, ' ');
skipSpace(self);
try requireEatBytes(self, "=");
skipSpace(self);
const inst = try parseInstruction(self, null);
const ident_index = self.decls.items.len;
if (try self.global_name_map.put(ident, ident_index)) |_| {
return self.fail("redefinition of identifier '{}'", .{ident});
}
try self.decls.append(inst);
continue;
},
' ', '\n' => continue,
0 => break,
else => |byte| return self.fail("unexpected byte: '{c}'", .{byte}),
};
}
fn eatByte(self: *Parser, byte: u8) bool {
if (self.source[self.i] != byte) return false;
self.i += 1;
return true;
}
fn skipSpace(self: *Parser) void {
while (self.source[self.i] == ' ' or self.source[self.i] == '\n') {
self.i += 1;
}
}
fn requireEatBytes(self: *Parser, bytes: []const u8) !void {
const start = self.i;
for (bytes) |byte| {
if (self.source[self.i] != byte) {
self.i = start;
return self.fail("expected '{}'", .{bytes});
}
self.i += 1;
}
}
fn skipToAndOver(self: *Parser, byte: u8) ![]const u8 {
const start_i = self.i;
while (self.source[self.i] != 0) : (self.i += 1) {
if (self.source[self.i] == byte) {
const result = self.source[start_i..self.i];
self.i += 1;
return result;
}
}
return self.fail("unexpected EOF", .{});
}
/// ParseFailure is an internal error code; handled in `parse`.
const InnerError = error{ ParseFailure, OutOfMemory };
fn failByte(self: *Parser, byte: u8) InnerError {
if (byte == 0) {
return self.fail("unexpected EOF", .{});
} else {
return self.fail("unexpected byte: '{c}'", .{byte});
}
}
fn fail(self: *Parser, comptime format: []const u8, args: var) InnerError {
@setCold(true);
const msg = try std.fmt.allocPrint(self.allocator, format, args);
(try self.errors.addOne()).* = .{
.byte_offset = self.i,
.msg = msg,
};
return error.ParseFailure;
}
fn parseInstruction(self: *Parser, body_ctx: ?*Body) InnerError!*Inst {
const fn_name = try skipToAndOver(self, '(');
inline for (@typeInfo(Inst.Tag).Enum.fields) |field| {
if (mem.eql(u8, field.name, fn_name)) {
const tag = @field(Inst.Tag, field.name);
return parseInstructionGeneric(self, field.name, Inst.TagToType(tag), body_ctx);
}
}
return self.fail("unknown instruction '{}'", .{fn_name});
}
fn parseInstructionGeneric(
self: *Parser,
comptime fn_name: []const u8,
comptime InstType: type,
body_ctx: ?*Body,
) !*Inst {
const inst_specific = try self.allocator.create(InstType);
inst_specific.base = std.meta.fieldInfo(InstType, "base").default_value.?;
if (@hasField(InstType, "ty")) {
inst_specific.ty = opt_type orelse {
return self.fail("instruction '" ++ fn_name ++ "' requires type", .{});
};
}
const Positionals = @TypeOf(inst_specific.positionals);
inline for (@typeInfo(Positionals).Struct.fields) |arg_field| {
if (self.source[self.i] == ',') {
self.i += 1;
skipSpace(self);
} else if (self.source[self.i] == ')') {
return self.fail("expected positional parameter '{}'", .{arg_field.name});
}
@field(inst_specific.positionals, arg_field.name) = try parseParameterGeneric(
self,
arg_field.field_type,
body_ctx,
);
skipSpace(self);
}
const KW_Args = @TypeOf(inst_specific.kw_args);
inst_specific.kw_args = .{}; // assign defaults
skipSpace(self);
while (eatByte(self, ',')) {
skipSpace(self);
const name = try skipToAndOver(self, '=');
inline for (@typeInfo(KW_Args).Struct.fields) |arg_field| {
const field_name = arg_field.name;
if (mem.eql(u8, name, field_name)) {
const NonOptional = switch (@typeInfo(arg_field.field_type)) {
.Optional => |info| info.child,
else => arg_field.field_type,
};
@field(inst_specific.kw_args, field_name) = try parseParameterGeneric(self, NonOptional, body_ctx);
break;
}
} else {
return self.fail("unrecognized keyword parameter: '{}'", .{name});
}
skipSpace(self);
}
try requireEatBytes(self, ")");
return &inst_specific.base;
}
fn parseParameterGeneric(self: *Parser, comptime T: type, body_ctx: ?*Body) !T {
if (@typeInfo(T) == .Enum) {
const start = self.i;
while (true) : (self.i += 1) switch (self.source[self.i]) {
' ', '\n', ',', ')' => {
const enum_name = self.source[start..self.i];
return std.meta.stringToEnum(T, enum_name) orelse {
return self.fail("tag '{}' not a member of enum '{}'", .{ enum_name, @typeName(T) });
};
},
0 => return self.failByte(0),
else => continue,
};
}
switch (T) {
Inst.Fn.Body => return parseBody(self),
bool => {
const bool_value = switch (self.source[self.i]) {
'0' => false,
'1' => true,
else => |byte| return self.fail("expected '0' or '1' for boolean value, found {c}", .{byte}),
};
self.i += 1;
return bool_value;
},
[]*Inst => {
try requireEatBytes(self, "[");
skipSpace(self);
if (eatByte(self, ']')) return &[0]*Inst{};
var instructions = std.ArrayList(*Inst).init(self.allocator);
defer instructions.deinit();
while (true) {
skipSpace(self);
try instructions.append(try parseParameterInst(self, body_ctx));
skipSpace(self);
if (!eatByte(self, ',')) break;
}
try requireEatBytes(self, "]");
return instructions.toOwnedSlice();
},
*Inst => return parseParameterInst(self, body_ctx),
Value => return self.fail("TODO implement parseParameterGeneric for type Value", .{}),
[]u8 => return self.parseStringLiteral(),
BigInt => return self.parseIntegerLiteral(),
else => @compileError("Unimplemented: ir parseParameterGeneric for type " ++ @typeName(T)),
}
return self.fail("TODO parse parameter {}", .{@typeName(T)});
}
fn parseParameterInst(self: *Parser, body_ctx: ?*Body) !*Inst {
const local_ref = switch (self.source[self.i]) {
'@' => false,
'%' => true,
else => |byte| return self.fail("unexpected byte: '{c}'", .{byte}),
};
const map = if (local_ref)
if (body_ctx) |bc|
&bc.name_map
else
return self.fail("referencing a % instruction in global scope", .{})
else
self.global_name_map;
self.i += 1;
const name_start = self.i;
while (true) : (self.i += 1) switch (self.source[self.i]) {
0, ' ', '\n', ',', ')', ']' => break,
else => continue,
};
const ident = self.source[name_start..self.i];
const kv = map.get(ident) orelse {
const bad_name = self.source[name_start - 1 .. self.i];
self.i = name_start - 1;
return self.fail("unrecognized identifier: {}", .{bad_name});
};
if (local_ref) {
return body_ctx.?.instructions.items[kv.value];
} else {
return self.decls.items[kv.value];
}
}
};

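A hedged sketch of the parse/render round trip this new file provides; the function name is an assumption, the import path assumes a file sitting next to src-self-hosted/ir.zig, and the ZIR fragment is taken from the updated "hello world IR" test below.

const std = @import("std");
const text = @import("ir/text.zig");

fn dumpZirExample(allocator: *std.mem.Allocator) !void {
    const source: [:0]const u8 =
        \\@0 = str("Hello, world!\n")
        \\@1 = primitive(void)
        \\
    ;
    var module = try text.parse(allocator, source);
    defer module.deinit();
    if (module.errors.len != 0) return error.ParseFailure;
    // Renders the instructions back out through Module.writeToStream.
    module.dump();
}
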
View File

@@ -18,7 +18,7 @@ pub const Type = extern union {
pub fn zigTypeTag(self: Type) std.builtin.TypeId {
switch (self.tag()) {
.int_u8, .int_usize => return .Int,
.@"u8", .@"usize" => return .Int,
.array_u8, .array_u8_sentinel_0 => return .Array,
.single_const_pointer => return .Pointer,
}
@@ -52,10 +52,35 @@
var ty = self;
while (true) {
switch (ty.tag()) {
.no_return => return out_stream.writeAll("noreturn"),
.int_comptime => return out_stream.writeAll("comptime_int"),
.int_u8 => return out_stream.writeAll("u8"),
.int_usize => return out_stream.writeAll("usize"),
@"u8",
@"i8",
@"isize",
@"usize",
@"noreturn",
@"void",
@"c_short",
@"c_ushort",
@"c_int",
@"c_uint",
@"c_long",
@"c_ulong",
@"c_longlong",
@"c_ulonglong",
@"c_longdouble",
@"c_void",
@"f16",
@"f32",
@"f64",
@"f128",
@"bool",
@"void",
@"type",
@"anyerror",
@"comptime_int",
@"comptime_float",
@"noreturn",
=> |t| return out_stream.writeAll(@tagName(t)),
.array_u8_sentinel_0 => {
const payload = @fieldParentPtr(Payload.Array_u8_Sentinel0, "base", ty.ptr_otherwise);
return out_stream.print("[{}:0]u8", .{payload.len});
@@ -85,17 +110,38 @@ pub const Type = extern union {
/// See `zigTypeTag` for the function that corresponds to `std.builtin.TypeId`.
pub const Tag = enum {
// The first section of this enum are tags that require no payload.
no_return,
int_comptime,
int_u8,
int_usize, // See last_no_payload_tag below.
@"u8",
@"i8",
@"isize",
@"usize",
@"c_short",
@"c_ushort",
@"c_int",
@"c_uint",
@"c_long",
@"c_ulong",
@"c_longlong",
@"c_ulonglong",
@"c_longdouble",
@"c_void",
@"f16",
@"f32",
@"f64",
@"f128",
@"bool",
@"void",
@"type",
@"anyerror",
@"comptime_int",
@"comptime_float",
@"noreturn", // See last_no_payload_tag below.
// After this, the tag requires a payload.
array_u8_sentinel_0,
array,
single_const_pointer,
pub const last_no_payload_tag = Tag.int_usize;
pub const last_no_payload_tag = Tag.@"noreturn";
pub const no_payload_count = @enumToInt(last_no_payload_tag) + 1;
};

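A hypothetical helper, not in the commit, showing what last_no_payload_tag and no_payload_count encode: tags up to and including @"noreturn" can be constructed with Type.initTag alone (as ir.zig does with Type.initTag(.@"usize")), while later tags carry a payload and need initPayload.

const Type = @import("type.zig").Type;

/// Hypothetical: true for tags that require a Payload struct.
fn tagHasPayload(tag: Type.Tag) bool {
    return @enumToInt(tag) >= Type.Tag.no_payload_count;
}
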
View File

@@ -1,33 +1,50 @@
test "hello world IR" {
exeCmp(
\\@0 = "Hello, world!\n"
\\@0 = str("Hello, world!\n")
\\@1 = primitive(void)
\\@2 = primitive(usize)
\\@3 = fntype([], @1, cc=Naked)
\\@4 = int(0)
\\@5 = int(1)
\\@6 = int(231)
\\@7 = str("len")
\\
\\@1 = fn({
\\ %0 : usize = 1 ;SYS_write
\\ %1 : usize = 1 ;STDOUT_FILENO
\\@8 = fn(@3, {
\\ %0 = as(@2, @5) ; SYS_write
\\ %1 = as(@2, @5) ; STDOUT_FILENO
\\ %2 = ptrtoint(@0) ; msg ptr
\\ %3 = fieldptr(@0, "len") ; msg len ptr
\\ %3 = fieldptr(@0, @7) ; msg len ptr
\\ %4 = deref(%3) ; msg len
\\ %5 = asm("syscall",
\\ %sysoutreg = str("={rax}")
\\ %rax = str("{rax}")
\\ %rdi = str("{rdi}")
\\ %rsi = str("{rsi}")
\\ %rdx = str("{rdx}")
\\ %rcx = str("rcx")
\\ %r11 = str("r11")
\\ %memory = str("memory")
\\ %syscall = str("syscall")
\\ %5 = asm(%syscall, @2,
\\ volatile=1,
\\ output="={rax}",
\\ inputs=["{rax}", "{rdi}", "{rsi}", "{rdx}"],
\\ clobbers=["rcx", "r11", "memory"],
\\ output=%sysoutreg,
\\ inputs=[%rax, %rdi, %rsi, %rdx],
\\ clobbers=[%rcx, %r11, %memory],
\\ args=[%0, %1, %2, %4])
\\
\\ %6 : usize = 231 ;SYS_exit_group
\\ %7 : usize = 0 ;exit code
\\ %8 = asm("syscall",
\\ %6 = as(@2, @6) ;SYS_exit_group
\\ %7 = as(@2, @4) ;exit code
\\ %8 = asm(%syscall, @2,
\\ volatile=1,
\\ output="={rax}",
\\ inputs=["{rax}", "{rdi}"],
\\ clobbers=["rcx", "r11", "memory"],
\\ output=%sysoutreg,
\\ inputs=[%rax, %rdi],
\\ clobbers=[%rcx, %r11, %memory],
\\ args=[%6, %7])
\\
\\ %9 = unreachable()
\\}, cc=naked)
\\})
\\
\\@2 = export("_start", @1)
\\@9 = str("_start")
\\@10 = export(@9, @8)
,
\\Hello, world!
\\