From 8d812dba30fe3c17e9d64607fdf813977eaf0496 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 19 May 2020 13:33:36 -0400 Subject: [PATCH] stage2: set up a trampoline table for functions However there does not appear to be an x86 encoding for calling an immediate address. So there's no point of setting this up. We should just emit an indirect call to the got addr. --- src-self-hosted/Module.zig | 17 ++-- src-self-hosted/codegen.zig | 35 +++++++- src-self-hosted/link.zig | 165 +++++++++++++++++++++++++++++++----- 3 files changed, 183 insertions(+), 34 deletions(-) diff --git a/src-self-hosted/Module.zig b/src-self-hosted/Module.zig index 1ff551ecb..da826a6e9 100644 --- a/src-self-hosted/Module.zig +++ b/src-self-hosted/Module.zig @@ -231,6 +231,7 @@ pub const Fn = struct { dependency_failure, success: Body, }, + owner_decl: *Decl, /// This memory is temporary and points to stack memory for the duration /// of Fn analysis. @@ -883,14 +884,6 @@ fn resolveDecl( }; const arena_state = try decl_scope.arena.allocator.create(std.heap.ArenaAllocator.State); - const has_codegen_bits = typed_value.ty.hasCodeGenBits(); - if (has_codegen_bits) { - // We don't fully codegen the decl until later, but we do need to reserve a global - // offset table index for it. This allows us to codegen decls out of dependency order, - // increasing how many computations can be done in parallel. - try self.bin_file.allocateDeclIndexes(new_decl); - } - arena_state.* = decl_scope.arena.state; new_decl.typed_value = .{ @@ -900,7 +893,12 @@ fn resolveDecl( }, }; new_decl.analysis = .complete; - if (has_codegen_bits) { + if (typed_value.ty.hasCodeGenBits()) { + // We don't fully codegen the decl until later, but we do need to reserve a global + // offset table index for it. This allows us to codegen decls out of dependency order, + // increasing how many computations can be done in parallel. + try self.bin_file.allocateDeclIndexes(new_decl); + // We ensureCapacity when scanning for decls. 
self.work_queue.writeItemAssumeCapacity(.{ .codegen_decl = new_decl }); } @@ -1329,6 +1327,7 @@ fn analyzeInstFn(self: *Module, scope: *Scope, fn_inst: *zir.Inst.Fn) InnerError new_func.* = .{ .fn_type = fn_type, .analysis = .{ .queued = fn_inst }, + .owner_decl = scope.decl(), }; const fn_payload = try scope.arena().create(Value.Payload.Function); fn_payload.* = .{ .func = new_func }; diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 08a7b29ca..6d0c0cee7 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -19,6 +19,24 @@ pub const Result = union(enum) { fail: *Module.ErrorMsg, }; +pub fn pltEntrySize(target: Target) u16 { + return switch (target.cpu.arch) { + .i386, .x86_64 => 5, + else => @panic("TODO implement pltEntrySize for more architectures"), + }; +} + +pub fn writePltEntry(target: Target, buf: []u8, addr: u32) void { + switch (target.cpu.arch) { + .i386, .x86_64 => { + // 9a xx xx xx xx call addr + buf[0] = 0x9a; + mem.writeIntLittle(u32, buf[1..5], addr); + }, + else => @panic("TODO implement writePltEntry for more architectures"), + } +} + pub fn generateSymbol( bin_file: *link.ElfFile, src: usize, @@ -203,7 +221,20 @@ const Function = struct { if (func_inst.val.cast(Value.Payload.Function)) |func_val| { const func = func_val.func; - return self.fail(inst.base.src, "TODO implement calling function", .{}); + const plt_index = func.owner_decl.link.offset_table_index.plt; + const plt = &self.bin_file.program_headers.items[self.bin_file.phdr_got_plt_index.?]; + const plt_entry_size = pltEntrySize(self.target.*); + const plt_addr = @intCast(u32, plt.p_vaddr + func.owner_decl.link.offset_table_index.plt * plt_entry_size); + // ea xx xx xx xx jmp addr + try self.code.resize(self.code.items.len + 5); + self.code.items[self.code.items.len - 5] = 0xea; + mem.writeIntLittle(u32, self.code.items[self.code.items.len - 4 ..][0..4], plt_addr); + const return_type = func.fn_type.fnReturnType(); + switch 
(return_type.zigTypeTag()) { + .Void => return MCValue{ .none = {} }, + .NoReturn => return MCValue{ .unreach = {} }, + else => return self.fail(inst.base.src, "TODO implement fn call with non-void return value", .{}), + } } else { return self.fail(inst.base.src, "TODO implement calling weird function values", .{}); } @@ -575,7 +606,7 @@ const Function = struct { if (typed_value.val.cast(Value.Payload.DeclRef)) |payload| { const got = &self.bin_file.program_headers.items[self.bin_file.phdr_got_index.?]; const decl = payload.decl; - const got_addr = got.p_vaddr + decl.link.offset_table_index * ptr_bytes; + const got_addr = got.p_vaddr + decl.link.offset_table_index.got * ptr_bytes; return MCValue{ .memory = got_addr }; } return self.fail(src, "TODO codegen more kinds of const pointers", .{}); diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig index a0c7ec849..9f376252d 100644 --- a/src-self-hosted/link.zig +++ b/src-self-hosted/link.zig @@ -110,6 +110,7 @@ pub const ElfFile = struct { /// The index into the program headers of the global offset table. /// It needs PT_LOAD and Read flags. phdr_got_index: ?u16 = null, + phdr_got_plt_index: ?u16 = null, entry_addr: ?u64 = null, shstrtab: std.ArrayListUnmanaged(u8) = std.ArrayListUnmanaged(u8){}, @@ -118,6 +119,7 @@ pub const ElfFile = struct { text_section_index: ?u16 = null, symtab_section_index: ?u16 = null, got_section_index: ?u16 = null, + got_plt_section_index: ?u16 = null, /// The same order as in the file. ELF requires global symbols to all be after the /// local symbols, they cannot be mixed. So we must buffer all the global symbols and @@ -130,11 +132,16 @@ pub const ElfFile = struct { /// If the vaddr of the executable program header changes, the entire /// offset table needs to be rewritten. offset_table: std.ArrayListUnmanaged(u64) = std.ArrayListUnmanaged(u64){}, + /// Same order as in the file. The value is the absolute vaddr value. 
+ /// If the vaddr of the executable program header changes, the entire + /// fn trampoline table needs to be rewritten. + fn_trampoline_table: std.ArrayListUnmanaged(u64) = std.ArrayListUnmanaged(u64){}, phdr_table_dirty: bool = false, shdr_table_dirty: bool = false, shstrtab_dirty: bool = false, offset_table_count_dirty: bool = false, + fn_trampoline_table_count_dirty: bool = false, error_flags: ErrorFlags = ErrorFlags{}, @@ -150,12 +157,18 @@ pub const ElfFile = struct { /// If this field is 0, it means the codegen size = 0 and there is no symbol or /// offset table entry. local_sym_index: u32, - /// This field is undefined for symbols with size = 0. - offset_table_index: u32, + /// This is unallocated when size = 0, since there is no offset table index. + offset_table_index: union { + unallocated: void, + /// This is an index into offset_table + got: u32, + /// This is an index into fn_trampoline_table + plt: u32, + }, pub const empty = Decl{ .local_sym_index = 0, - .offset_table_index = undefined, + .offset_table_index = .{ .unallocated = {} }, }; }; @@ -170,6 +183,7 @@ pub const ElfFile = struct { self.local_symbols.deinit(self.allocator); self.global_symbols.deinit(self.allocator); self.offset_table.deinit(self.allocator); + self.fn_trampoline_table.deinit(self.allocator); if (self.owns_file_handle) { if (self.file) |f| f.close(); } @@ -343,6 +357,30 @@ pub const ElfFile = struct { }); self.phdr_table_dirty = true; } + if (self.phdr_got_plt_index == null) { + self.phdr_got_plt_index = @intCast(u16, self.program_headers.items.len); + const file_size = @as(u64, ptr_size) * self.options.symbol_count_hint; + // We really only need ptr alignment but since we are using PROGBITS, linux requires + // page align. + const p_align = 0x1000; + const off = self.findFreeSpace(file_size, p_align); + //std.debug.warn("found PT_LOAD free space 0x{x} to 0x{x}\n", .{ off, off + file_size }); + // TODO instead of hard coding the vaddr, make a function to find a vaddr to put things at. 
+ // we'll need to re-use that function anyway, in case the GOT grows and overlaps something + // else in virtual memory. + const default_got_plt_addr = 0x6000000; + try self.program_headers.append(self.allocator, .{ + .p_type = elf.PT_LOAD, + .p_offset = off, + .p_filesz = file_size, + .p_vaddr = default_got_plt_addr, + .p_paddr = default_got_plt_addr, + .p_memsz = file_size, + .p_align = p_align, + .p_flags = elf.PF_R, + }); + self.phdr_table_dirty = true; + } if (self.shstrtab_index == null) { self.shstrtab_index = @intCast(u16, self.sections.items.len); assert(self.shstrtab.items.len == 0); @@ -400,6 +438,24 @@ pub const ElfFile = struct { }); self.shdr_table_dirty = true; } + if (self.got_plt_section_index == null) { + self.got_plt_section_index = @intCast(u16, self.sections.items.len); + const phdr = &self.program_headers.items[self.phdr_got_plt_index.?]; + + try self.sections.append(self.allocator, .{ + .sh_name = try self.makeString(".got.plt"), + .sh_type = elf.SHT_PROGBITS, + .sh_flags = elf.SHF_ALLOC, + .sh_addr = phdr.p_vaddr, + .sh_offset = phdr.p_offset, + .sh_size = phdr.p_filesz, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = phdr.p_align, + .sh_entsize = 0, + }); + self.shdr_table_dirty = true; + } if (self.symtab_section_index == null) { self.symtab_section_index = @intCast(u16, self.sections.items.len); const min_align: u16 = if (small_ptr) @alignOf(elf.Elf32_Sym) else @alignOf(elf.Elf64_Sym); @@ -584,6 +640,7 @@ pub const ElfFile = struct { assert(!self.shdr_table_dirty); assert(!self.shstrtab_dirty); assert(!self.offset_table_count_dirty); + assert(!self.fn_trampoline_table_count_dirty); const syms_sect = &self.sections.items[self.symtab_section_index.?]; assert(syms_sect.sh_info == self.local_symbols.items.len); } @@ -740,6 +797,7 @@ pub const ElfFile = struct { const amt = try self.file.?.copyRangeAll(shdr.sh_offset, self.file.?, new_offset, text_size); if (amt != text_size) return error.InputOutput; shdr.sh_offset = new_offset; + 
phdr.p_offset = new_offset; } // Now that we know the code size, we need to update the program header for executable code shdr.sh_size = needed_size; @@ -778,10 +836,14 @@ pub const ElfFile = struct { pub fn allocateDeclIndexes(self: *ElfFile, decl: *Module.Decl) !void { if (decl.link.local_sym_index != 0) return; + const is_fn = (decl.typed_value.most_recent.typed_value.ty.zigTypeTag() == .Fn); + try self.local_symbols.ensureCapacity(self.allocator, self.local_symbols.items.len + 1); try self.offset_table.ensureCapacity(self.allocator, self.offset_table.items.len + 1); + try self.fn_trampoline_table.ensureCapacity(self.allocator, self.fn_trampoline_table.items.len + 1); const local_sym_index = self.local_symbols.items.len; const offset_table_index = self.offset_table.items.len; + const fn_trampoline_table_index = self.fn_trampoline_table.items.len; const phdr = &self.program_headers.items[self.phdr_load_re_index.?]; self.local_symbols.appendAssumeCapacity(.{ @@ -792,15 +854,20 @@ pub const ElfFile = struct { .st_value = phdr.p_vaddr, .st_size = 0, }); - errdefer self.local_symbols.shrink(self.allocator, self.local_symbols.items.len - 1); - self.offset_table.appendAssumeCapacity(0); - errdefer self.offset_table.shrink(self.allocator, self.offset_table.items.len - 1); - - self.offset_table_count_dirty = true; + if (is_fn) { + self.fn_trampoline_table.appendAssumeCapacity(0); + self.fn_trampoline_table_count_dirty = true; + } else { + self.offset_table.appendAssumeCapacity(0); + self.offset_table_count_dirty = true; + } decl.link = .{ .local_sym_index = @intCast(u32, local_sym_index), - .offset_table_index = @intCast(u32, offset_table_index), + .offset_table_index = if (is_fn) + .{ .plt = @intCast(u32, fn_trampoline_table_index) } + else + .{ .got = @intCast(u32, offset_table_index) }, }; } @@ -818,6 +885,7 @@ pub const ElfFile = struct { return; }, }; + const is_fn = (typed_value.ty.zigTypeTag() == .Fn); const required_alignment = 
typed_value.ty.abiAlignment(self.options.target); @@ -837,14 +905,13 @@ pub const ElfFile = struct { const file_offset = if (need_realloc) fo: { const new_block = try self.allocateTextBlock(code.len, required_alignment); local_sym.st_value = new_block.vaddr; - self.offset_table.items[decl.link.offset_table_index] = new_block.vaddr; - - //std.debug.warn("{}: writing got index {}=0x{x}\n", .{ - // decl.name, - // decl.link.offset_table_index, - // self.offset_table.items[decl.link.offset_table_index], - //}); - try self.writeOffsetTableEntry(decl.link.offset_table_index); + if (is_fn) { + self.fn_trampoline_table.items[decl.link.offset_table_index.plt] = new_block.vaddr; + try self.writeFnTrampolineEntry(decl.link.offset_table_index.plt); + } else { + self.offset_table.items[decl.link.offset_table_index.got] = new_block.vaddr; + try self.writeOffsetTableEntry(decl.link.offset_table_index.got); + } break :fo new_block.file_offset; } else existing_block.file_offset; @@ -861,11 +928,13 @@ pub const ElfFile = struct { } else { try self.local_symbols.ensureCapacity(self.allocator, self.local_symbols.items.len + 1); try self.offset_table.ensureCapacity(self.allocator, self.offset_table.items.len + 1); + try self.fn_trampoline_table.ensureCapacity(self.allocator, self.fn_trampoline_table.items.len + 1); const decl_name = mem.spanZ(decl.name); const name_str_index = try self.makeString(decl_name); const new_block = try self.allocateTextBlock(code.len, required_alignment); const local_sym_index = self.local_symbols.items.len; const offset_table_index = self.offset_table.items.len; + const fn_trampoline_table_index = self.fn_trampoline_table.items.len; //std.debug.warn("add symbol for {} at vaddr 0x{x}, size {}\n", .{ decl.name, new_block.vaddr, code.len }); self.local_symbols.appendAssumeCapacity(.{ @@ -877,17 +946,32 @@ pub const ElfFile = struct { .st_size = code.len, }); errdefer self.local_symbols.shrink(self.allocator, self.local_symbols.items.len - 1); - 
self.offset_table.appendAssumeCapacity(new_block.vaddr); - errdefer self.offset_table.shrink(self.allocator, self.offset_table.items.len - 1); - - self.offset_table_count_dirty = true; + if (is_fn) { + self.fn_trampoline_table.appendAssumeCapacity(new_block.vaddr); + } else { + self.offset_table.appendAssumeCapacity(new_block.vaddr); + } + errdefer if (is_fn) { + self.fn_trampoline_table.shrink(self.allocator, self.fn_trampoline_table.items.len - 1); + } else { + self.offset_table.shrink(self.allocator, self.offset_table.items.len - 1); + }; try self.writeSymbol(local_sym_index); - try self.writeOffsetTableEntry(offset_table_index); + if (is_fn) { + try self.writeFnTrampolineEntry(fn_trampoline_table_index); + self.fn_trampoline_table_count_dirty = true; + } else { + try self.writeOffsetTableEntry(offset_table_index); + self.offset_table_count_dirty = true; + } decl.link = .{ .local_sym_index = @intCast(u32, local_sym_index), - .offset_table_index = @intCast(u32, offset_table_index), + .offset_table_index = if (is_fn) + .{ .plt = @intCast(u32, fn_trampoline_table_index) } + else + .{ .got = @intCast(u32, offset_table_index) }, }; //std.debug.warn("writing new {} at vaddr 0x{x}\n", .{ decl.name, new_block.vaddr }); @@ -1017,6 +1101,40 @@ pub const ElfFile = struct { } } + fn writeFnTrampolineEntry(self: *ElfFile, index: usize) !void { + const shdr = &self.sections.items[self.got_plt_section_index.?]; + const phdr = &self.program_headers.items[self.phdr_got_plt_index.?]; + const entry_size = codegen.pltEntrySize(self.options.target); + var entry_buf: [16]u8 = undefined; + assert(entry_size <= entry_buf.len); + + if (self.fn_trampoline_table_count_dirty) { + // TODO Also detect virtual address collisions. + const allocated_size = self.allocatedSize(shdr.sh_offset); + const needed_size = self.local_symbols.items.len * entry_size; + if (needed_size > allocated_size) { + // Must move the entire .got.plt section. 
+ const new_offset = self.findFreeSpace(needed_size, entry_size); + const amt = try self.file.?.copyRangeAll(shdr.sh_offset, self.file.?, new_offset, shdr.sh_size); + if (amt != shdr.sh_size) return error.InputOutput; + shdr.sh_offset = new_offset; + phdr.p_offset = new_offset; + } + shdr.sh_size = needed_size; + phdr.p_memsz = needed_size; + phdr.p_filesz = needed_size; + + self.shdr_table_dirty = true; // TODO look into making only the one section dirty + self.phdr_table_dirty = true; // TODO look into making only the one program header dirty + + self.fn_trampoline_table_count_dirty = false; + } + const off = shdr.sh_offset + @as(u64, entry_size) * index; + const vaddr = @intCast(u32, self.fn_trampoline_table.items[index]); + codegen.writePltEntry(self.options.target, &entry_buf, vaddr); + try self.file.?.pwriteAll(entry_buf[0..entry_size], off); + } + fn writeOffsetTableEntry(self: *ElfFile, index: usize) !void { const shdr = &self.sections.items[self.got_section_index.?]; const phdr = &self.program_headers.items[self.phdr_got_index.?]; @@ -1034,6 +1152,7 @@ pub const ElfFile = struct { const amt = try self.file.?.copyRangeAll(shdr.sh_offset, self.file.?, new_offset, shdr.sh_size); if (amt != shdr.sh_size) return error.InputOutput; shdr.sh_offset = new_offset; + phdr.p_offset = new_offset; } shdr.sh_size = needed_size; phdr.p_memsz = needed_size;