stage2: set up a trampoline table for functions

However, there does not appear to be an x86 encoding for calling an immediate
absolute address, so there is no point in setting this up. We should just emit
an indirect call through the GOT address instead.
master
Andrew Kelley 2020-05-19 13:33:36 -04:00
parent 1cde0edff4
commit 8d812dba30
3 changed files with 183 additions and 34 deletions
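
As a rough illustration of the indirect call the commit message proposes, the
sketch below (not part of this commit; the helper name and use of std.ArrayList
are assumptions) emits call [got_addr] using the FF /2 opcode with an absolute
SIB operand, i.e. a call through the pointer stored at the decl's GOT slot
rather than a call to an immediate address:

    const std = @import("std");
    const mem = std.mem;

    /// Hypothetical helper: emit an indirect call through the pointer stored
    /// at the absolute address got_addr (the decl's GOT entry).
    fn emitIndirectCallThroughGot(code: *std.ArrayList(u8), got_addr: u32) !void {
        // ff 14 25 xx xx xx xx    call qword ptr [got_addr]
        // ModRM = 0x14 and SIB = 0x25 select disp32 absolute addressing.
        try code.appendSlice(&[_]u8{ 0xff, 0x14, 0x25 });
        var buf: [4]u8 = undefined;
        mem.writeIntLittle(u32, &buf, got_addr);
        try code.appendSlice(&buf);
    }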

View File

@@ -231,6 +231,7 @@ pub const Fn = struct {
dependency_failure,
success: Body,
},
owner_decl: *Decl,
/// This memory is temporary and points to stack memory for the duration
/// of Fn analysis.
@@ -883,14 +884,6 @@ fn resolveDecl(
};
const arena_state = try decl_scope.arena.allocator.create(std.heap.ArenaAllocator.State);
const has_codegen_bits = typed_value.ty.hasCodeGenBits();
if (has_codegen_bits) {
// We don't fully codegen the decl until later, but we do need to reserve a global
// offset table index for it. This allows us to codegen decls out of dependency order,
// increasing how many computations can be done in parallel.
try self.bin_file.allocateDeclIndexes(new_decl);
}
arena_state.* = decl_scope.arena.state;
new_decl.typed_value = .{
@@ -900,7 +893,12 @@ fn resolveDecl(
},
};
new_decl.analysis = .complete;
if (has_codegen_bits) {
if (typed_value.ty.hasCodeGenBits()) {
// We don't fully codegen the decl until later, but we do need to reserve a global
// offset table index for it. This allows us to codegen decls out of dependency order,
// increasing how many computations can be done in parallel.
try self.bin_file.allocateDeclIndexes(new_decl);
// We ensureCapacity when scanning for decls.
self.work_queue.writeItemAssumeCapacity(.{ .codegen_decl = new_decl });
}
@@ -1329,6 +1327,7 @@ fn analyzeInstFn(self: *Module, scope: *Scope, fn_inst: *zir.Inst.Fn) InnerError
new_func.* = .{
.fn_type = fn_type,
.analysis = .{ .queued = fn_inst },
.owner_decl = scope.decl(),
};
const fn_payload = try scope.arena().create(Value.Payload.Function);
fn_payload.* = .{ .func = new_func };

View File

@@ -19,6 +19,24 @@ pub const Result = union(enum) {
fail: *Module.ErrorMsg,
};
pub fn pltEntrySize(target: Target) u16 {
return switch (target.cpu.arch) {
.i386, .x86_64 => 5,
else => @panic("TODO implement pltEntrySize for more architectures"),
};
}
pub fn writePltEntry(target: Target, buf: []u8, addr: u32) void {
switch (target.cpu.arch) {
.i386, .x86_64 => {
// 9a xx xx xx xx call addr
buf[0] = 0x9a;
mem.writeIntLittle(u32, buf[1..5], addr);
},
else => @panic("TODO implement writePltEntry for more architectures"),
}
}
pub fn generateSymbol(
bin_file: *link.ElfFile,
src: usize,
@@ -203,7 +221,20 @@ const Function = struct {
if (func_inst.val.cast(Value.Payload.Function)) |func_val| {
const func = func_val.func;
return self.fail(inst.base.src, "TODO implement calling function", .{});
const plt_index = func.owner_decl.link.offset_table_index.plt;
const plt = &self.bin_file.program_headers.items[self.bin_file.phdr_got_plt_index.?];
const plt_entry_size = pltEntrySize(self.target.*);
const plt_addr = @intCast(u32, plt.p_vaddr + func.owner_decl.link.offset_table_index.plt * plt_entry_size);
// ea xx xx xx xx jmp addr
try self.code.resize(self.code.items.len + 5);
self.code.items[self.code.items.len - 5] = 0xea;
mem.writeIntLittle(u32, self.code.items[self.code.items.len - 4 ..][0..4], plt_addr);
const return_type = func.fn_type.fnReturnType();
switch (return_type.zigTypeTag()) {
.Void => return MCValue{ .none = {} },
.NoReturn => return MCValue{ .unreach = {} },
else => return self.fail(inst.base.src, "TODO implement fn call with non-void return value", .{}),
}
} else {
return self.fail(inst.base.src, "TODO implement calling weird function values", .{});
}
@@ -575,7 +606,7 @@ const Function = struct {
if (typed_value.val.cast(Value.Payload.DeclRef)) |payload| {
const got = &self.bin_file.program_headers.items[self.bin_file.phdr_got_index.?];
const decl = payload.decl;
const got_addr = got.p_vaddr + decl.link.offset_table_index * ptr_bytes;
const got_addr = got.p_vaddr + decl.link.offset_table_index.got * ptr_bytes;
return MCValue{ .memory = got_addr };
}
return self.fail(src, "TODO codegen more kinds of const pointers", .{});

View File

@@ -110,6 +110,7 @@ pub const ElfFile = struct {
/// The index into the program headers of the global offset table.
/// It needs PT_LOAD and Read flags.
phdr_got_index: ?u16 = null,
phdr_got_plt_index: ?u16 = null,
entry_addr: ?u64 = null,
shstrtab: std.ArrayListUnmanaged(u8) = std.ArrayListUnmanaged(u8){},
@@ -118,6 +119,7 @@ pub const ElfFile = struct {
text_section_index: ?u16 = null,
symtab_section_index: ?u16 = null,
got_section_index: ?u16 = null,
got_plt_section_index: ?u16 = null,
/// The same order as in the file. ELF requires global symbols to all be after the
/// local symbols, they cannot be mixed. So we must buffer all the global symbols and
@@ -130,11 +132,16 @@ pub const ElfFile = struct {
/// If the vaddr of the executable program header changes, the entire
/// offset table needs to be rewritten.
offset_table: std.ArrayListUnmanaged(u64) = std.ArrayListUnmanaged(u64){},
/// Same order as in the file. The value is the absolute vaddr value.
/// If the vaddr of the executable program header changes, the entire
/// fn trampoline table needs to be rewritten.
fn_trampoline_table: std.ArrayListUnmanaged(u64) = std.ArrayListUnmanaged(u64){},
phdr_table_dirty: bool = false,
shdr_table_dirty: bool = false,
shstrtab_dirty: bool = false,
offset_table_count_dirty: bool = false,
fn_trampoline_table_count_dirty: bool = false,
error_flags: ErrorFlags = ErrorFlags{},
@@ -150,12 +157,18 @@ pub const ElfFile = struct {
/// If this field is 0, it means the codegen size = 0 and there is no symbol or
/// offset table entry.
local_sym_index: u32,
/// This field is undefined for symbols with size = 0.
offset_table_index: u32,
/// when size = 0 and there is no offset table index
offset_table_index: union {
unallocated: void,
/// This is an index into offset_table
got: u32,
/// This is an index into fn_trampoline_table
plt: u32,
},
pub const empty = Decl{
.local_sym_index = 0,
.offset_table_index = undefined,
.offset_table_index = .{ .unallocated = {} },
};
};
@@ -170,6 +183,7 @@ pub const ElfFile = struct {
self.local_symbols.deinit(self.allocator);
self.global_symbols.deinit(self.allocator);
self.offset_table.deinit(self.allocator);
self.fn_trampoline_table.deinit(self.allocator);
if (self.owns_file_handle) {
if (self.file) |f| f.close();
}
@@ -343,6 +357,30 @@ pub const ElfFile = struct {
});
self.phdr_table_dirty = true;
}
if (self.phdr_got_plt_index == null) {
self.phdr_got_plt_index = @intCast(u16, self.program_headers.items.len);
const file_size = @as(u64, ptr_size) * self.options.symbol_count_hint;
// We really only need ptr alignment but since we are using PROGBITS, linux requires
// page align.
const p_align = 0x1000;
const off = self.findFreeSpace(file_size, p_align);
//std.debug.warn("found PT_LOAD free space 0x{x} to 0x{x}\n", .{ off, off + file_size });
// TODO instead of hard coding the vaddr, make a function to find a vaddr to put things at.
// we'll need to re-use that function anyway, in case the GOT grows and overlaps something
// else in virtual memory.
const default_got_plt_addr = 0x6000000;
try self.program_headers.append(self.allocator, .{
.p_type = elf.PT_LOAD,
.p_offset = off,
.p_filesz = file_size,
.p_vaddr = default_got_plt_addr,
.p_paddr = default_got_plt_addr,
.p_memsz = file_size,
.p_align = p_align,
.p_flags = elf.PF_R,
});
self.phdr_table_dirty = true;
}
if (self.shstrtab_index == null) {
self.shstrtab_index = @intCast(u16, self.sections.items.len);
assert(self.shstrtab.items.len == 0);
@@ -400,6 +438,24 @@ pub const ElfFile = struct {
});
self.shdr_table_dirty = true;
}
if (self.got_plt_section_index == null) {
self.got_plt_section_index = @intCast(u16, self.sections.items.len);
const phdr = &self.program_headers.items[self.phdr_got_plt_index.?];
try self.sections.append(self.allocator, .{
.sh_name = try self.makeString(".got.plt"),
.sh_type = elf.SHT_PROGBITS,
.sh_flags = elf.SHF_ALLOC,
.sh_addr = phdr.p_vaddr,
.sh_offset = phdr.p_offset,
.sh_size = phdr.p_filesz,
.sh_link = 0,
.sh_info = 0,
.sh_addralign = phdr.p_align,
.sh_entsize = 0,
});
self.shdr_table_dirty = true;
}
if (self.symtab_section_index == null) {
self.symtab_section_index = @intCast(u16, self.sections.items.len);
const min_align: u16 = if (small_ptr) @alignOf(elf.Elf32_Sym) else @alignOf(elf.Elf64_Sym);
@@ -584,6 +640,7 @@ pub const ElfFile = struct {
assert(!self.shdr_table_dirty);
assert(!self.shstrtab_dirty);
assert(!self.offset_table_count_dirty);
assert(!self.fn_trampoline_table_count_dirty);
const syms_sect = &self.sections.items[self.symtab_section_index.?];
assert(syms_sect.sh_info == self.local_symbols.items.len);
}
@@ -740,6 +797,7 @@ pub const ElfFile = struct {
const amt = try self.file.?.copyRangeAll(shdr.sh_offset, self.file.?, new_offset, text_size);
if (amt != text_size) return error.InputOutput;
shdr.sh_offset = new_offset;
phdr.p_offset = new_offset;
}
// Now that we know the code size, we need to update the program header for executable code
shdr.sh_size = needed_size;
@@ -778,10 +836,14 @@ pub const ElfFile = struct {
pub fn allocateDeclIndexes(self: *ElfFile, decl: *Module.Decl) !void {
if (decl.link.local_sym_index != 0) return;
const is_fn = (decl.typed_value.most_recent.typed_value.ty.zigTypeTag() == .Fn);
try self.local_symbols.ensureCapacity(self.allocator, self.local_symbols.items.len + 1);
try self.offset_table.ensureCapacity(self.allocator, self.offset_table.items.len + 1);
try self.fn_trampoline_table.ensureCapacity(self.allocator, self.fn_trampoline_table.items.len + 1);
const local_sym_index = self.local_symbols.items.len;
const offset_table_index = self.offset_table.items.len;
const fn_trampoline_table_index = self.fn_trampoline_table.items.len;
const phdr = &self.program_headers.items[self.phdr_load_re_index.?];
self.local_symbols.appendAssumeCapacity(.{
@@ -792,15 +854,20 @@ pub const ElfFile = struct {
.st_value = phdr.p_vaddr,
.st_size = 0,
});
errdefer self.local_symbols.shrink(self.allocator, self.local_symbols.items.len - 1);
self.offset_table.appendAssumeCapacity(0);
errdefer self.offset_table.shrink(self.allocator, self.offset_table.items.len - 1);
self.offset_table_count_dirty = true;
if (is_fn) {
self.fn_trampoline_table.appendAssumeCapacity(0);
self.fn_trampoline_table_count_dirty = true;
} else {
self.offset_table.appendAssumeCapacity(0);
self.offset_table_count_dirty = true;
}
decl.link = .{
.local_sym_index = @intCast(u32, local_sym_index),
.offset_table_index = @intCast(u32, offset_table_index),
.offset_table_index = if (is_fn)
.{ .plt = @intCast(u32, fn_trampoline_table_index) }
else
.{ .got = @intCast(u32, offset_table_index) },
};
}
@@ -818,6 +885,7 @@ pub const ElfFile = struct {
return;
},
};
const is_fn = (typed_value.ty.zigTypeTag() == .Fn);
const required_alignment = typed_value.ty.abiAlignment(self.options.target);
@@ -837,14 +905,13 @@ pub const ElfFile = struct {
const file_offset = if (need_realloc) fo: {
const new_block = try self.allocateTextBlock(code.len, required_alignment);
local_sym.st_value = new_block.vaddr;
self.offset_table.items[decl.link.offset_table_index] = new_block.vaddr;
//std.debug.warn("{}: writing got index {}=0x{x}\n", .{
// decl.name,
// decl.link.offset_table_index,
// self.offset_table.items[decl.link.offset_table_index],
//});
try self.writeOffsetTableEntry(decl.link.offset_table_index);
if (is_fn) {
self.fn_trampoline_table.items[decl.link.offset_table_index.plt] = new_block.vaddr;
try self.writeFnTrampolineEntry(decl.link.offset_table_index.plt);
} else {
self.offset_table.items[decl.link.offset_table_index.got] = new_block.vaddr;
try self.writeOffsetTableEntry(decl.link.offset_table_index.got);
}
break :fo new_block.file_offset;
} else existing_block.file_offset;
@@ -861,11 +928,13 @@ pub const ElfFile = struct {
} else {
try self.local_symbols.ensureCapacity(self.allocator, self.local_symbols.items.len + 1);
try self.offset_table.ensureCapacity(self.allocator, self.offset_table.items.len + 1);
try self.fn_trampoline_table.ensureCapacity(self.allocator, self.fn_trampoline_table.items.len + 1);
const decl_name = mem.spanZ(decl.name);
const name_str_index = try self.makeString(decl_name);
const new_block = try self.allocateTextBlock(code.len, required_alignment);
const local_sym_index = self.local_symbols.items.len;
const offset_table_index = self.offset_table.items.len;
const fn_trampoline_table_index = self.fn_trampoline_table.items.len;
//std.debug.warn("add symbol for {} at vaddr 0x{x}, size {}\n", .{ decl.name, new_block.vaddr, code.len });
self.local_symbols.appendAssumeCapacity(.{
@@ -877,17 +946,32 @@ pub const ElfFile = struct {
.st_size = code.len,
});
errdefer self.local_symbols.shrink(self.allocator, self.local_symbols.items.len - 1);
self.offset_table.appendAssumeCapacity(new_block.vaddr);
errdefer self.offset_table.shrink(self.allocator, self.offset_table.items.len - 1);
self.offset_table_count_dirty = true;
if (is_fn) {
self.fn_trampoline_table.appendAssumeCapacity(new_block.vaddr);
} else {
self.offset_table.appendAssumeCapacity(new_block.vaddr);
}
errdefer if (is_fn) {
self.fn_trampoline_table.shrink(self.allocator, self.fn_trampoline_table.items.len - 1);
} else {
self.offset_table.shrink(self.allocator, self.offset_table.items.len - 1);
};
try self.writeSymbol(local_sym_index);
try self.writeOffsetTableEntry(offset_table_index);
if (is_fn) {
try self.writeFnTrampolineEntry(fn_trampoline_table_index);
self.fn_trampoline_table_count_dirty = true;
} else {
try self.writeOffsetTableEntry(offset_table_index);
self.offset_table_count_dirty = true;
}
decl.link = .{
.local_sym_index = @intCast(u32, local_sym_index),
.offset_table_index = @intCast(u32, offset_table_index),
.offset_table_index = if (is_fn)
.{ .plt = @intCast(u32, fn_trampoline_table_index) }
else
.{ .got = @intCast(u32, offset_table_index) },
};
//std.debug.warn("writing new {} at vaddr 0x{x}\n", .{ decl.name, new_block.vaddr });
@@ -1017,6 +1101,40 @@ pub const ElfFile = struct {
}
}
fn writeFnTrampolineEntry(self: *ElfFile, index: usize) !void {
const shdr = &self.sections.items[self.got_plt_section_index.?];
const phdr = &self.program_headers.items[self.phdr_got_plt_index.?];
const entry_size = codegen.pltEntrySize(self.options.target);
var entry_buf: [16]u8 = undefined;
assert(entry_size <= entry_buf.len);
if (self.fn_trampoline_table_count_dirty) {
// TODO Also detect virtual address collisions.
const allocated_size = self.allocatedSize(shdr.sh_offset);
const needed_size = self.local_symbols.items.len * entry_size;
if (needed_size > allocated_size) {
// Must move the entire .got.plt section.
const new_offset = self.findFreeSpace(needed_size, entry_size);
const amt = try self.file.?.copyRangeAll(shdr.sh_offset, self.file.?, new_offset, shdr.sh_size);
if (amt != shdr.sh_size) return error.InputOutput;
shdr.sh_offset = new_offset;
phdr.p_offset = new_offset;
}
shdr.sh_size = needed_size;
phdr.p_memsz = needed_size;
phdr.p_filesz = needed_size;
self.shdr_table_dirty = true; // TODO look into making only the one section dirty
self.phdr_table_dirty = true; // TODO look into making only the one program header dirty
self.fn_trampoline_table_count_dirty = false;
}
const off = shdr.sh_offset + @as(u64, entry_size) * index;
const vaddr = @intCast(u32, self.fn_trampoline_table.items[index]);
codegen.writePltEntry(self.options.target, &entry_buf, vaddr);
try self.file.?.pwriteAll(entry_buf[0..entry_size], off);
}
fn writeOffsetTableEntry(self: *ElfFile, index: usize) !void {
const shdr = &self.sections.items[self.got_section_index.?];
const phdr = &self.program_headers.items[self.phdr_got_index.?];
@@ -1034,6 +1152,7 @@ pub const ElfFile = struct {
const amt = try self.file.?.copyRangeAll(shdr.sh_offset, self.file.?, new_offset, shdr.sh_size);
if (amt != shdr.sh_size) return error.InputOutput;
shdr.sh_offset = new_offset;
phdr.p_offset = new_offset;
}
shdr.sh_size = needed_size;
phdr.p_memsz = needed_size;