std: Implement TLS support for Linux

Tested on x86_64, i386, ARM, AARCH64
master
LemonBoy 2019-05-04 12:02:55 +02:00
parent 7432fb04d6
commit d8ab301aa8
5 changed files with 257 additions and 72 deletions

View File

@ -611,6 +611,7 @@ set(ZIG_STD_FILES
"os/linux.zig"
"os/linux/arm64.zig"
"os/linux/errno.zig"
"os/linux/tls.zig"
"os/linux/vdso.zig"
"os/linux/x86_64.zig"
"os/netbsd.zig"

View File

@ -3126,9 +3126,6 @@ pub const SpawnThreadError = error{
Unexpected,
};
// Program header of the PT_TLS segment, or null while none has been recorded
pub var linux_tls_phdr: ?*std.elf.Phdr = null;
pub var linux_tls_img_src: [*]const u8 = undefined; // only meaningful once linux_tls_phdr is non-null
/// caller must call wait on the returned thread
/// fn startFn(@typeOf(context)) T
/// where T is u8, noreturn, void, or !void
@ -3238,12 +3235,10 @@ pub fn spawnThread(context: var, comptime startFn: var) SpawnThreadError!*Thread
}
// Finally, the Thread Local Storage, if any.
if (!Thread.use_pthreads) {
if (linux_tls_phdr) |tls_phdr| {
l = mem.alignForward(l, tls_phdr.p_align);
if (linux.tls.tls_image) |tls_img| {
l = mem.alignForward(l, @alignOf(usize));
tls_start_offset = l;
l += tls_phdr.p_memsz;
// the fs register address
l += @sizeOf(usize);
l += tls_img.alloc_size;
}
}
break :blk l;
@ -3284,10 +3279,8 @@ pub fn spawnThread(context: var, comptime startFn: var) SpawnThreadError!*Thread
posix.CLONE_THREAD | posix.CLONE_SYSVSEM | posix.CLONE_PARENT_SETTID | posix.CLONE_CHILD_CLEARTID |
posix.CLONE_DETACHED;
var newtls: usize = undefined;
if (linux_tls_phdr) |tls_phdr| {
@memcpy(@intToPtr([*]u8, mmap_addr + tls_start_offset), linux_tls_img_src, tls_phdr.p_filesz);
newtls = mmap_addr + mmap_len - @sizeOf(usize);
@intToPtr(*usize, newtls).* = newtls;
if (linux.tls.tls_image) |tls_img| {
newtls = linux.tls.copyTLS(mmap_addr + tls_start_offset);
flags |= posix.CLONE_SETTLS;
}
const rc = posix.clone(MainFuncs.linuxThreadMain, mmap_addr + stack_end_offset, flags, arg, &thread_ptr.data.handle, newtls, &thread_ptr.data.handle);

View File

@ -3,6 +3,7 @@ const assert = std.debug.assert;
const builtin = @import("builtin");
const maxInt = std.math.maxInt;
const elf = std.elf;
pub const tls = @import("linux/tls.zig");
const vdso = @import("linux/vdso.zig");
const dl = @import("../dynamic_library.zig");
pub use switch (builtin.arch) {

242
std/os/linux/tls.zig Normal file
View File

@ -0,0 +1,242 @@
const std = @import("std");
const mem = std.mem;
const posix = std.posix;
const elf = std.elf;
const builtin = @import("builtin");
const assert = std.debug.assert;
// This file implements the two TLS variants [1] used by ELF-based systems.
//
// The variant I has the following layout in memory:
// -------------------------------------------------------
// | DTV | Zig | DTV | Alignment | TLS |
// | storage | thread data | pointer | | block |
// ------------------------^------------------------------
// `-- The thread pointer register points here
//
// In this case we allocate additional space for our control structure that's
// placed _before_ the DTV pointer together with the DTV.
//
// NOTE: Some systems such as powerpc64 or mips use this variant with a twist: the
// alignment is not present and the tp and DTV addresses are offset by a
// constant.
//
// On the other hand the variant II has the following layout in memory:
// ---------------------------------------
// | TLS | TCB | Zig | DTV |
// | block | | thread data | storage |
// --------^------------------------------
// `-- The thread pointer register points here
//
// The structure of the TCB is not defined by the ABI so we reserve enough space
// for a single pointer as some architectures such as i386 and x86_64 need a
// pointer to the TCB block itself at the address pointed by the tp.
//
// In this case the control structure and DTV are placed one after another right
// after the TLS block data.
//
// At the moment the DTV is very simple: since we only support static TLS, all
// we need is a two-word vector to hold the number of entries (1) and the
// address of the first TLS block.
//
// [1] https://www.akkadia.org/drepper/tls.pdf
// Selects which of the two ELF TLS memory layouts described above is in use
const TLSVariant = enum {
// The TLS block is placed after the address held by the thread pointer
VariantI,
// The TLS block is placed before the address held by the thread pointer
VariantII,
};
// TLS layout used by the target architecture; compiling for an architecture
// that is not listed here is rejected with a compile error
const tls_variant = switch (builtin.arch) {
.arm, .armeb, .aarch64, .aarch64_be => TLSVariant.VariantI,
.x86_64, .i386 => TLSVariant.VariantII,
else => @compileError("undefined tls_variant for this architecture"),
};
// Controls how many bytes are reserved for the Thread Control Block
const tls_tcb_size = switch (builtin.arch) {
// ARM EABI mandates enough space for two pointers: the first one points to
// the DTV while the second one is unspecified but reserved
.arm, .armeb, .aarch64, .aarch64_be => 2 * @sizeOf(usize),
// A single pointer-sized slot; copyTLS stores the TCB's own address in it
.i386, .x86_64 => @sizeOf(usize),
// No TCB space is reserved for any other architecture
else => 0,
};
// Controls if the TCB should be aligned according to the TLS segment p_align.
// When true, initTLS rounds the space reserved for the TCB up to a multiple of
// p_align, so the TLS block that follows it starts at an aligned offset.
const tls_tcb_align_size = switch (builtin.arch) {
.arm, .armeb, .aarch64, .aarch64_be => true,
else => false,
};
// Validate the architecture-specific parameters at compile time: padding the
// TCB up to the segment alignment only makes sense for the variant I layout
comptime {
    if (tls_tcb_align_size) {
        if (tls_variant != TLSVariant.VariantI) {
            @compileError("tls_tcb_align_size is only meaningful for variant I TLS");
        }
    }
}
// Some architectures add some offset to the tp and dtv addresses in order to
// make the generated code more efficient
// Added by copyTLS to the thread pointer value it returns; currently zero for
// every architecture
const tls_tp_offset = switch (builtin.arch) {
else => 0,
};
// Added by copyTLS to the TLS block address it stores in the DTV; currently
// zero for every architecture
const tls_dtv_offset = switch (builtin.arch) {
else => 0,
};
// Per-thread storage for Zig's use
// Currently has no fields; initTLS still reserves @sizeOf(CustomData) bytes for
// it in the TLS area layout
const CustomData = packed struct {
};
// Dynamic Thread Vector
const DTV = packed struct {
// Number of entries in tls_block; copyTLS always sets this to 1
entries: usize,
// Address of the thread's TLS block (plus tls_dtv_offset), filled by copyTLS
tls_block: [1]usize,
};
// Holds all the information about the process TLS image
const TLSImage = struct {
// Initialization image of the TLS segment (p_filesz bytes), copied by copyTLS
// into every TLS area
data_src: []u8,
// Total number of bytes a TLS area occupies, control structures included
alloc_size: usize,
// Offset of the TCB from the start of the TLS area
tcb_offset: usize,
// Offset of the DTV from the start of the TLS area
dtv_offset: usize,
// Offset of the TLS block data from the start of the TLS area
data_offset: usize,
};
// Set by initTLS when the program has a PT_TLS segment; stays null otherwise
pub var tls_image: ?TLSImage = null;
// Installs `addr` as the thread pointer of the calling thread.
// Only x86_64 and aarch64 are implemented; building for any other
// architecture is a compile error.
pub fn setThreadPointer(addr: usize) void {
    switch (builtin.arch) {
        .aarch64 => {
            // On aarch64 the thread pointer is written to tpidr_el0
            asm volatile (
                \\ msr tpidr_el0, %[addr]
                :
                : [addr] "r" (addr)
            );
        },
        .x86_64 => {
            // On x86_64 the value is handed to the arch_prctl syscall
            const ARCH_SET_FS = 0x1002;
            const status = std.os.linux.syscall2(std.os.linux.SYS_arch_prctl, ARCH_SET_FS, addr);
            // arch_prctl is documented to never fail
            assert(status == 0);
        },
        else => @compileError("Unsupported architecture"),
    }
}
// Locates the PT_TLS program header through the ELF auxiliary vector and, when
// one exists, records in `tls_image` the initialization image together with the
// size and layout of the per-thread TLS area that copyTLS fills in.
// `tls_image` is left untouched when the program has no TLS segment.
// Panics when no auxiliary vector is available.
pub fn initTLS() void {
    var tls_phdr: ?*elf.Phdr = null;
    var img_base: usize = 0;

    if (std.os.linux_elf_aux_maybe) |auxv| {
        // Start from zero rather than `undefined`: if the auxiliary vector
        // lacks one of these entries the sanity check below fails in a
        // deterministic way instead of reading an uninitialized value
        var at_phent: usize = 0;
        var at_phnum: usize = 0;
        var at_phdr: usize = 0;

        var i: usize = 0;
        while (auxv[i].a_type != elf.AT_NULL) : (i += 1) {
            switch (auxv[i].a_type) {
                elf.AT_PHENT => at_phent = auxv[i].a_un.a_val,
                elf.AT_PHNUM => at_phnum = auxv[i].a_un.a_val,
                elf.AT_PHDR => at_phdr = auxv[i].a_un.a_val,
                else => continue,
            }
        }

        // Sanity check
        assert(at_phent == @sizeOf(elf.Phdr));

        // Search the TLS segment
        const phdrs = (@intToPtr([*]elf.Phdr, at_phdr))[0..at_phnum];

        for (phdrs) |*phdr| {
            switch (phdr.p_type) {
                // The difference between where the program headers actually
                // are and where they claim to be is the image load offset
                elf.PT_PHDR => img_base = at_phdr - phdr.p_vaddr,
                elf.PT_TLS => tls_phdr = phdr,
                else => continue,
            }
        }
    } else {
        @panic("no auxv vector available!");
    }

    if (tls_phdr) |phdr| {
        // Offsets into the allocated TLS area
        var tcb_offset: usize = undefined;
        var dtv_offset: usize = undefined;
        var data_offset: usize = undefined;

        // Compute the total size of the ABI-specific data plus our own control
        // structures
        const alloc_size = switch (tls_variant) {
            .VariantI => blk: {
                var l: usize = 0;
                dtv_offset = l;
                l += @sizeOf(DTV);
                // Zig's own per-thread data sits between the DTV and the TCB
                l += @sizeOf(CustomData);
                l = mem.alignForward(l, phdr.p_align);
                tcb_offset = l;
                if (tls_tcb_align_size) {
                    l += mem.alignForward(tls_tcb_size, phdr.p_align);
                } else {
                    l += tls_tcb_size;
                }
                data_offset = l;
                l += phdr.p_memsz;
                break :blk l;
            },
            .VariantII => blk: {
                var l: usize = 0;
                data_offset = l;
                l += phdr.p_memsz;
                l = mem.alignForward(l, phdr.p_align);
                tcb_offset = l;
                l += tls_tcb_size;
                // Zig's own per-thread data sits between the TCB and the DTV
                l += @sizeOf(CustomData);
                dtv_offset = l;
                l += @sizeOf(DTV);
                break :blk l;
            },
        };

        tls_image = TLSImage{
            // Only p_filesz bytes are backed by the file; copyTLS zeroes the
            // whole area before copying them
            .data_src = @intToPtr([*]u8, phdr.p_vaddr + img_base)[0..phdr.p_filesz],
            .alloc_size = alloc_size,
            .tcb_offset = tcb_offset,
            .dtv_offset = dtv_offset,
            .data_offset = data_offset,
        };
    }
}
// Initializes the TLS area starting at `addr`, which must provide at least
// `tls_image.alloc_size` writable bytes, and returns the value to load into
// the thread pointer register for it.
// Panics when initTLS did not find a TLS segment.
pub fn copyTLS(addr: usize) usize {
    const image = tls_image orelse @panic("copyTLS called with no TLS section!");

    const dtv_addr = addr + image.dtv_offset;
    const tcb_addr = addr + image.tcb_offset;
    const data_addr = addr + image.data_offset;

    // Be paranoid, clear the area we're going to use
    @memset(@intToPtr([*]u8, addr), 0, image.alloc_size);

    // Prepare the DTV: a single entry pointing at the TLS block
    const dtv = @intToPtr(*DTV, dtv_addr);
    dtv.entries = 1;
    dtv.tls_block[0] = data_addr + tls_dtv_offset;

    // Set-up the TCB: its first word holds the DTV address for variant I and
    // the address of the TCB itself for variant II
    @intToPtr(*usize, tcb_addr).* = switch (tls_variant) {
        .VariantI => dtv_addr,
        .VariantII => tcb_addr,
    };

    // Copy the initialization image into the TLS block
    @memcpy(@intToPtr([*]u8, data_addr), image.data_src.ptr, image.data_src.len);

    // Return the corrected (if needed) value for the tp register
    return tcb_addr + tls_tp_offset;
}
// Statically allocated storage backing the TLS area of the main thread
var main_thread_tls_buffer: [64]u8 align(32) = undefined;

// Returns the address of a TLS area able to hold `size` bytes.
// The area is the fixed `main_thread_tls_buffer`, so every call returns the
// same address; asserts that `size` does not exceed the buffer capacity.
pub fn allocateTLS(size: usize) usize {
    // A request that exactly fills the buffer still fits
    assert(size <= main_thread_tls_buffer.len);
    return @ptrToInt(&main_thread_tls_buffer);
}

View File

@ -67,24 +67,19 @@ fn posixCallMainAndExit() noreturn {
var envp_count: usize = 0;
while (envp_optional[envp_count]) |_| : (envp_count += 1) {}
const envp = @ptrCast([*][*]u8, envp_optional)[0..envp_count];
if (builtin.os == builtin.Os.linux) {
// Scan auxiliary vector.
const auxv = @ptrCast([*]std.elf.Auxv, envp.ptr + envp_count + 1);
std.os.linux_elf_aux_maybe = auxv;
var i: usize = 0;
var at_phdr: usize = 0;
var at_phnum: usize = 0;
var at_phent: usize = 0;
while (auxv[i].a_un.a_val != 0) : (i += 1) {
switch (auxv[i].a_type) {
std.elf.AT_PAGESZ => assert(auxv[i].a_un.a_val == std.os.page_size),
std.elf.AT_PHDR => at_phdr = auxv[i].a_un.a_val,
std.elf.AT_PHNUM => at_phnum = auxv[i].a_un.a_val,
std.elf.AT_PHENT => at_phent = auxv[i].a_un.a_val,
else => {},
std.os.linux.tls.initTLS();
if (!builtin.single_threaded) {
if (std.os.linux.tls.tls_image) |tls_img| {
const tls_addr = std.os.linux.tls.allocateTLS(tls_img.alloc_size);
const tp = std.os.linux.tls.copyTLS(tls_addr);
std.os.linux.tls.setThreadPointer(tp);
}
}
if (!builtin.single_threaded) linuxInitializeThreadLocalStorage(at_phdr, at_phnum, at_phent);
}
std.os.posix.exit(callMainWithArgs(argc, argv, envp));
@ -140,50 +135,3 @@ inline fn callMain() u8 {
// Alignment of the statically allocated TLS storage below
const main_thread_tls_align = 32;
// Zero-initialized storage for the main thread's TLS data
var main_thread_tls_bytes: [64]u8 align(main_thread_tls_align) = [1]u8{0} ** 64;
// Walks the `at_phnum` program headers of `at_phent` bytes each that start at
// `at_phdr` and, if a PT_TLS segment is present, copies its initialization
// image into `main_thread_tls_bytes` and sets the thread pointer to the address
// right after the TLS data, where a pointer to itself is stored.
fn linuxInitializeThreadLocalStorage(at_phdr: usize, at_phnum: usize, at_phent: usize) void {
    var load_base: usize = 0;

    var index: usize = 0;
    while (index < at_phnum) : (index += 1) {
        const phdr = @intToPtr(*std.elf.Phdr, at_phdr + index * at_phent);
        // TODO look for PT_DYNAMIC when we have https://github.com/ziglang/zig/issues/1917
        if (phdr.p_type == std.elf.PT_PHDR) {
            load_base = at_phdr - phdr.p_vaddr;
        } else if (phdr.p_type == std.elf.PT_TLS) {
            std.os.linux_tls_phdr = phdr;
        }
    }

    // Nothing to set up when the program has no TLS segment
    const tls_phdr = std.os.linux_tls_phdr orelse return;
    std.os.linux_tls_img_src = @intToPtr([*]const u8, load_base + tls_phdr.p_vaddr);

    const area_start = @ptrToInt(&main_thread_tls_bytes);
    const tp_addr = area_start + tls_phdr.p_memsz;
    const area_end = area_start + main_thread_tls_bytes.len;
    assert(area_end >= tp_addr + @sizeOf(usize)); // not enough preallocated Thread Local Storage
    assert(main_thread_tls_align >= tls_phdr.p_align); // preallocated Thread Local Storage not aligned enough

    @memcpy(&main_thread_tls_bytes, std.os.linux_tls_img_src, tls_phdr.p_filesz);

    // The word at the thread pointer address holds that same address
    @intToPtr(*usize, tp_addr).* = tp_addr;
    linuxSetThreadArea(tp_addr);
}
// Installs `addr` as the thread pointer of the calling thread.
// Only x86_64 and aarch64 are implemented; building for any other
// architecture is a compile error.
fn linuxSetThreadArea(addr: usize) void {
    switch (builtin.arch) {
        builtin.Arch.x86_64 => {
            const ARCH_SET_FS = 0x1002;
            const rc = std.os.linux.syscall2(std.os.linux.SYS_arch_prctl, ARCH_SET_FS, addr);
            // arch_prctl is documented to never fail
            assert(rc == 0);
        },
        builtin.Arch.aarch64 => {
            // Pass `addr` as an explicit register operand instead of assuming
            // it still lives in x0, and let the function return through its
            // normal epilogue rather than executing `ret` inside the asm block
            asm volatile (
                \\ msr tpidr_el0, %[addr]
                :
                : [addr] "r" (addr)
            );
        },
        else => @compileError("Unsupported architecture"),
    }
}