parent
7432fb04d6
commit
d8ab301aa8
|
@ -611,6 +611,7 @@ set(ZIG_STD_FILES
|
|||
"os/linux.zig"
|
||||
"os/linux/arm64.zig"
|
||||
"os/linux/errno.zig"
|
||||
"os/linux/tls.zig"
|
||||
"os/linux/vdso.zig"
|
||||
"os/linux/x86_64.zig"
|
||||
"os/netbsd.zig"
|
||||
|
|
17
std/os.zig
17
std/os.zig
|
@ -3126,9 +3126,6 @@ pub const SpawnThreadError = error{
|
|||
Unexpected,
|
||||
};
|
||||
|
||||
pub var linux_tls_phdr: ?*std.elf.Phdr = null;
|
||||
pub var linux_tls_img_src: [*]const u8 = undefined; // defined if linux_tls_phdr is
|
||||
|
||||
/// caller must call wait on the returned thread
|
||||
/// fn startFn(@typeOf(context)) T
|
||||
/// where T is u8, noreturn, void, or !void
|
||||
|
@ -3238,12 +3235,10 @@ pub fn spawnThread(context: var, comptime startFn: var) SpawnThreadError!*Thread
|
|||
}
|
||||
// Finally, the Thread Local Storage, if any.
|
||||
if (!Thread.use_pthreads) {
|
||||
if (linux_tls_phdr) |tls_phdr| {
|
||||
l = mem.alignForward(l, tls_phdr.p_align);
|
||||
if (linux.tls.tls_image) |tls_img| {
|
||||
l = mem.alignForward(l, @alignOf(usize));
|
||||
tls_start_offset = l;
|
||||
l += tls_phdr.p_memsz;
|
||||
// the fs register address
|
||||
l += @sizeOf(usize);
|
||||
l += tls_img.alloc_size;
|
||||
}
|
||||
}
|
||||
break :blk l;
|
||||
|
@ -3284,10 +3279,8 @@ pub fn spawnThread(context: var, comptime startFn: var) SpawnThreadError!*Thread
|
|||
posix.CLONE_THREAD | posix.CLONE_SYSVSEM | posix.CLONE_PARENT_SETTID | posix.CLONE_CHILD_CLEARTID |
|
||||
posix.CLONE_DETACHED;
|
||||
var newtls: usize = undefined;
|
||||
if (linux_tls_phdr) |tls_phdr| {
|
||||
@memcpy(@intToPtr([*]u8, mmap_addr + tls_start_offset), linux_tls_img_src, tls_phdr.p_filesz);
|
||||
newtls = mmap_addr + mmap_len - @sizeOf(usize);
|
||||
@intToPtr(*usize, newtls).* = newtls;
|
||||
if (linux.tls.tls_image) |tls_img| {
|
||||
newtls = linux.tls.copyTLS(mmap_addr + tls_start_offset);
|
||||
flags |= posix.CLONE_SETTLS;
|
||||
}
|
||||
const rc = posix.clone(MainFuncs.linuxThreadMain, mmap_addr + stack_end_offset, flags, arg, &thread_ptr.data.handle, newtls, &thread_ptr.data.handle);
|
||||
|
|
|
@ -3,6 +3,7 @@ const assert = std.debug.assert;
|
|||
const builtin = @import("builtin");
|
||||
const maxInt = std.math.maxInt;
|
||||
const elf = std.elf;
|
||||
pub const tls = @import("linux/tls.zig");
|
||||
const vdso = @import("linux/vdso.zig");
|
||||
const dl = @import("../dynamic_library.zig");
|
||||
pub use switch (builtin.arch) {
|
||||
|
|
|
@ -0,0 +1,242 @@
|
|||
const std = @import("std");
|
||||
const mem = std.mem;
|
||||
const posix = std.posix;
|
||||
const elf = std.elf;
|
||||
const builtin = @import("builtin");
|
||||
const assert = std.debug.assert;
|
||||
|
||||
// This file implements the two TLS variants [1] used by ELF-based systems.
|
||||
//
|
||||
// The variant I has the following layout in memory:
|
||||
// -------------------------------------------------------
|
||||
// | DTV | Zig | DTV | Alignment | TLS |
|
||||
// | storage | thread data | pointer | | block |
|
||||
// ------------------------^------------------------------
|
||||
// `-- The thread pointer register points here
|
||||
//
|
||||
// In this case we allocate additional space for our control structure that's
|
||||
// placed _before_ the DTV pointer together with the DTV.
|
||||
//
|
||||
// NOTE: Some systems such as powerpc64 or mips use this variant with a twist: the
|
||||
// alignment is not present and the tp and DTV addresses are offset by a
|
||||
// constant.
|
||||
//
|
||||
// On the other hand the variant II has the following layout in memory:
|
||||
// ---------------------------------------
|
||||
// | TLS | TCB | Zig | DTV |
|
||||
// | block | | thread data | storage |
|
||||
// --------^------------------------------
|
||||
// `-- The thread pointer register points here
|
||||
//
|
||||
// The structure of the TCB is not defined by the ABI so we reserve enough space
|
||||
// for a single pointer as some architectures such as i386 and x86_64 need a
|
||||
// pointer to the TCB block itself at the address pointed by the tp.
|
||||
//
|
||||
// In this case the control structure and DTV are placed one after another right
|
||||
// after the TLS block data.
|
||||
//
|
||||
// At the moment the DTV is very simple since we only support static TLS, all we
|
||||
// need is a two word vector to hold the number of entries (1) and the address
|
||||
// of the first TLS block.
|
||||
//
|
||||
// [1] https://www.akkadia.org/drepper/tls.pdf
|
||||
|
||||
// Identifies which of the two ELF TLS memory layouts described at the top of
// this file is in use.
const TLSVariant = enum {
|
||||
// The TLS block is placed after the thread pointer
VariantI,
|
||||
// The TLS block is placed before the thread pointer
VariantII,
|
||||
};
|
||||
|
||||
// TLS layout used by the architecture we are compiling for. Architectures that
// are not listed here are rejected with a compile error.
const tls_variant = switch (builtin.arch) {
|
||||
.arm, .armeb, .aarch64, .aarch64_be => TLSVariant.VariantI,
|
||||
.x86_64, .i386 => TLSVariant.VariantII,
|
||||
else => @compileError("undefined tls_variant for this architecture"),
|
||||
};
|
||||
|
||||
// Controls how many bytes are reserved for the Thread Control Block
|
||||
// The value is added to the running layout size in initTLS right after the TCB
// offset has been recorded.
const tls_tcb_size = switch (builtin.arch) {
|
||||
// ARM EABI mandates enough space for two pointers: the first one points to
|
||||
// the DTV while the second one is unspecified but reserved
|
||||
.arm, .armeb, .aarch64, .aarch64_be => 2 * @sizeOf(usize),
|
||||
// A single pointer, copyTLS stores the address of the TCB itself in it
.i386, .x86_64 => @sizeOf(usize),
|
||||
// NOTE(review): presumably never used in practice since tls_variant refuses
// to compile for any other architecture - confirm
else => 0,
|
||||
};
|
||||
|
||||
// Controls if the TCB should be aligned according to the TLS segment p_align
|
||||
// When true, initTLS rounds the space reserved for the TCB up to a multiple of
// the TLS segment p_align instead of reserving exactly tls_tcb_size bytes.
const tls_tcb_align_size = switch (builtin.arch) {
|
||||
.arm, .armeb, .aarch64, .aarch64_be => true,
|
||||
else => false,
|
||||
};
|
||||
|
||||
// Check if the architecture-specific parameters look correct
|
||||
// Refuse to compile if the TCB alignment flag is enabled for an architecture
// that does not use the variant I layout.
comptime {
|
||||
if (tls_tcb_align_size and tls_variant != TLSVariant.VariantI) {
|
||||
@compileError("tls_tcb_align_size is only meaningful for variant I TLS");
|
||||
}
|
||||
}
|
||||
|
||||
// Some architectures add some offset to the tp and dtv addresses in order to
|
||||
// make the generated code more efficient
|
||||
|
||||
// Constant added by copyTLS to the address of the TCB to obtain the value for
// the thread pointer register. It is zero for every architecture at the moment.
const tls_tp_offset = switch (builtin.arch) {
|
||||
else => 0,
|
||||
};
|
||||
|
||||
// Constant added by copyTLS to the address of the TLS block before it is stored
// in the DTV. It is zero for every architecture at the moment.
const tls_dtv_offset = switch (builtin.arch) {
|
||||
else => 0,
|
||||
};
|
||||
|
||||
// Per-thread storage for Zig's use
|
||||
// The structure has no fields yet; initTLS nevertheless reserves
// @sizeOf(CustomData) bytes for it when computing the layout of the TLS area.
const CustomData = packed struct {
|
||||
};
|
||||
|
||||
// Dynamic Thread Vector
|
||||
// Only static TLS is supported so the vector describes a single TLS block.
const DTV = packed struct {
|
||||
// Number of entries in tls_block, copyTLS always sets it to 1
entries: usize,
|
||||
// Address of the TLS block (plus tls_dtv_offset) of the owning thread
tls_block: [1]usize,
|
||||
};
|
||||
|
||||
// Holds all the information about the process TLS image
|
||||
// Filled in by initTLS. Every *_offset field is relative to the beginning of
// the memory area handed to copyTLS.
const TLSImage = struct {
|
||||
// The initialization image of the TLS segment (p_filesz bytes) as found in
// the memory of the running executable
data_src: []u8,
|
||||
// Number of bytes needed for the TLS area of a single thread, control
// structures included
alloc_size: usize,
|
||||
// Where the Thread Control Block starts
tcb_offset: usize,
|
||||
// Where the Dynamic Thread Vector starts
dtv_offset: usize,
|
||||
// Where the TLS block (the copy of data_src) starts
data_offset: usize,
|
||||
};
|
||||
|
||||
// Description of the TLS segment of the running executable. It is null until
// initTLS finds a PT_TLS program header, copyTLS panics while it is null.
pub var tls_image: ?TLSImage = null;
|
||||
|
||||
/// Makes `addr` the thread pointer of the calling thread.
/// On x86_64 this goes through the `arch_prctl` system call (ARCH_SET_FS), on
/// aarch64 the value is written straight into the `tpidr_el0` register.
/// Compiling this function for any other architecture is an error.
pub fn setThreadPointer(addr: usize) void {
    switch (builtin.arch) {
        .aarch64 => {
            asm volatile (
                \\ msr tpidr_el0, %[addr]
                : : [addr] "r" (addr)
            );
        },
        .x86_64 => {
            const ARCH_SET_FS = 0x1002;
            const status = std.os.linux.syscall2(std.os.linux.SYS_arch_prctl, ARCH_SET_FS, addr);
            // A non-zero result is not expected here, arch_prctl is documented
            // to never fail
            assert(status == 0);
        },
        else => @compileError("Unsupported architecture"),
    }
}
|
||||
|
||||
/// Looks for the PT_TLS program header of the running executable and, when one
/// is present, stores in `tls_image` the location of its initialization image
/// together with the layout of the per-thread TLS area (total size and offsets
/// of the TCB, the DTV and the TLS block).
///
/// The program header table is located through the AT_PHDR, AT_PHNUM and
/// AT_PHENT entries of the auxiliary vector found in
/// `std.os.linux_elf_aux_maybe`; the function panics if that vector is null.
/// `tls_image` is left untouched if the executable has no TLS segment.
pub fn initTLS() void {
    var tls_phdr: ?*elf.Phdr = null;
    var img_base: usize = 0;

    if (std.os.linux_elf_aux_maybe) |auxv| {
        // Start from zero instead of `undefined` so that an entry missing from
        // the auxiliary vector trips the checks below rather than being read
        // while still uninitialized
        var at_phent: usize = 0;
        var at_phnum: usize = 0;
        var at_phdr: usize = 0;

        var i: usize = 0;
        while (auxv[i].a_type != elf.AT_NULL) : (i += 1) {
            switch (auxv[i].a_type) {
                elf.AT_PHENT => at_phent = auxv[i].a_un.a_val,
                elf.AT_PHNUM => at_phnum = auxv[i].a_un.a_val,
                elf.AT_PHDR => at_phdr = auxv[i].a_un.a_val,
                else => continue,
            }
        }

        // Sanity checks
        assert(at_phent == @sizeOf(elf.Phdr));
        assert(at_phdr != 0);

        // Search the TLS segment
        const phdrs = (@intToPtr([*]elf.Phdr, at_phdr))[0..at_phnum];

        for (phdrs) |*phdr| {
            switch (phdr.p_type) {
                // Difference between the address the headers are found at and
                // the address recorded in the file, added to p_vaddr below
                elf.PT_PHDR => img_base = at_phdr - phdr.p_vaddr,
                elf.PT_TLS => tls_phdr = phdr,
                else => continue,
            }
        }
    } else {
        @panic("no auxv vector available!");
    }

    if (tls_phdr) |phdr| {
        // Offsets into the allocated TLS area
        var tcb_offset: usize = undefined;
        var dtv_offset: usize = undefined;
        var data_offset: usize = undefined;
        // Compute the total size of the ABI-specific data plus our own control
        // structures
        const alloc_size = switch (tls_variant) {
            // DTV, Zig thread data, (aligned) TCB and finally the TLS block
            .VariantI => blk: {
                var l: usize = 0;
                dtv_offset = l;
                l += @sizeOf(DTV);
                // The Zig thread data sits right after the DTV
                l += @sizeOf(CustomData);
                l = mem.alignForward(l, phdr.p_align);
                tcb_offset = l;
                if (tls_tcb_align_size) {
                    l += mem.alignForward(tls_tcb_size, phdr.p_align);
                } else {
                    l += tls_tcb_size;
                }
                data_offset = l;
                l += phdr.p_memsz;
                break :blk l;
            },
            // TLS block, TCB, Zig thread data and finally the DTV
            .VariantII => blk: {
                var l: usize = 0;
                data_offset = l;
                l += phdr.p_memsz;
                l = mem.alignForward(l, phdr.p_align);
                tcb_offset = l;
                l += tls_tcb_size;
                // The Zig thread data sits right after the TCB
                l += @sizeOf(CustomData);
                dtv_offset = l;
                l += @sizeOf(DTV);
                break :blk l;
            },
        };

        tls_image = TLSImage{
            // Only p_filesz bytes are backed by the file, the remaining part of
            // the p_memsz bytes of the block is left zeroed by copyTLS
            .data_src = @intToPtr([*]u8, phdr.p_vaddr + img_base)[0..phdr.p_filesz],
            .alloc_size = alloc_size,
            .tcb_offset = tcb_offset,
            .dtv_offset = dtv_offset,
            .data_offset = data_offset,
        };
    }
}
|
||||
|
||||
/// Initializes the TLS area of a thread in the memory starting at `addr`, which
/// must be able to hold `tls_image.alloc_size` bytes.
/// The area is zeroed, the DTV and the TCB are filled in and the TLS
/// initialization image is copied into place.
/// Returns the value to load into the thread pointer register.
/// Panics if `tls_image` is null.
pub fn copyTLS(addr: usize) usize {
    const img = tls_image orelse @panic("copyTLS called with no TLS section!");

    // Be paranoid, clear the area we're going to use
    @memset(@intToPtr([*]u8, addr), 0, img.alloc_size);

    const dtv_addr = addr + img.dtv_offset;
    const data_addr = addr + img.data_offset;
    const tcb_addr = addr + img.tcb_offset;

    // The DTV holds a single entry that refers to the TLS block
    const dtv = @intToPtr(*DTV, dtv_addr);
    dtv.entries = 1;
    dtv.tls_block[0] = data_addr + tls_dtv_offset;

    // The first word of the TCB points to the DTV for variant I and to the
    // TCB itself for variant II
    const tcb_word = if (tls_variant == TLSVariant.VariantI) dtv_addr else tcb_addr;
    @intToPtr(*usize, tcb_addr).* = tcb_word;

    // Bring in the initial contents of the TLS block
    @memcpy(@intToPtr([*]u8, data_addr), img.data_src.ptr, img.data_src.len);

    // Apply the architecture-specific correction (if any) to the tp value
    return tcb_addr + tls_tp_offset;
}
|
||||
|
||||
// Statically allocated memory that backs the TLS area of the main thread
var main_thread_tls_buffer: [64]u8 align(32) = undefined;

/// Returns the address of the memory area to be used for the TLS of the main
/// thread. `size` is the number of bytes needed and must not exceed the size
/// of the static buffer; the assertion fails otherwise.
/// Every call returns the same address.
pub fn allocateTLS(size: usize) usize {
    // A request that fills the buffer exactly still fits
    assert(size <= main_thread_tls_buffer.len);
    return @ptrToInt(&main_thread_tls_buffer);
}
|
|
@ -67,24 +67,19 @@ fn posixCallMainAndExit() noreturn {
|
|||
var envp_count: usize = 0;
|
||||
while (envp_optional[envp_count]) |_| : (envp_count += 1) {}
|
||||
const envp = @ptrCast([*][*]u8, envp_optional)[0..envp_count];
|
||||
|
||||
if (builtin.os == builtin.Os.linux) {
|
||||
// Scan auxiliary vector.
|
||||
const auxv = @ptrCast([*]std.elf.Auxv, envp.ptr + envp_count + 1);
|
||||
std.os.linux_elf_aux_maybe = auxv;
|
||||
var i: usize = 0;
|
||||
var at_phdr: usize = 0;
|
||||
var at_phnum: usize = 0;
|
||||
var at_phent: usize = 0;
|
||||
while (auxv[i].a_un.a_val != 0) : (i += 1) {
|
||||
switch (auxv[i].a_type) {
|
||||
std.elf.AT_PAGESZ => assert(auxv[i].a_un.a_val == std.os.page_size),
|
||||
std.elf.AT_PHDR => at_phdr = auxv[i].a_un.a_val,
|
||||
std.elf.AT_PHNUM => at_phnum = auxv[i].a_un.a_val,
|
||||
std.elf.AT_PHENT => at_phent = auxv[i].a_un.a_val,
|
||||
else => {},
|
||||
|
||||
std.os.linux.tls.initTLS();
|
||||
if (!builtin.single_threaded) {
|
||||
if (std.os.linux.tls.tls_image) |tls_img| {
|
||||
const tls_addr = std.os.linux.tls.allocateTLS(tls_img.alloc_size);
|
||||
const tp = std.os.linux.tls.copyTLS(tls_addr);
|
||||
std.os.linux.tls.setThreadPointer(tp);
|
||||
}
|
||||
}
|
||||
if (!builtin.single_threaded) linuxInitializeThreadLocalStorage(at_phdr, at_phnum, at_phent);
|
||||
}
|
||||
|
||||
std.os.posix.exit(callMainWithArgs(argc, argv, envp));
|
||||
|
@ -140,50 +135,3 @@ inline fn callMain() u8 {
|
|||
|
||||
// Alignment and storage of the preallocated TLS area of the main thread
const main_thread_tls_align = 32;
var main_thread_tls_bytes: [64]u8 align(main_thread_tls_align) = [1]u8{0} ** 64;

/// Sets up the Thread Local Storage of the main thread.
/// Walks the `at_phnum` program headers of `at_phent` bytes each that start at
/// `at_phdr`, remembers the PT_TLS one in `std.os.linux_tls_phdr` and, if it
/// exists, copies the TLS initialization image into the preallocated buffer
/// and points the thread pointer right past the end of the TLS data.
/// Does nothing if the executable has no PT_TLS header.
fn linuxInitializeThreadLocalStorage(at_phdr: usize, at_phnum: usize, at_phent: usize) void {
    var image_base: usize = 0;
    var remaining = at_phnum;
    var cursor = at_phdr;
    while (remaining != 0) {
        const phdr = @intToPtr(*std.elf.Phdr, cursor);
        remaining -= 1;
        cursor += at_phent;
        // TODO look for PT_DYNAMIC when we have https://github.com/ziglang/zig/issues/1917
        if (phdr.p_type == std.elf.PT_PHDR) {
            image_base = at_phdr - phdr.p_vaddr;
        } else if (phdr.p_type == std.elf.PT_TLS) {
            std.os.linux_tls_phdr = phdr;
        }
    }

    const tls_phdr = std.os.linux_tls_phdr orelse return;
    std.os.linux_tls_img_src = @intToPtr([*]const u8, image_base + tls_phdr.p_vaddr);

    const buffer_start = @ptrToInt(&main_thread_tls_bytes);
    // The thread pointer lives right after the p_memsz bytes of TLS data
    const tp_addr = buffer_start + tls_phdr.p_memsz;
    const buffer_end = buffer_start + main_thread_tls_bytes.len;
    assert(buffer_end >= tp_addr + @sizeOf(usize)); // not enough preallocated Thread Local Storage
    assert(main_thread_tls_align >= tls_phdr.p_align); // preallocated Thread Local Storage not aligned enough

    @memcpy(&main_thread_tls_bytes, std.os.linux_tls_img_src, tls_phdr.p_filesz);
    // The word at the thread pointer holds its own address
    @intToPtr(*usize, tp_addr).* = tp_addr;
    linuxSetThreadArea(tp_addr);
}
|
||||
|
||||
/// Makes `addr` the thread pointer of the calling thread.
/// On x86_64 this goes through the `arch_prctl` system call (ARCH_SET_FS), on
/// aarch64 the value is written into the `tpidr_el0` register.
/// Compiling this function for any other architecture is an error.
fn linuxSetThreadArea(addr: usize) void {
    switch (builtin.arch) {
        builtin.Arch.x86_64 => {
            const ARCH_SET_FS = 0x1002;
            const rc = std.os.linux.syscall2(std.os.linux.SYS_arch_prctl, ARCH_SET_FS, addr);
            // arch_prctl is documented to never fail
            assert(rc == 0);
        },
        builtin.Arch.aarch64 => {
            // Hand `addr` over as an explicit input operand: the assembly must
            // not assume which register holds it, and it must not return on
            // behalf of the enclosing function
            asm volatile (
                \\ msr tpidr_el0, %[addr]
                : : [addr] "r" (addr)
            );
        },
        else => @compileError("Unsupported architecture"),
    }
}
|
||||
|
|
Loading…
Reference in New Issue