From d665948dcf74cb4fff7e815dbcdabe363a337c76 Mon Sep 17 00:00:00 2001 From: Sahnvour Date: Wed, 8 May 2019 17:53:58 +0200 Subject: [PATCH 1/3] Duplicate Windows' DirectAllocator as HeapAllocator, which is what it really is. --- std/heap.zig | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/std/heap.zig b/std/heap.zig index 4a7a33772..38d56584c 100644 --- a/std/heap.zig +++ b/std/heap.zig @@ -190,6 +190,104 @@ pub const DirectAllocator = struct { } }; +pub const HeapAllocator = switch (builtin.os) { + .windows => struct { + allocator: Allocator, + heap_handle: ?HeapHandle, + + const HeapHandle = os.windows.HANDLE; + + pub fn init() HeapAllocator { + return HeapAllocator{ + .allocator = Allocator{ + .reallocFn = realloc, + .shrinkFn = shrink, + }, + .heap_handle = null, + }; + } + + pub fn deinit(self: *HeapAllocator) void { + if (self.heap_handle) |heap_handle| { + _ = os.windows.HeapDestroy(heap_handle); + } + } + + fn alloc(allocator: *Allocator, n: usize, alignment: u29) error{OutOfMemory}![]u8 { + const self = @fieldParentPtr(HeapAllocator, "allocator", allocator); + if (n == 0) + return (([*]u8)(undefined))[0..0]; + + const amt = n + alignment + @sizeOf(usize); + const optional_heap_handle = @atomicLoad(?HeapHandle, &self.heap_handle, builtin.AtomicOrder.SeqCst); + const heap_handle = optional_heap_handle orelse blk: { + const hh = os.windows.HeapCreate(0, amt, 0) orelse return error.OutOfMemory; + const other_hh = @cmpxchgStrong(?HeapHandle, &self.heap_handle, null, hh, builtin.AtomicOrder.SeqCst, builtin.AtomicOrder.SeqCst) orelse break :blk hh; + _ = os.windows.HeapDestroy(hh); + break :blk other_hh.?; // can't be null because of the cmpxchg + }; + const ptr = os.windows.HeapAlloc(heap_handle, 0, amt) orelse return error.OutOfMemory; + const root_addr = @ptrToInt(ptr); + const adjusted_addr = mem.alignForward(root_addr, alignment); + const record_addr = adjusted_addr + n; + @intToPtr(*align(1) usize, record_addr).* = root_addr; + return @intToPtr([*]u8, adjusted_addr)[0..n]; + } + + fn shrink(allocator: *Allocator, old_mem: []u8, old_align: u29, new_size: usize, new_align: u29) []u8 { + return realloc(allocator, old_mem, old_align, new_size, new_align) catch { + const old_adjusted_addr = @ptrToInt(old_mem.ptr); + const old_record_addr = old_adjusted_addr + old_mem.len; + const root_addr = @intToPtr(*align(1) usize, old_record_addr).*; + const old_ptr = @intToPtr(*c_void, root_addr); + const new_record_addr = old_record_addr - new_size + old_mem.len; + @intToPtr(*align(1) usize, new_record_addr).* = root_addr; + return old_mem[0..new_size]; + }; + } + + fn realloc(allocator: *Allocator, old_mem: []u8, old_align: u29, new_size: usize, new_align: u29) ![]u8 { + if (old_mem.len == 0) return alloc(allocator, new_size, new_align); + + const self = @fieldParentPtr(HeapAllocator, "allocator", allocator); + const old_adjusted_addr = @ptrToInt(old_mem.ptr); + const old_record_addr = old_adjusted_addr + old_mem.len; + const root_addr = @intToPtr(*align(1) usize, old_record_addr).*; + const old_ptr = @intToPtr(*c_void, root_addr); + + if (new_size == 0) { + if (os.windows.HeapFree(self.heap_handle.?, 0, old_ptr) == 0) unreachable; + return old_mem[0..0]; + } + + const amt = new_size + new_align + @sizeOf(usize); + const new_ptr = os.windows.HeapReAlloc( + self.heap_handle.?, + 0, + old_ptr, + amt, + ) orelse return error.OutOfMemory; + const offset = old_adjusted_addr - root_addr; + const new_root_addr = @ptrToInt(new_ptr); + var 
new_adjusted_addr = new_root_addr + offset; + const offset_is_valid = new_adjusted_addr + new_size + @sizeOf(usize) <= new_root_addr + amt; + const offset_is_aligned = new_adjusted_addr % new_align == 0; + if (!offset_is_valid or !offset_is_aligned) { + // If HeapReAlloc didn't happen to move the memory to the new alignment, + // or the memory starting at the old offset would be outside of the new allocation, + // then we need to copy the memory to a valid aligned address and use that + const new_aligned_addr = mem.alignForward(new_root_addr, new_align); + @memcpy(@intToPtr([*]u8, new_aligned_addr), @intToPtr([*]u8, new_adjusted_addr), std.math.min(old_mem.len, new_size)); + new_adjusted_addr = new_aligned_addr; + } + const new_record_addr = new_adjusted_addr + new_size; + @intToPtr(*align(1) usize, new_record_addr).* = new_root_addr; + return @intToPtr([*]u8, new_adjusted_addr)[0..new_size]; + } + }, + else => @compileError("Unsupported OS"), +}; + /// This allocator takes an existing allocator, wraps it, and provides an interface /// where you can allocate without freeing, and then free it all together. pub const ArenaAllocator = struct { @@ -590,6 +688,19 @@ test "DirectAllocator" { try testAllocatorAlignedShrink(allocator); } +test "HeapAllocator" { + if (builtin.os == .windows) { + var heap_allocator = HeapAllocator.init(); + defer heap_allocator.deinit(); + + const allocator = &heap_allocator.allocator; + try testAllocator(allocator); + try testAllocatorAligned(allocator, 16); + try testAllocatorLargeAlignment(allocator); + try testAllocatorAlignedShrink(allocator); + } +} + test "ArenaAllocator" { var direct_allocator = DirectAllocator.init(); defer direct_allocator.deinit(); From a2d5b0fabe13d28075963e06b4bc6040888542af Mon Sep 17 00:00:00 2001 From: Sahnvour Date: Wed, 8 May 2019 18:02:37 +0200 Subject: [PATCH 2/3] Implement Windows' DirectAllocator on top of VirtualAlloc and VirtualFree. --- std/heap.zig | 216 ++++++++++++++++++++++++------------ std/os/windows.zig | 31 ++++++ std/os/windows/kernel32.zig | 3 + 3 files changed, 179 insertions(+), 71 deletions(-) diff --git a/std/heap.zig b/std/heap.zig index 38d56584c..e8bf02a2a 100644 --- a/std/heap.zig +++ b/std/heap.zig @@ -34,9 +34,6 @@ fn cShrink(self: *Allocator, old_mem: []u8, old_align: u29, new_size: usize, new /// Thread-safe and lock-free. 
pub const DirectAllocator = struct { allocator: Allocator, - heap_handle: ?HeapHandle, - - const HeapHandle = if (builtin.os == Os.windows) os.windows.HANDLE else void; pub fn init() DirectAllocator { return DirectAllocator{ @@ -44,18 +41,10 @@ pub const DirectAllocator = struct { .reallocFn = realloc, .shrinkFn = shrink, }, - .heap_handle = if (builtin.os == Os.windows) null else {}, }; } - pub fn deinit(self: *DirectAllocator) void { - switch (builtin.os) { - Os.windows => if (self.heap_handle) |heap_handle| { - _ = os.windows.HeapDestroy(heap_handle); - }, - else => {}, - } - } + pub fn deinit(self: *DirectAllocator) void {} fn alloc(allocator: *Allocator, n: usize, alignment: u29) error{OutOfMemory}![]u8 { const self = @fieldParentPtr(DirectAllocator, "allocator", allocator); @@ -89,21 +78,57 @@ pub const DirectAllocator = struct { return @intToPtr([*]u8, aligned_addr)[0..n]; }, - Os.windows => { - const amt = n + alignment + @sizeOf(usize); - const optional_heap_handle = @atomicLoad(?HeapHandle, &self.heap_handle, builtin.AtomicOrder.SeqCst); - const heap_handle = optional_heap_handle orelse blk: { - const hh = os.windows.HeapCreate(0, amt, 0) orelse return error.OutOfMemory; - const other_hh = @cmpxchgStrong(?HeapHandle, &self.heap_handle, null, hh, builtin.AtomicOrder.SeqCst, builtin.AtomicOrder.SeqCst) orelse break :blk hh; - _ = os.windows.HeapDestroy(hh); - break :blk other_hh.?; // can't be null because of the cmpxchg - }; - const ptr = os.windows.HeapAlloc(heap_handle, 0, amt) orelse return error.OutOfMemory; - const root_addr = @ptrToInt(ptr); - const adjusted_addr = mem.alignForward(root_addr, alignment); - const record_addr = adjusted_addr + n; - @intToPtr(*align(1) usize, record_addr).* = root_addr; - return @intToPtr([*]u8, adjusted_addr)[0..n]; + .windows => { + const w = os.windows; + + // Although officially it's at least aligned to a page boundary, + // Windows is known to reserve pages on a 64K boundary. It's + // even more likely that the requested alignment is <= 64K than + // 4K, so we're just allocating blindly and hoping for the best. + // see https://devblogs.microsoft.com/oldnewthing/?p=42223 + const addr = w.VirtualAlloc( + null, + n, + w.MEM_COMMIT | w.MEM_RESERVE, + w.PAGE_READWRITE, + ) orelse return error.OutOfMemory; + + // If the allocation is sufficiently aligned, use it. + if (@ptrToInt(addr) & (alignment - 1) == 0) { + return @ptrCast([*]u8, addr)[0..n]; + } + + // If it wasn't, actually do an explicitly aligned allocation. + if (w.VirtualFree(addr, 0, w.MEM_RELEASE) == 0) unreachable; + const alloc_size = n + alignment; + + const final_addr = while (true) { + // Reserve a range of memory large enough to find a sufficiently + // aligned address. + const reserved_addr = w.VirtualAlloc( + null, + alloc_size, + w.MEM_RESERVE, + w.PAGE_NOACCESS, + ) orelse return error.OutOfMemory; + const aligned_addr = mem.alignForward(@ptrToInt(reserved_addr), alignment); + + // Release the reserved pages (not actually used). + if (w.VirtualFree(reserved_addr, 0, w.MEM_RELEASE) == 0) unreachable; + + // At this point, it is possible that another thread has + // obtained some memory space that will cause the next + // VirtualAlloc call to fail. To handle this, we will retry + // until it succeeds. 
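+ // (Specifically: between the VirtualFree just above and the + // VirtualAlloc below, another thread may map part of the released + // range; the targeted VirtualAlloc then returns null and we loop + // to reserve a fresh range.)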
+ if (w.VirtualAlloc( + @intToPtr(*c_void, aligned_addr), + n, + w.MEM_COMMIT | w.MEM_RESERVE, + w.PAGE_READWRITE, + )) |ptr| break ptr; + } else unreachable; // TODO else unreachable should not be necessary + + return @ptrCast([*]u8, final_addr)[0..n]; }, else => @compileError("Unsupported OS"), } @@ -121,13 +146,31 @@ pub const DirectAllocator = struct { } return old_mem[0..new_size]; }, - Os.windows => return realloc(allocator, old_mem, old_align, new_size, new_align) catch { - const old_adjusted_addr = @ptrToInt(old_mem.ptr); - const old_record_addr = old_adjusted_addr + old_mem.len; - const root_addr = @intToPtr(*align(1) usize, old_record_addr).*; - const old_ptr = @intToPtr(*c_void, root_addr); - const new_record_addr = old_record_addr - new_size + old_mem.len; - @intToPtr(*align(1) usize, new_record_addr).* = root_addr; + .windows => { + const w = os.windows; + if (new_size == 0) { + // From the docs: + // "If the dwFreeType parameter is MEM_RELEASE, this parameter + // must be 0 (zero). The function frees the entire region that + // is reserved in the initial allocation call to VirtualAlloc." + // So we can only use MEM_RELEASE when actually releasing the + // whole allocation. + if (w.VirtualFree(old_mem.ptr, 0, w.MEM_RELEASE) == 0) unreachable; + } else { + const base_addr = @ptrToInt(old_mem.ptr); + const old_addr_end = base_addr + old_mem.len; + const new_addr_end = base_addr + new_size; + const new_addr_end_rounded = mem.alignForward(new_addr_end, os.page_size); + if (old_addr_end > new_addr_end_rounded) { + // For shrinking that is not releasing, we will only + // decommit the pages not needed anymore. + if (w.VirtualFree( + @intToPtr(*c_void, new_addr_end_rounded), + old_addr_end - new_addr_end_rounded, + w.MEM_DECOMMIT, + ) == 0) unreachable; + } + } return old_mem[0..new_size]; }, else => @compileError("Unsupported OS"), @@ -147,43 +190,59 @@ pub const DirectAllocator = struct { } return result; }, - Os.windows => { - if (old_mem.len == 0) return alloc(allocator, new_size, new_align); - - const self = @fieldParentPtr(DirectAllocator, "allocator", allocator); - const old_adjusted_addr = @ptrToInt(old_mem.ptr); - const old_record_addr = old_adjusted_addr + old_mem.len; - const root_addr = @intToPtr(*align(1) usize, old_record_addr).*; - const old_ptr = @intToPtr(*c_void, root_addr); - - if (new_size == 0) { - if (os.windows.HeapFree(self.heap_handle.?, 0, old_ptr) == 0) unreachable; - return old_mem[0..0]; + .windows => { + if (old_mem.len == 0) { + return alloc(allocator, new_size, new_align); } - const amt = new_size + new_align + @sizeOf(usize); - const new_ptr = os.windows.HeapReAlloc( - self.heap_handle.?, - 0, - old_ptr, - amt, - ) orelse return error.OutOfMemory; - const offset = old_adjusted_addr - root_addr; - const new_root_addr = @ptrToInt(new_ptr); - var new_adjusted_addr = new_root_addr + offset; - const offset_is_valid = new_adjusted_addr + new_size + @sizeOf(usize) <= new_root_addr + amt; - const offset_is_aligned = new_adjusted_addr % new_align == 0; - if (!offset_is_valid or !offset_is_aligned) { - // If HeapReAlloc didn't happen to move the memory to the new alignment, - // or the memory starting at the old offset would be outside of the new allocation, - // then we need to copy the memory to a valid aligned address and use that - const new_aligned_addr = mem.alignForward(new_root_addr, new_align); - @memcpy(@intToPtr([*]u8, new_aligned_addr), @intToPtr([*]u8, new_adjusted_addr), std.math.min(old_mem.len, new_size)); - new_adjusted_addr = new_aligned_addr; 
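+ // A request that both shrinks the size and keeps (or loosens) the + // alignment can never require moving the allocation, so delegate + // to shrink(), which decommits trailing pages in place.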
+ if (new_size <= old_mem.len and new_align <= old_align) { + return shrink(allocator, old_mem, old_align, new_size, new_align); } - const new_record_addr = new_adjusted_addr + new_size; - @intToPtr(*align(1) usize, new_record_addr).* = new_root_addr; - return @intToPtr([*]u8, new_adjusted_addr)[0..new_size]; + + const w = os.windows; + const base_addr = @ptrToInt(old_mem.ptr); + + if (new_align > old_align and base_addr & (new_align - 1) != 0) { + // Current allocation doesn't satisfy the new alignment. + // For now we'll do a new one no matter what, but maybe + // there is something smarter to do instead. + const result = try alloc(allocator, new_size, new_align); + assert(old_mem.len != 0); + @memcpy(result.ptr, old_mem.ptr, std.math.min(old_mem.len, result.len)); + if (w.VirtualFree(old_mem.ptr, 0, w.MEM_RELEASE) == 0) unreachable; + + return result; + } + + const old_addr_end = base_addr + old_mem.len; + const old_addr_end_rounded = mem.alignForward(old_addr_end, os.page_size); + const new_addr_end = base_addr + new_size; + const new_addr_end_rounded = mem.alignForward(new_addr_end, os.page_size); + if (new_addr_end_rounded == old_addr_end_rounded) { + // The reallocation fits in the already allocated pages. + return @ptrCast([*]u8, old_mem.ptr)[0..new_size]; + } + assert(new_addr_end_rounded > old_addr_end_rounded); + + // We need to commit new pages. + const additional_size = new_addr_end - old_addr_end_rounded; + const realloc_addr = w.VirtualAlloc( + @intToPtr(*c_void, old_addr_end_rounded), + additional_size, + w.MEM_COMMIT | w.MEM_RESERVE, + w.PAGE_READWRITE, + ) orelse { + // Committing new pages at the end of the existing allocation + // failed; we need to try a new one. + const new_alloc_mem = try alloc(allocator, new_size, new_align); + @memcpy(new_alloc_mem.ptr, old_mem.ptr, old_mem.len); + if (w.VirtualFree(old_mem.ptr, 0, w.MEM_RELEASE) == 0) unreachable; + + return new_alloc_mem; + }; + + assert(@ptrToInt(realloc_addr) == old_addr_end_rounded); + return @ptrCast([*]u8, old_mem.ptr)[0..new_size]; }, else => @compileError("Unsupported OS"), } @@ -686,6 +745,17 @@ test "DirectAllocator" { try testAllocatorAligned(allocator, 16); try testAllocatorLargeAlignment(allocator); try testAllocatorAlignedShrink(allocator); + + if (builtin.os == .windows) { + // Trying a really large alignment. As mentioned in the implementation, + // VirtualAlloc returns 64K-aligned addresses. We want to make sure + // DirectAllocator works beyond that, as it's not tested by + // `testAllocatorLargeAlignment`. + const slice = try allocator.alignedAlloc(u8, 1 << 20, 128); + slice[0] = 0x12; + slice[127] = 0x34; + allocator.free(slice); + } } test "HeapAllocator" { @@ -714,7 +784,7 @@ test "ArenaAllocator" { try testAllocatorAlignedShrink(&arena_allocator.allocator); } -var test_fixed_buffer_allocator_memory: [40000 * @sizeOf(u64)]u8 = undefined; +var test_fixed_buffer_allocator_memory: [80000 * @sizeOf(u64)]u8 = undefined; test "FixedBufferAllocator" { var fixed_buffer_allocator = FixedBufferAllocator.init(test_fixed_buffer_allocator_memory[0..]); @@ -852,7 +922,11 @@ fn testAllocatorAlignedShrink(allocator: *mem.Allocator) mem.Allocator.Error!voi defer allocator.free(slice); var stuff_to_free = std.ArrayList([]align(16) u8).init(debug_allocator); - while (@ptrToInt(slice.ptr) == mem.alignForward(@ptrToInt(slice.ptr), os.page_size * 2)) { + // On Windows, VirtualAlloc returns addresses aligned to a 64K boundary, + // which is 16 pages, hence the 32. 
This test may require increasing + the size of the allocations feeding the `allocator` parameter if they + fail, because of the high over-alignment we want to have. + while (@ptrToInt(slice.ptr) == mem.alignForward(@ptrToInt(slice.ptr), os.page_size * 32)) { try stuff_to_free.append(slice); slice = try allocator.alignedAlloc(u8, 16, alloc_size); } @@ -863,7 +937,7 @@ fn testAllocatorAlignedShrink(allocator: *mem.Allocator) mem.Allocator.Error!voi slice[60] = 0x34; // realloc to a smaller size but with a larger alignment - slice = try allocator.alignedRealloc(slice, os.page_size * 2, alloc_size / 2); + slice = try allocator.alignedRealloc(slice, os.page_size * 32, alloc_size / 2); testing.expect(slice[0] == 0x12); testing.expect(slice[60] == 0x34); } diff --git a/std/os/windows.zig b/std/os/windows.zig index 96274632c..e134d87ea 100644 --- a/std/os/windows.zig +++ b/std/os/windows.zig @@ -239,6 +239,37 @@ pub const HEAP_CREATE_ENABLE_EXECUTE = 0x00040000; pub const HEAP_GENERATE_EXCEPTIONS = 0x00000004; pub const HEAP_NO_SERIALIZE = 0x00000001; +// AllocationType values +pub const MEM_COMMIT = 0x1000; +pub const MEM_RESERVE = 0x2000; +pub const MEM_RESET = 0x80000; +pub const MEM_RESET_UNDO = 0x1000000; +pub const MEM_LARGE_PAGES = 0x20000000; +pub const MEM_PHYSICAL = 0x400000; +pub const MEM_TOP_DOWN = 0x100000; +pub const MEM_WRITE_WATCH = 0x200000; + +// Protect values +pub const PAGE_EXECUTE = 0x10; +pub const PAGE_EXECUTE_READ = 0x20; +pub const PAGE_EXECUTE_READWRITE = 0x40; +pub const PAGE_EXECUTE_WRITECOPY = 0x80; +pub const PAGE_NOACCESS = 0x01; +pub const PAGE_READONLY = 0x02; +pub const PAGE_READWRITE = 0x04; +pub const PAGE_WRITECOPY = 0x08; +pub const PAGE_TARGETS_INVALID = 0x40000000; +pub const PAGE_TARGETS_NO_UPDATE = 0x40000000; // Same as PAGE_TARGETS_INVALID +pub const PAGE_GUARD = 0x100; +pub const PAGE_NOCACHE = 0x200; +pub const PAGE_WRITECOMBINE = 0x400; + +// FreeType values +pub const MEM_COALESCE_PLACEHOLDERS = 0x1; +pub const MEM_RESERVE_PLACEHOLDERS = 0x2; +pub const MEM_DECOMMIT = 0x4000; +pub const MEM_RELEASE = 0x8000; + pub const PTHREAD_START_ROUTINE = extern fn (LPVOID) DWORD; pub const LPTHREAD_START_ROUTINE = PTHREAD_START_ROUTINE; diff --git a/std/os/windows/kernel32.zig b/std/os/windows/kernel32.zig index 64a97ca87..7e99f7eda 100644 --- a/std/os/windows/kernel32.zig +++ b/std/os/windows/kernel32.zig @@ -116,6 +116,9 @@ pub extern "kernel32" stdcallcc fn HeapFree(hHeap: HANDLE, dwFlags: DWORD, lpMem pub extern "kernel32" stdcallcc fn HeapValidate(hHeap: HANDLE, dwFlags: DWORD, lpMem: ?*const c_void) BOOL; +pub extern "kernel32" stdcallcc fn VirtualAlloc(lpAddress: ?LPVOID, dwSize: SIZE_T, flAllocationType: DWORD, flProtect: DWORD) ?LPVOID; +pub extern "kernel32" stdcallcc fn VirtualFree(lpAddress: ?LPVOID, dwSize: SIZE_T, dwFreeType: DWORD) BOOL; + pub extern "kernel32" stdcallcc fn MoveFileExW( lpExistingFileName: [*]const u16, lpNewFileName: [*]const u16, From cd537f822b0b60e00eff34c4e11c43f718705699 Mon Sep 17 00:00:00 2001 From: Sahnvour Date: Wed, 8 May 2019 18:14:35 +0200 Subject: [PATCH 3/3] Use unprotected heap when in single_threaded mode. 
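Creating the heap with HEAP_NO_SERIALIZE makes HeapAlloc, HeapReAlloc and HeapFree skip the heap's internal locking. In a single_threaded build no other thread can touch the allocator, so this serialization is pure overhead and can safely be turned off.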
--- std/heap.zig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/std/heap.zig b/std/heap.zig index e8bf02a2a..8e3cccc36 100644 --- a/std/heap.zig +++ b/std/heap.zig @@ -280,7 +280,8 @@ pub const HeapAllocator = switch (builtin.os) { const amt = n + alignment + @sizeOf(usize); const optional_heap_handle = @atomicLoad(?HeapHandle, &self.heap_handle, builtin.AtomicOrder.SeqCst); const heap_handle = optional_heap_handle orelse blk: { - const hh = os.windows.HeapCreate(0, amt, 0) orelse return error.OutOfMemory; + const options = if (builtin.single_threaded) os.windows.HEAP_NO_SERIALIZE else 0; + const hh = os.windows.HeapCreate(options, amt, 0) orelse return error.OutOfMemory; const other_hh = @cmpxchgStrong(?HeapHandle, &self.heap_handle, null, hh, builtin.AtomicOrder.SeqCst, builtin.AtomicOrder.SeqCst) orelse break :blk hh; _ = os.windows.HeapDestroy(hh); break :blk other_hh.?; // can't be null because of the cmpxchg
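For reference, a minimal usage sketch of the new HeapAllocator (hypothetical test code, not part of the patches above; it assumes this era's Allocator interface of alloc/realloc/shrink/free):

test "HeapAllocator usage sketch" {
    if (builtin.os != .windows) return;

    var heap = HeapAllocator.init();
    defer heap.deinit();
    const allocator = &heap.allocator;

    // The backing HeapCreate happens lazily on the first allocation.
    var slice = try allocator.alloc(u8, 64);
    // Growing may move the block; HeapReAlloc does the work.
    slice = try allocator.realloc(slice, 128);
    // Shrinking never fails; the bookkeeping record is just rewritten.
    slice = allocator.shrink(slice, 32);
    allocator.free(slice);
}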