Merge branch 'master' of github.com:ziglang/zig

master
Jimmi Holst Christensen 2019-05-11 20:12:08 +02:00
commit e5a0e21a53
3 changed files with 291 additions and 71 deletions

View File

@@ -34,9 +34,6 @@ fn cShrink(self: *Allocator, old_mem: []u8, old_align: u29, new_size: usize, new
/// Thread-safe and lock-free.
pub const DirectAllocator = struct {
allocator: Allocator,
heap_handle: ?HeapHandle,
const HeapHandle = if (builtin.os == Os.windows) os.windows.HANDLE else void;
pub fn init() DirectAllocator {
return DirectAllocator{
@@ -44,18 +41,10 @@ pub const DirectAllocator = struct {
.reallocFn = realloc,
.shrinkFn = shrink,
},
.heap_handle = if (builtin.os == Os.windows) null else {},
};
}
pub fn deinit(self: *DirectAllocator) void {
switch (builtin.os) {
Os.windows => if (self.heap_handle) |heap_handle| {
_ = os.windows.HeapDestroy(heap_handle);
},
else => {},
}
}
pub fn deinit(self: *DirectAllocator) void {}
fn alloc(allocator: *Allocator, n: usize, alignment: u29) error{OutOfMemory}![]u8 {
const self = @fieldParentPtr(DirectAllocator, "allocator", allocator);
@@ -89,21 +78,57 @@ pub const DirectAllocator = struct {
return @intToPtr([*]u8, aligned_addr)[0..n];
},
Os.windows => {
const amt = n + alignment + @sizeOf(usize);
const optional_heap_handle = @atomicLoad(?HeapHandle, &self.heap_handle, builtin.AtomicOrder.SeqCst);
const heap_handle = optional_heap_handle orelse blk: {
const hh = os.windows.HeapCreate(0, amt, 0) orelse return error.OutOfMemory;
const other_hh = @cmpxchgStrong(?HeapHandle, &self.heap_handle, null, hh, builtin.AtomicOrder.SeqCst, builtin.AtomicOrder.SeqCst) orelse break :blk hh;
_ = os.windows.HeapDestroy(hh);
break :blk other_hh.?; // can't be null because of the cmpxchg
};
const ptr = os.windows.HeapAlloc(heap_handle, 0, amt) orelse return error.OutOfMemory;
const root_addr = @ptrToInt(ptr);
const adjusted_addr = mem.alignForward(root_addr, alignment);
const record_addr = adjusted_addr + n;
@intToPtr(*align(1) usize, record_addr).* = root_addr;
return @intToPtr([*]u8, adjusted_addr)[0..n];
.windows => {
const w = os.windows;
// Although the result is officially only guaranteed to be aligned
// to a page boundary, Windows is known to reserve pages on a 64K
// boundary. A requested alignment is even more likely to be <= 64K
// than <= 4K, so we just allocate blindly and hope for the best.
// see https://devblogs.microsoft.com/oldnewthing/?p=42223
const addr = w.VirtualAlloc(
null,
n,
w.MEM_COMMIT | w.MEM_RESERVE,
w.PAGE_READWRITE,
) orelse return error.OutOfMemory;
// If the allocation is sufficiently aligned, use it.
if (@ptrToInt(addr) & (alignment - 1) == 0) {
return @ptrCast([*]u8, addr)[0..n];
}
// If it wasn't, do an explicitly aligned allocation.
if (w.VirtualFree(addr, 0, w.MEM_RELEASE) == 0) unreachable;
const alloc_size = n + alignment;
const final_addr = while (true) {
// Reserve a range of memory large enough to find a sufficiently
// aligned address.
const reserved_addr = w.VirtualAlloc(
null,
alloc_size,
w.MEM_RESERVE,
w.PAGE_NOACCESS,
) orelse return error.OutOfMemory;
const aligned_addr = mem.alignForward(@ptrToInt(reserved_addr), alignment);
// Release the reserved pages (not actually used).
if (w.VirtualFree(reserved_addr, 0, w.MEM_RELEASE) == 0) unreachable;
// At this point, it is possible that another thread has
// obtained some memory space that will cause the next
// VirtualAlloc call to fail. To handle this, we will retry
// until it succeeds.
if (w.VirtualAlloc(
@intToPtr(*c_void, aligned_addr),
n,
w.MEM_COMMIT | w.MEM_RESERVE,
w.PAGE_READWRITE,
)) |ptr| break ptr;
} else unreachable; // TODO else unreachable should not be necessary
return @ptrCast([*]u8, final_addr)[0..n];
},
else => @compileError("Unsupported OS"),
}
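The alignment arithmetic above leans on std.mem.alignForward to round a VirtualAlloc result up to the requested alignment. A minimal sketch of that rounding, using hypothetical addresses and assuming the 2019-era alignForward(addr, alignment) signature:

const std = @import("std");
const mem = std.mem;
const assert = std.debug.assert;

test "alignForward rounds an address up to the requested alignment" {
    // VirtualAlloc results are typically 64K-aligned, so a 1 MiB alignment
    // request is the case that needs the reserve/release/re-commit retry above.
    assert(mem.alignForward(0x10000, 1 << 20) == 0x100000);
    // An already aligned address comes back unchanged.
    assert(mem.alignForward(0x100000, 1 << 20) == 0x100000);
}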
@@ -121,13 +146,31 @@ pub const DirectAllocator = struct {
}
return old_mem[0..new_size];
},
Os.windows => return realloc(allocator, old_mem, old_align, new_size, new_align) catch {
const old_adjusted_addr = @ptrToInt(old_mem.ptr);
const old_record_addr = old_adjusted_addr + old_mem.len;
const root_addr = @intToPtr(*align(1) usize, old_record_addr).*;
const old_ptr = @intToPtr(*c_void, root_addr);
const new_record_addr = old_record_addr - new_size + old_mem.len;
@intToPtr(*align(1) usize, new_record_addr).* = root_addr;
.windows => {
const w = os.windows;
if (new_size == 0) {
// From the docs:
// "If the dwFreeType parameter is MEM_RELEASE, this parameter
// must be 0 (zero). The function frees the entire region that
// is reserved in the initial allocation call to VirtualAlloc."
// So we can only use MEM_RELEASE when actually releasing the
// whole allocation.
if (w.VirtualFree(old_mem.ptr, 0, w.MEM_RELEASE) == 0) unreachable;
} else {
const base_addr = @ptrToInt(old_mem.ptr);
const old_addr_end = base_addr + old_mem.len;
const new_addr_end = base_addr + new_size;
const new_addr_end_rounded = mem.alignForward(new_addr_end, os.page_size);
if (old_addr_end > new_addr_end_rounded) {
// For a shrink that does not release the whole allocation, we
// only decommit the pages that are no longer needed.
if (w.VirtualFree(
@intToPtr(*c_void, new_addr_end_rounded),
old_addr_end - new_addr_end_rounded,
w.MEM_DECOMMIT,
) == 0) unreachable;
}
}
return old_mem[0..new_size];
},
else => @compileError("Unsupported OS"),
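To make the decommit bookkeeping above concrete, here is a small worked sketch with hypothetical numbers; it assumes a 4 KiB page size and the era's std.mem.alignForward(addr, alignment):

const std = @import("std");
const mem = std.mem;
const assert = std.debug.assert;

test "shrink only decommits whole trailing pages (worked example)" {
    const page_size: usize = 4096;
    const base_addr: usize = 0x300000; // pretend this came from VirtualAlloc
    const old_len: usize = 3 * page_size;
    const new_size: usize = page_size + 16;
    const old_addr_end = base_addr + old_len;
    const new_addr_end_rounded = mem.alignForward(base_addr + new_size, page_size);
    // The page still holding the tail of the shrunk slice stays committed, so
    // only the last whole page would be passed to VirtualFree with MEM_DECOMMIT.
    assert(old_addr_end - new_addr_end_rounded == page_size);
}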
@@ -147,49 +190,164 @@ pub const DirectAllocator = struct {
}
return result;
},
Os.windows => {
if (old_mem.len == 0) return alloc(allocator, new_size, new_align);
const self = @fieldParentPtr(DirectAllocator, "allocator", allocator);
const old_adjusted_addr = @ptrToInt(old_mem.ptr);
const old_record_addr = old_adjusted_addr + old_mem.len;
const root_addr = @intToPtr(*align(1) usize, old_record_addr).*;
const old_ptr = @intToPtr(*c_void, root_addr);
if (new_size == 0) {
if (os.windows.HeapFree(self.heap_handle.?, 0, old_ptr) == 0) unreachable;
return old_mem[0..0];
.windows => {
if (old_mem.len == 0) {
return alloc(allocator, new_size, new_align);
}
const amt = new_size + new_align + @sizeOf(usize);
const new_ptr = os.windows.HeapReAlloc(
self.heap_handle.?,
0,
old_ptr,
amt,
) orelse return error.OutOfMemory;
const offset = old_adjusted_addr - root_addr;
const new_root_addr = @ptrToInt(new_ptr);
var new_adjusted_addr = new_root_addr + offset;
const offset_is_valid = new_adjusted_addr + new_size + @sizeOf(usize) <= new_root_addr + amt;
const offset_is_aligned = new_adjusted_addr % new_align == 0;
if (!offset_is_valid or !offset_is_aligned) {
// If HeapReAlloc didn't happen to move the memory to the new alignment,
// or the memory starting at the old offset would be outside of the new allocation,
// then we need to copy the memory to a valid aligned address and use that
const new_aligned_addr = mem.alignForward(new_root_addr, new_align);
@memcpy(@intToPtr([*]u8, new_aligned_addr), @intToPtr([*]u8, new_adjusted_addr), std.math.min(old_mem.len, new_size));
new_adjusted_addr = new_aligned_addr;
if (new_size <= old_mem.len and new_align <= old_align) {
return shrink(allocator, old_mem, old_align, new_size, new_align);
}
const new_record_addr = new_adjusted_addr + new_size;
@intToPtr(*align(1) usize, new_record_addr).* = new_root_addr;
return @intToPtr([*]u8, new_adjusted_addr)[0..new_size];
const w = os.windows;
const base_addr = @ptrToInt(old_mem.ptr);
if (new_align > old_align and base_addr & (new_align - 1) != 0) {
// The current allocation doesn't satisfy the new alignment.
// For now we always make a fresh allocation, but maybe
// there is something smarter to do instead.
const result = try alloc(allocator, new_size, new_align);
assert(old_mem.len != 0);
@memcpy(result.ptr, old_mem.ptr, std.math.min(old_mem.len, result.len));
if (w.VirtualFree(old_mem.ptr, 0, w.MEM_RELEASE) == 0) unreachable;
return result;
}
const old_addr_end = base_addr + old_mem.len;
const old_addr_end_rounded = mem.alignForward(old_addr_end, os.page_size);
const new_addr_end = base_addr + new_size;
const new_addr_end_rounded = mem.alignForward(new_addr_end, os.page_size);
if (new_addr_end_rounded == old_addr_end_rounded) {
// The reallocation fits in the already allocated pages.
return @ptrCast([*]u8, old_mem.ptr)[0..new_size];
}
assert(new_addr_end_rounded > old_addr_end_rounded);
// We need to commit new pages.
const additional_size = new_addr_end - old_addr_end_rounded;
const realloc_addr = w.VirtualAlloc(
@intToPtr(*c_void, old_addr_end_rounded),
additional_size,
w.MEM_COMMIT | w.MEM_RESERVE,
w.PAGE_READWRITE,
) orelse {
// Committing new pages at the end of the existing allocation
// failed, so we need to fall back to a new allocation.
const new_alloc_mem = try alloc(allocator, new_size, new_align);
@memcpy(new_alloc_mem.ptr, old_mem.ptr, old_mem.len);
if (w.VirtualFree(old_mem.ptr, 0, w.MEM_RELEASE) == 0) unreachable;
return new_alloc_mem;
};
assert(@ptrToInt(realloc_addr) == old_addr_end_rounded);
return @ptrCast([*]u8, old_mem.ptr)[0..new_size];
},
else => @compileError("Unsupported OS"),
}
}
};
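The realloc path above is page-granular: growth that stays within the already committed pages is free, and only growth past old_addr_end_rounded needs another VirtualAlloc (or a copy to a fresh block). A worked sketch with hypothetical numbers, assuming a 4 KiB page size:

const std = @import("std");
const mem = std.mem;
const assert = std.debug.assert;

test "page-granular realloc bookkeeping (worked example)" {
    const page_size: usize = 4096;
    const base_addr: usize = 0x200000; // pretend this came from VirtualAlloc
    const old_len: usize = 1000;
    // Growing to 3000 bytes stays inside the single committed page, so the
    // realloc above can just return a longer slice over the same memory.
    assert(mem.alignForward(base_addr + 3000, page_size) ==
        mem.alignForward(base_addr + old_len, page_size));
    // Growing to 5000 bytes crosses into a second page, so new pages have to
    // be committed at old_addr_end_rounded, or the data copied elsewhere.
    assert(mem.alignForward(base_addr + 5000, page_size) >
        mem.alignForward(base_addr + old_len, page_size));
}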
pub const HeapAllocator = switch (builtin.os) {
.windows => struct {
allocator: Allocator,
heap_handle: ?HeapHandle,
const HeapHandle = os.windows.HANDLE;
pub fn init() HeapAllocator {
return HeapAllocator{
.allocator = Allocator{
.reallocFn = realloc,
.shrinkFn = shrink,
},
.heap_handle = null,
};
}
pub fn deinit(self: *HeapAllocator) void {
if (self.heap_handle) |heap_handle| {
_ = os.windows.HeapDestroy(heap_handle);
}
}
fn alloc(allocator: *Allocator, n: usize, alignment: u29) error{OutOfMemory}![]u8 {
const self = @fieldParentPtr(HeapAllocator, "allocator", allocator);
if (n == 0)
return (([*]u8)(undefined))[0..0];
const amt = n + alignment + @sizeOf(usize);
const optional_heap_handle = @atomicLoad(?HeapHandle, &self.heap_handle, builtin.AtomicOrder.SeqCst);
const heap_handle = optional_heap_handle orelse blk: {
const options = if (builtin.single_threaded) os.windows.HEAP_NO_SERIALIZE else 0;
const hh = os.windows.HeapCreate(options, amt, 0) orelse return error.OutOfMemory;
const other_hh = @cmpxchgStrong(?HeapHandle, &self.heap_handle, null, hh, builtin.AtomicOrder.SeqCst, builtin.AtomicOrder.SeqCst) orelse break :blk hh;
_ = os.windows.HeapDestroy(hh);
break :blk other_hh.?; // can't be null because of the cmpxchg
};
const ptr = os.windows.HeapAlloc(heap_handle, 0, amt) orelse return error.OutOfMemory;
const root_addr = @ptrToInt(ptr);
const adjusted_addr = mem.alignForward(root_addr, alignment);
const record_addr = adjusted_addr + n;
@intToPtr(*align(1) usize, record_addr).* = root_addr;
return @intToPtr([*]u8, adjusted_addr)[0..n];
}
fn shrink(allocator: *Allocator, old_mem: []u8, old_align: u29, new_size: usize, new_align: u29) []u8 {
return realloc(allocator, old_mem, old_align, new_size, new_align) catch {
const old_adjusted_addr = @ptrToInt(old_mem.ptr);
const old_record_addr = old_adjusted_addr + old_mem.len;
const root_addr = @intToPtr(*align(1) usize, old_record_addr).*;
const old_ptr = @intToPtr(*c_void, root_addr);
const new_record_addr = old_record_addr - new_size + old_mem.len;
@intToPtr(*align(1) usize, new_record_addr).* = root_addr;
return old_mem[0..new_size];
};
}
fn realloc(allocator: *Allocator, old_mem: []u8, old_align: u29, new_size: usize, new_align: u29) ![]u8 {
if (old_mem.len == 0) return alloc(allocator, new_size, new_align);
const self = @fieldParentPtr(HeapAllocator, "allocator", allocator);
const old_adjusted_addr = @ptrToInt(old_mem.ptr);
const old_record_addr = old_adjusted_addr + old_mem.len;
const root_addr = @intToPtr(*align(1) usize, old_record_addr).*;
const old_ptr = @intToPtr(*c_void, root_addr);
if (new_size == 0) {
if (os.windows.HeapFree(self.heap_handle.?, 0, old_ptr) == 0) unreachable;
return old_mem[0..0];
}
const amt = new_size + new_align + @sizeOf(usize);
const new_ptr = os.windows.HeapReAlloc(
self.heap_handle.?,
0,
old_ptr,
amt,
) orelse return error.OutOfMemory;
const offset = old_adjusted_addr - root_addr;
const new_root_addr = @ptrToInt(new_ptr);
var new_adjusted_addr = new_root_addr + offset;
const offset_is_valid = new_adjusted_addr + new_size + @sizeOf(usize) <= new_root_addr + amt;
const offset_is_aligned = new_adjusted_addr % new_align == 0;
if (!offset_is_valid or !offset_is_aligned) {
// If HeapReAlloc didn't happen to move the memory to the new alignment,
// or the memory starting at the old offset would be outside of the new allocation,
// then we need to copy the memory to a valid aligned address and use that
const new_aligned_addr = mem.alignForward(new_root_addr, new_align);
@memcpy(@intToPtr([*]u8, new_aligned_addr), @intToPtr([*]u8, new_adjusted_addr), std.math.min(old_mem.len, new_size));
new_adjusted_addr = new_aligned_addr;
}
const new_record_addr = new_adjusted_addr + new_size;
@intToPtr(*align(1) usize, new_record_addr).* = new_root_addr;
return @intToPtr([*]u8, new_adjusted_addr)[0..new_size];
}
},
else => @compileError("Unsupported OS"),
};
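HeapAllocator over-allocates each request by alignment + @sizeOf(usize) so that an aligned user slice plus a trailing usize record (the HeapAlloc root address, needed later for HeapFree and HeapReAlloc) always fit inside the raw block. A sketch of that layout arithmetic with hypothetical values on a 64-bit target:

const std = @import("std");
const mem = std.mem;
const assert = std.debug.assert;

test "HeapAllocator trailing-record layout (worked example)" {
    const n: usize = 100; // requested size
    const alignment: usize = 64; // requested alignment
    const amt = n + alignment + @sizeOf(usize); // size passed to HeapAlloc
    const root_addr: usize = 0x10008; // pretend HeapAlloc returned this
    const adjusted_addr = mem.alignForward(root_addr, alignment); // 0x10040
    const record_addr = adjusted_addr + n; // where root_addr gets stored
    // The aligned slice plus the trailing record never run past the raw block.
    assert(record_addr + @sizeOf(usize) <= root_addr + amt);
}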
/// This allocator takes an existing allocator, wraps it, and provides an interface
/// where you can allocate without freeing, and then free it all together.
pub const ArenaAllocator = struct {
@@ -588,6 +746,30 @@ test "DirectAllocator" {
try testAllocatorAligned(allocator, 16);
try testAllocatorLargeAlignment(allocator);
try testAllocatorAlignedShrink(allocator);
if (builtin.os == .windows) {
// Try a really large alignment. As mentioned in the implementation,
// VirtualAlloc returns 64K-aligned addresses. We want to make sure
// DirectAllocator works beyond that, as it's not covered by
// `testAllocatorLargeAlignment`.
const slice = try allocator.alignedAlloc(u8, 1 << 20, 128);
slice[0] = 0x12;
slice[127] = 0x34;
allocator.free(slice);
}
}
test "HeapAllocator" {
if (builtin.os == .windows) {
var heap_allocator = HeapAllocator.init();
defer heap_allocator.deinit();
const allocator = &heap_allocator.allocator;
try testAllocator(allocator);
try testAllocatorAligned(allocator, 16);
try testAllocatorLargeAlignment(allocator);
try testAllocatorAlignedShrink(allocator);
}
}
test "ArenaAllocator" {
@@ -603,7 +785,7 @@ test "ArenaAllocator" {
try testAllocatorAlignedShrink(&arena_allocator.allocator);
}
var test_fixed_buffer_allocator_memory: [40000 * @sizeOf(u64)]u8 = undefined;
var test_fixed_buffer_allocator_memory: [80000 * @sizeOf(u64)]u8 = undefined;
test "FixedBufferAllocator" {
var fixed_buffer_allocator = FixedBufferAllocator.init(test_fixed_buffer_allocator_memory[0..]);
@@ -741,7 +923,11 @@ fn testAllocatorAlignedShrink(allocator: *mem.Allocator) mem.Allocator.Error!voi
defer allocator.free(slice);
var stuff_to_free = std.ArrayList([]align(16) u8).init(debug_allocator);
while (@ptrToInt(slice.ptr) == mem.alignForward(@ptrToInt(slice.ptr), os.page_size * 2)) {
// On Windows, VirtualAlloc returns addresses aligned to a 64K boundary,
// which is 16 pages, hence the 32. Because of this high over-alignment,
// the allocations feeding the `allocator` parameter may need to be made
// larger if this test starts to fail.
while (@ptrToInt(slice.ptr) == mem.alignForward(@ptrToInt(slice.ptr), os.page_size * 32)) {
try stuff_to_free.append(slice);
slice = try allocator.alignedAlloc(u8, 16, alloc_size);
}
@@ -752,7 +938,7 @@ fn testAllocatorAlignedShrink(allocator: *mem.Allocator) mem.Allocator.Error!voi
slice[60] = 0x34;
// realloc to a smaller size but with a larger alignment
slice = try allocator.alignedRealloc(slice, os.page_size * 2, alloc_size / 2);
slice = try allocator.alignedRealloc(slice, os.page_size * 32, alloc_size / 2);
testing.expect(slice[0] == 0x12);
testing.expect(slice[60] == 0x34);
}

View File

@@ -239,6 +239,37 @@ pub const HEAP_CREATE_ENABLE_EXECUTE = 0x00040000;
pub const HEAP_GENERATE_EXCEPTIONS = 0x00000004;
pub const HEAP_NO_SERIALIZE = 0x00000001;
// AllocationType values
pub const MEM_COMMIT = 0x1000;
pub const MEM_RESERVE = 0x2000;
pub const MEM_RESET = 0x80000;
pub const MEM_RESET_UNDO = 0x1000000;
pub const MEM_LARGE_PAGES = 0x20000000;
pub const MEM_PHYSICAL = 0x400000;
pub const MEM_TOP_DOWN = 0x100000;
pub const MEM_WRITE_WATCH = 0x200000;
// Protect values
pub const PAGE_EXECUTE = 0x10;
pub const PAGE_EXECUTE_READ = 0x20;
pub const PAGE_EXECUTE_READWRITE = 0x40;
pub const PAGE_EXECUTE_WRITECOPY = 0x80;
pub const PAGE_NOACCESS = 0x01;
pub const PAGE_READONLY = 0x02;
pub const PAGE_READWRITE = 0x04;
pub const PAGE_WRITECOPY = 0x08;
pub const PAGE_TARGETS_INVALID = 0x40000000;
pub const PAGE_TARGETS_NO_UPDATE = 0x40000000; // Same as PAGE_TARGETS_INVALID
pub const PAGE_GUARD = 0x100;
pub const PAGE_NOCACHE = 0x200;
pub const PAGE_WRITECOMBINE = 0x400;
// FreeType values
pub const MEM_COALESCE_PLACEHOLDERS = 0x1;
pub const MEM_RESERVE_PLACEHOLDERS = 0x2;
pub const MEM_DECOMMIT = 0x4000;
pub const MEM_RELEASE = 0x8000;
pub const PTHREAD_START_ROUTINE = extern fn (LPVOID) DWORD;
pub const LPTHREAD_START_ROUTINE = PTHREAD_START_ROUTINE;

View File

@@ -116,6 +116,9 @@ pub extern "kernel32" stdcallcc fn HeapFree(hHeap: HANDLE, dwFlags: DWORD, lpMem
pub extern "kernel32" stdcallcc fn HeapValidate(hHeap: HANDLE, dwFlags: DWORD, lpMem: ?*const c_void) BOOL;
pub extern "kernel32" stdcallcc fn VirtualAlloc(lpAddress: ?LPVOID, dwSize: SIZE_T, flAllocationType: DWORD, flProtect: DWORD) ?LPVOID;
pub extern "kernel32" stdcallcc fn VirtualFree(lpAddress: ?LPVOID, dwSize: SIZE_T, dwFreeType: DWORD) BOOL;
pub extern "kernel32" stdcallcc fn MoveFileExW(
lpExistingFileName: [*]const u16,
lpNewFileName: [*]const u16,