From c24e5b5c8aff88a116ae543e06cf44a07e44949e Mon Sep 17 00:00:00 2001 From: Stephen Dolan Date: Sat, 4 May 2019 09:01:23 +0100 Subject: [PATCH] Ensure that Gc.minor_words remains accurate after a GC (#8619) If an allocation fails, the decrement of young_ptr should be undone before the GC is entered. This happened correctly on bytecode but not on native code. This commit (squash of pull request #8619) fixes it for all the platforms supported by ocamlopt. amd64: add alternate entry points caml_call_gc{1,2,3} for code size optimisation. powerpc: introduce one GC call point per allocation size per function. Each call point corrects the allocation pointer r31 before calling caml_call_gc. i386, arm, arm64, s390x: update the allocation pointer after the conditional branch to the GC, not before. arm64: simplify the code generator: Ialloc can assume that less than 0x1_0000 bytes are allocated, since the max allocation size for the minor heap is less than that. This is a partial cherry-pick of commit 8ceec on multicore. --- Changes | 4 + asmcomp/amd64/emit.mlp | 17 +++- asmcomp/arm/emit.mlp | 17 ++-- asmcomp/arm64/emit.mlp | 23 ++--- asmcomp/i386/emit.mlp | 2 +- asmcomp/power/emit.mlp | 87 +++++++++++++------ asmcomp/s390x/emit.mlp | 9 +- runtime/amd64.S | 25 ++++++ runtime/amd64nt.asm | 24 +++++ runtime/arm.S | 12 ++- runtime/arm64.S | 12 ++- runtime/i386.S | 7 +- runtime/i386nt.asm | 7 +- stdlib/gc.mli | 4 +- testsuite/tests/regression/pr7798/ocamltests | 1 + testsuite/tests/regression/pr7798/pr7798.ml | 57 ++++++++++++ .../tests/regression/pr7798/pr7798.reference | 1 + 17 files changed, 240 insertions(+), 69 deletions(-) create mode 100644 testsuite/tests/regression/pr7798/ocamltests create mode 100644 testsuite/tests/regression/pr7798/pr7798.ml create mode 100644 testsuite/tests/regression/pr7798/pr7798.reference diff --git a/Changes b/Changes index 12bf0ecfc..1cc1bce07 100644 --- a/Changes +++ b/Changes @@ -156,6 +156,10 @@ Working version - #8607: Remove obsolete macros for pre-2002 MSVC support (Stephen Dolan, review by Nicolás Ojeda Bär and David Allsopp) +- #8619: Ensure Gc.minor_words remains accurate after a GC. + (Stephen Dolan, Xavier Leroy and David Allsopp, + review by Xavier Leroy and Gabriel Scherer) + ### Standard library: - #2262: take precision (.) and flags ('+' and ' ') into account diff --git a/asmcomp/amd64/emit.mlp b/asmcomp/amd64/emit.mlp index d16c51790..1a6498a66 100644 --- a/asmcomp/amd64/emit.mlp +++ b/asmcomp/amd64/emit.mlp @@ -273,7 +273,8 @@ let spacetime_before_uninstrumented_call ~node_ptr ~index = (* Record calls to the GC -- we've moved them out of the way *) type gc_call = - { gc_lbl: label; (* Entry label *) + { gc_size: int; (* Allocation size, in bytes *) + gc_lbl: label; (* Entry label *) gc_return_lbl: label; (* Where to branch after GC *) gc_frame: label; (* Label of frame descriptor *) gc_spacetime : (X86_ast.arg * int) option; @@ -290,7 +291,13 @@ let emit_call_gc gc = assert Config.spacetime; spacetime_before_uninstrumented_call ~node_ptr ~index end; - emit_call "caml_call_gc"; + begin match gc.gc_size with + | 16 -> emit_call "caml_call_gc1" + | 24 -> emit_call "caml_call_gc2" + | 32 -> emit_call "caml_call_gc3" + | n -> I.add (int n) r15; + emit_call "caml_call_gc" + end; def_label gc.gc_frame; I.jmp (label gc.gc_return_lbl) @@ -660,7 +667,8 @@ let emit_instr fallthrough i = else Some (arg i 0, spacetime_index) in call_gc_sites := - { gc_lbl = lbl_call_gc; + { gc_size = n; + gc_lbl = lbl_call_gc; gc_return_lbl = lbl_redo; gc_frame = lbl_frame; gc_spacetime; } :: !call_gc_sites @@ -969,6 +977,9 @@ let begin_assembly() = D.extrn "caml_young_limit" QWORD; D.extrn "caml_exception_pointer" QWORD; D.extrn "caml_call_gc" NEAR; + D.extrn "caml_call_gc1" NEAR; + D.extrn "caml_call_gc2" NEAR; + D.extrn "caml_call_gc3" NEAR; D.extrn "caml_c_call" NEAR; D.extrn "caml_allocN" NEAR; D.extrn "caml_alloc1" NEAR; diff --git a/asmcomp/arm/emit.mlp b/asmcomp/arm/emit.mlp index 00d01748f..2d9802c11 100644 --- a/asmcomp/arm/emit.mlp +++ b/asmcomp/arm/emit.mlp @@ -633,14 +633,17 @@ let emit_instr i = if !fastcode_flag then begin let lbl_redo = new_label() in `{emit_label lbl_redo}:`; - let ninstr = decompose_intconst - (Int32.of_int n) - (fun i -> - ` sub alloc_ptr, alloc_ptr, #{emit_int32 i}\n`) in - ` cmp alloc_ptr, alloc_limit\n`; - ` add {emit_reg i.res.(0)}, alloc_ptr, #4\n`; + let first = ref true in + let ninstr = + decompose_intconst (Int32.of_int (n - 4)) (fun a -> + if !first + then ` sub {emit_reg i.res.(0)}, alloc_ptr, #{emit_int32 a}\n` + else ` sub {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, #{emit_int32 a}\n`; + first := false) in + ` cmp {emit_reg i.res.(0)}, alloc_limit\n`; let lbl_call_gc = new_label() in - ` bcc {emit_label lbl_call_gc}\n`; + ` bls {emit_label lbl_call_gc}\n`; + ` sub alloc_ptr, {emit_reg i.res.(0)}, #4\n`; call_gc_sites := { gc_lbl = lbl_call_gc; gc_return_lbl = lbl_redo; diff --git a/asmcomp/arm64/emit.mlp b/asmcomp/arm64/emit.mlp index a00cbced8..cc575e083 100644 --- a/asmcomp/arm64/emit.mlp +++ b/asmcomp/arm64/emit.mlp @@ -527,23 +527,26 @@ let assembly_code_for_allocation ?label_after_call_gc i ~n ~far = if !fastcode_flag then begin let lbl_redo = new_label() in let lbl_call_gc = new_label() in - assert (n < 0x1_000_000); - let nl = n land 0xFFF and nh = n land 0xFFF_000 in + (* n is at most Max_young_whsize * 8, i.e. currently 0x808, + so it is reasonable to assume n < 0x1_000. This makes + the generated code simpler. *) + assert (16 <= n && n < 0x1_000 && n land 0x7 = 0); + (* Instead of checking whether young_ptr - n < young_limit, we check whether + young_ptr - (n - 8) <= young_limit. It's equivalent, but this way around + we can avoid mutating young_ptr on failed allocations, by doing the + calculations in i.res.(0) instead of young_ptr. *) `{emit_label lbl_redo}:`; - if nh <> 0 then - ` sub {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, #{emit_int nh}\n`; - if nl <> 0 then - ` sub {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, #{emit_int nl}\n`; - ` cmp {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_limit}\n`; - ` add {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, #8\n`; + ` sub {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, #{emit_int (n - 8)}\n`; + ` cmp {emit_reg i.res.(0)}, {emit_reg reg_alloc_limit}\n`; if not far then begin - ` b.lo {emit_label lbl_call_gc}\n` + ` b.ls {emit_label lbl_call_gc}\n` end else begin let lbl = new_label () in - ` b.cs {emit_label lbl}\n`; + ` b.hi {emit_label lbl}\n`; ` b {emit_label lbl_call_gc}\n`; `{emit_label lbl}:\n` end; + ` sub {emit_reg reg_alloc_ptr}, {emit_reg i.res.(0)}, #8\n`; call_gc_sites := { gc_lbl = lbl_call_gc; gc_return_lbl = lbl_redo; diff --git a/asmcomp/i386/emit.mlp b/asmcomp/i386/emit.mlp index 9f55cd293..926215134 100644 --- a/asmcomp/i386/emit.mlp +++ b/asmcomp/i386/emit.mlp @@ -600,11 +600,11 @@ let emit_instr fallthrough i = def_label lbl_redo; I.mov (sym32 "caml_young_ptr") eax; I.sub (int n) eax; - I.mov eax (sym32 "caml_young_ptr"); I.cmp (sym32 "caml_young_limit") eax; let lbl_call_gc = new_label() in let lbl_frame = record_frame_label i.live false Debuginfo.none in I.jb (label lbl_call_gc); + I.mov eax (sym32 "caml_young_ptr"); I.lea (mem32 NONE 4 RAX) (reg i.res.(0)); call_gc_sites := { gc_lbl = lbl_call_gc; diff --git a/asmcomp/power/emit.mlp b/asmcomp/power/emit.mlp index 558d1a1e8..7bd96e551 100644 --- a/asmcomp/power/emit.mlp +++ b/asmcomp/power/emit.mlp @@ -16,7 +16,6 @@ (* Emission of PowerPC assembly code *) -open Misc open Cmm open Arch open Proc @@ -124,7 +123,7 @@ let emit_gpr = emit_int let emit_reg r = match r.loc with | Reg r -> emit_string (register_name r) - | _ -> fatal_error "Emit.emit_reg" + | _ -> Misc.fatal_error "Emit.emit_reg" (* Output a stack reference *) @@ -132,7 +131,7 @@ let emit_stack r = match r.loc with | Stack s -> let ofs = slot_offset s (register_class r) in `{emit_int ofs}(1)` - | _ -> fatal_error "Emit.emit_stack" + | _ -> Misc.fatal_error "Emit.emit_stack" (* Output the name of a symbol plus an optional offset *) @@ -393,8 +392,38 @@ let name_for_specific = function let function_name = ref "" (* Entry point for tail recursive calls *) let tailrec_entry_point = ref 0 -(* Label of glue code for calling the GC *) -let call_gc_label = ref 0 + +module IntSet = Stdlib.Set.Make(Stdlib.Int) +module IntMap = Stdlib.Map.Make(Stdlib.Int) + +(* Labels of glue code for calling the GC. + There is one label per size allocated. *) +let call_gc_labels : label IntMap.t ref = ref IntMap.empty + (* size -> label *) + +(* Return the label of the call GC point for the given size *) + +let label_for_call_gc ?label_after_call_gc sz = + match IntMap.find_opt sz !call_gc_labels with + | Some lbl -> lbl + | None -> + let lbl = + match label_after_call_gc with Some l -> l | None -> new_label() in + call_gc_labels := IntMap.add sz lbl !call_gc_labels; + lbl + +(* Number of call GC points *) + +let num_call_gc instr = + let rec loop i cg = + match i.desc with + | Lend -> IntSet.cardinal cg + | Lop (Ialloc {bytes = sz}) -> loop i.next (IntSet.add sz cg) + (* The following should never be seen, since this function is run + before branch relaxation. *) + | Lop (Ispecific (Ialloc_far _)) -> assert false + | _ -> loop i.next cg + in loop instr IntSet.empty (* Relaxation of branches that exceed the span of a relative branch. *) @@ -560,7 +589,7 @@ let emit_instr i = | {loc = Stack _; typ = Float}, {loc = Reg _} -> ` lfd {emit_reg dst}, {emit_stack src}\n` | (_, _) -> - fatal_error "Emit: Imove" + Misc.fatal_error "Emit: Imove" end | Lop(Iconst_int n) -> if is_native_immediate n then @@ -751,28 +780,20 @@ let emit_instr i = | Double | Double_u -> "stfd" in emit_load_store storeinstr addr i.arg 1 i.arg.(0) | Lop(Ialloc { bytes = n; label_after_call_gc; }) -> - if !call_gc_label = 0 then begin - match label_after_call_gc with - | None -> call_gc_label := new_label () - | Some label -> call_gc_label := label - end; + let call_gc_lbl = label_for_call_gc ?label_after_call_gc n in ` addi 31, 31, {emit_int(-n)}\n`; ` {emit_string cmplg} 31, 30\n`; ` addi {emit_reg i.res.(0)}, 31, {emit_int size_addr}\n`; - ` bltl {emit_label !call_gc_label}\n`; + ` bltl {emit_label call_gc_lbl}\n`; (* Exactly 4 instructions after the beginning of the alloc sequence *) record_frame i.live false Debuginfo.none | Lop(Ispecific(Ialloc_far { bytes = n; label_after_call_gc; })) -> - if !call_gc_label = 0 then begin - match label_after_call_gc with - | None -> call_gc_label := new_label () - | Some label -> call_gc_label := label - end; + let call_gc_lbl = label_for_call_gc ?label_after_call_gc n in let lbl = new_label() in ` addi 31, 31, {emit_int(-n)}\n`; ` {emit_string cmplg} 31, 30\n`; ` bge {emit_label lbl}\n`; - ` bl {emit_label !call_gc_label}\n`; + ` bl {emit_label call_gc_lbl}\n`; (* Exactly 4 instructions after the beginning of the alloc sequence *) record_frame i.live false Debuginfo.none; `{emit_label lbl}: addi {emit_reg i.res.(0)}, 31, {emit_int size_addr}\n` @@ -1009,7 +1030,7 @@ let fundecl fundecl = function_name := fundecl.fun_name; tailrec_entry_point := fundecl.fun_tailrec_entry_point_label; stack_offset := 0; - call_gc_label := 0; + call_gc_labels := IntMap.empty; float_literals := []; jumptables := []; jumptables_lbl := -1; begin match abi with @@ -1041,14 +1062,30 @@ let fundecl fundecl = end; emit_debug_info fundecl.fun_dbg; cfi_startproc(); - (* On this target, there is at most one "out of line" code block per - function: a single "call GC" point. It comes immediately after the - function's body. *) - BR.relax fundecl.fun_body ~max_out_of_line_code_offset:0; + let num_call_gc = num_call_gc fundecl.fun_body in + let max_out_of_line_code_offset = max (num_call_gc - 1) 0 in + BR.relax fundecl.fun_body ~max_out_of_line_code_offset; emit_all fundecl.fun_body; (* Emit the glue code to call the GC *) - if !call_gc_label > 0 then begin - `{emit_label !call_gc_label}:\n`; + assert (IntMap.cardinal !call_gc_labels = num_call_gc); + if num_call_gc > 0 then begin + (* Replace sizes by deltas with next size *) + let rec delta_encode = function + | (sz1, lbl1) :: ((sz2, _) :: _ as l) -> + (sz1 - sz2, lbl1) :: delta_encode l + | ([] | [(_,_)]) as l -> l in + (* Enumerate the GC call points by decreasing size. This is not + necessary for correctness, but it is nice for two reasons: + 1- all deltas are positive, making the generated code + easier to read, and + 2- smaller allocation sizes, which are more frequent, execute + fewer instructions before calling the GC. *) + let delta_lbl_list = + delta_encode (List.rev (IntMap.bindings !call_gc_labels)) in + List.iter + (fun (delta, lbl) -> + `{emit_label lbl}: addi 31, 31, {emit_int delta}\n`) + delta_lbl_list; match abi with | ELF32 -> ` b {emit_symbol "caml_call_gc"}\n` diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp index 619b454fe..4cf31436f 100644 --- a/asmcomp/s390x/emit.mlp +++ b/asmcomp/s390x/emit.mlp @@ -429,10 +429,11 @@ let emit_instr i = gc_return_lbl = lbl_redo; gc_frame_lbl = lbl_frame } :: !call_gc_sites; `{emit_label lbl_redo}:`; - ` lay %r11, {emit_int(-n)}(%r11)\n`; - ` clgr %r11, %r10\n`; - ` brcl 4, {emit_label lbl_call_gc}\n`; (* less than *) - ` la {emit_reg i.res.(0)}, 8(%r11)\n` + ` lay {emit_reg i.res.(0)}, {emit_int(-n+8)}(%r11)\n`; + ` clgr {emit_reg i.res.(0)}, %r10\n`; + ` brcl 12, {emit_label lbl_call_gc}\n`; + (* less than or equal *) + ` lay %r11, -8({emit_reg i.res.(0)})\n` | Lop(Iintop Imulh) -> (* Hacker's Delight section 8.3: diff --git a/runtime/amd64.S b/runtime/amd64.S index 1a024283e..66cd00027 100644 --- a/runtime/amd64.S +++ b/runtime/amd64.S @@ -397,6 +397,7 @@ LBL(caml_alloc1): jb LBL(100) ret LBL(100): + addq $16, %r15 RECORD_STACK_FRAME(0) ENTER_FUNCTION /* subq $8, %rsp; CFI_ADJUST (8); */ @@ -414,6 +415,7 @@ LBL(caml_alloc2): jb LBL(101) ret LBL(101): + addq $24, %r15 RECORD_STACK_FRAME(0) ENTER_FUNCTION /* subq $8, %rsp; CFI_ADJUST (8); */ @@ -431,6 +433,7 @@ LBL(caml_alloc3): jb LBL(102) ret LBL(102): + addq $32, %r15 RECORD_STACK_FRAME(0) ENTER_FUNCTION /* subq $8, %rsp; CFI_ADJUST (8) */ @@ -450,6 +453,7 @@ LBL(caml_allocN): addq $8, %rsp; CFI_ADJUST (-8) /* drop desired size */ ret LBL(103): + addq 0(%rsp), %r15 CFI_ADJUST(8) RECORD_STACK_FRAME(8) #ifdef WITH_FRAME_POINTERS @@ -467,6 +471,27 @@ LBL(103): jmp LBL(caml_allocN) CFI_ENDPROC +/* Reset the allocation pointer and invoke the GC */ + +FUNCTION(G(caml_call_gc1)) +CFI_STARTPROC + addq $16, %r15 + jmp GCALL(caml_call_gc) +CFI_ENDPROC + +FUNCTION(G(caml_call_gc2)) +CFI_STARTPROC + addq $24, %r15 + jmp GCALL(caml_call_gc) +CFI_ENDPROC + +FUNCTION(G(caml_call_gc3)) +CFI_STARTPROC + addq $32, %r15 + jmp GCALL(caml_call_gc) +CFI_ENDPROC + + /* Call a C function from OCaml */ FUNCTION(G(caml_c_call)) diff --git a/runtime/amd64nt.asm b/runtime/amd64nt.asm index f7509ce16..e6b173771 100644 --- a/runtime/amd64nt.asm +++ b/runtime/amd64nt.asm @@ -150,6 +150,7 @@ caml_alloc1: jb L100 ret L100: + add r15, 16 mov rax, [rsp + 0] mov caml_last_return_address, rax lea rax, [rsp + 8] @@ -167,6 +168,7 @@ caml_alloc2: jb L101 ret L101: + add r15, 24 mov rax, [rsp + 0] mov caml_last_return_address, rax lea rax, [rsp + 8] @@ -184,6 +186,7 @@ caml_alloc3: jb L102 ret L102: + add r15, 32 mov rax, [rsp + 0] mov caml_last_return_address, rax lea rax, [rsp + 8] @@ -201,6 +204,7 @@ caml_allocN: jb L103 ret L103: + add r15, rax push rax ; save desired size mov rax, [rsp + 8] mov caml_last_return_address, rax @@ -210,6 +214,26 @@ L103: pop rax ; recover desired size jmp caml_allocN +; Reset the allocation pointer and invoke the GC + + PUBLIC caml_call_gc1 + ALIGN 16 +caml_call_gc1: + add r15, 16 + jmp caml_call_gc + + PUBLIC caml_call_gc2 + ALIGN 16 +caml_call_gc2: + add r15, 24 + jmp caml_call_gc + + PUBLIC caml_call_gc3 + ALIGN 16 +caml_call_gc3: + add r15, 32 + jmp caml_call_gc + ; Call a C function from OCaml PUBLIC caml_c_call diff --git a/runtime/arm.S b/runtime/arm.S index fd43b2141..2f2275cb8 100644 --- a/runtime/arm.S +++ b/runtime/arm.S @@ -163,7 +163,8 @@ caml_alloc1: cmp alloc_ptr, alloc_limit bcc 1f bx lr -1: /* Record return address */ +1: add alloc_ptr, alloc_ptr, 8 + /* Record return address */ ldr r7, =caml_last_return_address str lr, [r7] /* Call GC (preserves r7) */ @@ -185,7 +186,8 @@ caml_alloc2: cmp alloc_ptr, alloc_limit bcc 1f bx lr -1: /* Record return address */ +1: add alloc_ptr, alloc_ptr, 12 + /* Record return address */ ldr r7, =caml_last_return_address str lr, [r7] /* Call GC (preserves r7) */ @@ -208,7 +210,8 @@ caml_alloc3: cmp alloc_ptr, alloc_limit bcc 1f bx lr -1: /* Record return address */ +1: add alloc_ptr, alloc_ptr, 16 + /* Record return address */ ldr r7, =caml_last_return_address str lr, [r7] /* Call GC (preserves r7) */ @@ -230,7 +233,8 @@ caml_allocN: cmp alloc_ptr, alloc_limit bcc 1f bx lr -1: /* Record return address */ +1: add alloc_ptr, alloc_ptr, r7 + /* Record return address */ ldr r12, =caml_last_return_address str lr, [r12] /* Call GC (preserves r7) */ diff --git a/runtime/arm64.S b/runtime/arm64.S index f78572639..7c26ddad2 100644 --- a/runtime/arm64.S +++ b/runtime/arm64.S @@ -185,7 +185,8 @@ caml_alloc1: cmp ALLOC_PTR, ALLOC_LIMIT b.lo 2f ret -2: stp x29, x30, [sp, -16]! +2: add ALLOC_PTR, ALLOC_PTR, #16 + stp x29, x30, [sp, -16]! CFI_ADJUST(16) /* Record the lowest address of the caller's stack frame. This is the address immediately above the pair of words (x29 and x30) we just @@ -217,7 +218,8 @@ caml_alloc2: cmp ALLOC_PTR, ALLOC_LIMIT b.lo 2f ret -2: stp x29, x30, [sp, -16]! +2: add ALLOC_PTR, ALLOC_PTR, #24 + stp x29, x30, [sp, -16]! CFI_ADJUST(16) /* Record the lowest address of the caller's stack frame. See comment above. */ @@ -245,7 +247,8 @@ caml_alloc3: cmp ALLOC_PTR, ALLOC_LIMIT b.lo 2f ret -2: stp x29, x30, [sp, -16]! +2: add ALLOC_PTR, ALLOC_PTR, #32 + stp x29, x30, [sp, -16]! CFI_ADJUST(16) /* Record the lowest address of the caller's stack frame. See comment above. */ @@ -273,7 +276,8 @@ caml_allocN: cmp ALLOC_PTR, ALLOC_LIMIT b.lo 2f ret -2: stp x29, x30, [sp, -16]! +2: add ALLOC_PTR, ALLOC_PTR, ARG + stp x29, x30, [sp, -16]! CFI_ADJUST(16) /* Record the lowest address of the caller's stack frame. See comment above. */ diff --git a/runtime/i386.S b/runtime/i386.S index 314dfe43b..007b3a01e 100644 --- a/runtime/i386.S +++ b/runtime/i386.S @@ -113,9 +113,9 @@ FUNCTION(caml_alloc1) CFI_STARTPROC movl G(caml_young_ptr), %eax subl $8, %eax - movl %eax, G(caml_young_ptr) cmpl G(caml_young_limit), %eax jb LBL(100) + movl %eax, G(caml_young_ptr) ret LBL(100): movl 0(%esp), %eax @@ -132,9 +132,9 @@ FUNCTION(caml_alloc2) CFI_STARTPROC movl G(caml_young_ptr), %eax subl $12, %eax - movl %eax, G(caml_young_ptr) cmpl G(caml_young_limit), %eax jb LBL(101) + movl %eax, G(caml_young_ptr) ret LBL(101): movl 0(%esp), %eax @@ -151,9 +151,9 @@ FUNCTION(caml_alloc3) CFI_STARTPROC movl G(caml_young_ptr), %eax subl $16, %eax - movl %eax, G(caml_young_ptr) cmpl G(caml_young_limit), %eax jb LBL(102) + movl %eax, G(caml_young_ptr) ret LBL(102): movl 0(%esp), %eax @@ -178,7 +178,6 @@ LBL(103): subl G(caml_young_ptr), %eax /* eax = - size */ negl %eax /* eax = size */ pushl %eax; CFI_ADJUST(4) /* save desired size */ - subl %eax, G(caml_young_ptr) /* must update young_ptr */ movl 4(%esp), %eax movl %eax, G(caml_last_return_address) leal 8(%esp), %eax diff --git a/runtime/i386nt.asm b/runtime/i386nt.asm index b67306769..b8d5cc4bc 100644 --- a/runtime/i386nt.asm +++ b/runtime/i386nt.asm @@ -74,9 +74,9 @@ L105: push ebp _caml_alloc1: mov eax, _caml_young_ptr sub eax, 8 - mov _caml_young_ptr, eax cmp eax, _caml_young_limit jb L100 + mov _caml_young_ptr, eax ret L100: mov eax, [esp] mov _caml_last_return_address, eax @@ -89,9 +89,9 @@ L100: mov eax, [esp] _caml_alloc2: mov eax, _caml_young_ptr sub eax, 12 - mov _caml_young_ptr, eax cmp eax, _caml_young_limit jb L101 + mov _caml_young_ptr, eax ret L101: mov eax, [esp] mov _caml_last_return_address, eax @@ -104,9 +104,9 @@ L101: mov eax, [esp] _caml_alloc3: mov eax, _caml_young_ptr sub eax, 16 - mov _caml_young_ptr, eax cmp eax, _caml_young_limit jb L102 + mov _caml_young_ptr, eax ret L102: mov eax, [esp] mov _caml_last_return_address, eax @@ -126,7 +126,6 @@ _caml_allocN: L103: sub eax, _caml_young_ptr ; eax = - size neg eax ; eax = size push eax ; save desired size - sub _caml_young_ptr, eax ; must update young_ptr mov eax, [esp+4] mov _caml_last_return_address, eax lea eax, [esp+8] diff --git a/stdlib/gc.mli b/stdlib/gc.mli index 47e7dedf4..813a4ada0 100644 --- a/stdlib/gc.mli +++ b/stdlib/gc.mli @@ -18,9 +18,7 @@ type stat = { minor_words : float; (** Number of words allocated in the minor heap since - the program was started. This number is accurate in - byte-code programs, but only an approximation in programs - compiled to native code. *) + the program was started. *) promoted_words : float; (** Number of words allocated in the minor heap that diff --git a/testsuite/tests/regression/pr7798/ocamltests b/testsuite/tests/regression/pr7798/ocamltests new file mode 100644 index 000000000..86ca3eab1 --- /dev/null +++ b/testsuite/tests/regression/pr7798/ocamltests @@ -0,0 +1 @@ +pr7798.ml diff --git a/testsuite/tests/regression/pr7798/pr7798.ml b/testsuite/tests/regression/pr7798/pr7798.ml new file mode 100644 index 000000000..a91b4dc2f --- /dev/null +++ b/testsuite/tests/regression/pr7798/pr7798.ml @@ -0,0 +1,57 @@ +(* TEST + * bytecode + * native + * native + ocamlopt_flags = "-compact" +*) + +type mut2 = { mutable p: int; mutable q:int } +type mut3 = { mutable s: int; mutable t:int; mutable u:int } + +type mut_record = + { mutable a : int; + mutable b : int; + mutable c : int; + mutable d : int; + mutable e : int; + mutable f : int; } + +let go () = + let pre_before = Gc.minor_words () in + let before = Gc.minor_words () in + let alloc_per_minor_words = int_of_float (before -. pre_before) in + if Sys.backend_type = Sys.Native then assert (alloc_per_minor_words = 0); + let allocs = ref alloc_per_minor_words in + let n = 1_000_000 in + for i = 1 to n do + Sys.opaque_identity (ref i) + |> ignore; + allocs := !allocs + 2; + done; + for i = 1 to n do + Sys.opaque_identity { p = i; q = i } + |> ignore; + allocs := !allocs + 3; + done; + for i = 1 to n do + Sys.opaque_identity { s = i; t = i; u = i } + |> ignore; + allocs := !allocs + 4; + done; + for i = 1 to n do + Sys.opaque_identity { a = i; b = i; c = i; d = i; e = i; f = i } + |> ignore; + allocs := !allocs + 7; + if i mod (n/3) == 0 then Gc.full_major (); + done; + for i = 1 to n do + Sys.opaque_identity (Array.make 8 i) + |> ignore; + allocs := !allocs + 9; + if i mod (n/3) == 0 then Gc.compact (); + done; + let after = Gc.minor_words () in + let measured_allocs = int_of_float (after -. before) - alloc_per_minor_words in + Printf.printf "%d\n" (measured_allocs - !allocs) + +let () = go () diff --git a/testsuite/tests/regression/pr7798/pr7798.reference b/testsuite/tests/regression/pr7798/pr7798.reference new file mode 100644 index 000000000..573541ac9 --- /dev/null +++ b/testsuite/tests/regression/pr7798/pr7798.reference @@ -0,0 +1 @@ +0