Ensure that Gc.minor_words remains accurate after a GC (#8619)
If an allocation fails, the decrement of young_ptr should be undone before the GC is entered. This happened correctly on bytecode but not on native code. This commit (squash of pull request #8619) fixes it for all the platforms supported by ocamlopt. amd64: add alternate entry points caml_call_gc{1,2,3} for code size optimisation. powerpc: introduce one GC call point per allocation size per function. Each call point corrects the allocation pointer r31 before calling caml_call_gc. i386, arm, arm64, s390x: update the allocation pointer after the conditional branch to the GC, not before. arm64: simplify the code generator: Ialloc can assume that less than 0x1_0000 bytes are allocated, since the max allocation size for the minor heap is less than that. This is a partial cherry-pick of commit 8ceec on multicore.master
parent
b818e2af91
commit
c24e5b5c8a
4
Changes
4
Changes
|
@ -156,6 +156,10 @@ Working version
|
|||
- #8607: Remove obsolete macros for pre-2002 MSVC support
|
||||
(Stephen Dolan, review by Nicolás Ojeda Bär and David Allsopp)
|
||||
|
||||
- #8619: Ensure Gc.minor_words remains accurate after a GC.
|
||||
(Stephen Dolan, Xavier Leroy and David Allsopp,
|
||||
review by Xavier Leroy and Gabriel Scherer)
|
||||
|
||||
### Standard library:
|
||||
|
||||
- #2262: take precision (.<n>) and flags ('+' and ' ') into account
|
||||
|
|
|
@ -273,7 +273,8 @@ let spacetime_before_uninstrumented_call ~node_ptr ~index =
|
|||
(* Record calls to the GC -- we've moved them out of the way *)
|
||||
|
||||
type gc_call =
|
||||
{ gc_lbl: label; (* Entry label *)
|
||||
{ gc_size: int; (* Allocation size, in bytes *)
|
||||
gc_lbl: label; (* Entry label *)
|
||||
gc_return_lbl: label; (* Where to branch after GC *)
|
||||
gc_frame: label; (* Label of frame descriptor *)
|
||||
gc_spacetime : (X86_ast.arg * int) option;
|
||||
|
@ -290,7 +291,13 @@ let emit_call_gc gc =
|
|||
assert Config.spacetime;
|
||||
spacetime_before_uninstrumented_call ~node_ptr ~index
|
||||
end;
|
||||
emit_call "caml_call_gc";
|
||||
begin match gc.gc_size with
|
||||
| 16 -> emit_call "caml_call_gc1"
|
||||
| 24 -> emit_call "caml_call_gc2"
|
||||
| 32 -> emit_call "caml_call_gc3"
|
||||
| n -> I.add (int n) r15;
|
||||
emit_call "caml_call_gc"
|
||||
end;
|
||||
def_label gc.gc_frame;
|
||||
I.jmp (label gc.gc_return_lbl)
|
||||
|
||||
|
@ -660,7 +667,8 @@ let emit_instr fallthrough i =
|
|||
else Some (arg i 0, spacetime_index)
|
||||
in
|
||||
call_gc_sites :=
|
||||
{ gc_lbl = lbl_call_gc;
|
||||
{ gc_size = n;
|
||||
gc_lbl = lbl_call_gc;
|
||||
gc_return_lbl = lbl_redo;
|
||||
gc_frame = lbl_frame;
|
||||
gc_spacetime; } :: !call_gc_sites
|
||||
|
@ -969,6 +977,9 @@ let begin_assembly() =
|
|||
D.extrn "caml_young_limit" QWORD;
|
||||
D.extrn "caml_exception_pointer" QWORD;
|
||||
D.extrn "caml_call_gc" NEAR;
|
||||
D.extrn "caml_call_gc1" NEAR;
|
||||
D.extrn "caml_call_gc2" NEAR;
|
||||
D.extrn "caml_call_gc3" NEAR;
|
||||
D.extrn "caml_c_call" NEAR;
|
||||
D.extrn "caml_allocN" NEAR;
|
||||
D.extrn "caml_alloc1" NEAR;
|
||||
|
|
|
@ -633,14 +633,17 @@ let emit_instr i =
|
|||
if !fastcode_flag then begin
|
||||
let lbl_redo = new_label() in
|
||||
`{emit_label lbl_redo}:`;
|
||||
let ninstr = decompose_intconst
|
||||
(Int32.of_int n)
|
||||
(fun i ->
|
||||
` sub alloc_ptr, alloc_ptr, #{emit_int32 i}\n`) in
|
||||
` cmp alloc_ptr, alloc_limit\n`;
|
||||
` add {emit_reg i.res.(0)}, alloc_ptr, #4\n`;
|
||||
let first = ref true in
|
||||
let ninstr =
|
||||
decompose_intconst (Int32.of_int (n - 4)) (fun a ->
|
||||
if !first
|
||||
then ` sub {emit_reg i.res.(0)}, alloc_ptr, #{emit_int32 a}\n`
|
||||
else ` sub {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, #{emit_int32 a}\n`;
|
||||
first := false) in
|
||||
` cmp {emit_reg i.res.(0)}, alloc_limit\n`;
|
||||
let lbl_call_gc = new_label() in
|
||||
` bcc {emit_label lbl_call_gc}\n`;
|
||||
` bls {emit_label lbl_call_gc}\n`;
|
||||
` sub alloc_ptr, {emit_reg i.res.(0)}, #4\n`;
|
||||
call_gc_sites :=
|
||||
{ gc_lbl = lbl_call_gc;
|
||||
gc_return_lbl = lbl_redo;
|
||||
|
|
|
@ -527,23 +527,26 @@ let assembly_code_for_allocation ?label_after_call_gc i ~n ~far =
|
|||
if !fastcode_flag then begin
|
||||
let lbl_redo = new_label() in
|
||||
let lbl_call_gc = new_label() in
|
||||
assert (n < 0x1_000_000);
|
||||
let nl = n land 0xFFF and nh = n land 0xFFF_000 in
|
||||
(* n is at most Max_young_whsize * 8, i.e. currently 0x808,
|
||||
so it is reasonable to assume n < 0x1_000. This makes
|
||||
the generated code simpler. *)
|
||||
assert (16 <= n && n < 0x1_000 && n land 0x7 = 0);
|
||||
(* Instead of checking whether young_ptr - n < young_limit, we check whether
|
||||
young_ptr - (n - 8) <= young_limit. It's equivalent, but this way around
|
||||
we can avoid mutating young_ptr on failed allocations, by doing the
|
||||
calculations in i.res.(0) instead of young_ptr. *)
|
||||
`{emit_label lbl_redo}:`;
|
||||
if nh <> 0 then
|
||||
` sub {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, #{emit_int nh}\n`;
|
||||
if nl <> 0 then
|
||||
` sub {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, #{emit_int nl}\n`;
|
||||
` cmp {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_limit}\n`;
|
||||
` add {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, #8\n`;
|
||||
` sub {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, #{emit_int (n - 8)}\n`;
|
||||
` cmp {emit_reg i.res.(0)}, {emit_reg reg_alloc_limit}\n`;
|
||||
if not far then begin
|
||||
` b.lo {emit_label lbl_call_gc}\n`
|
||||
` b.ls {emit_label lbl_call_gc}\n`
|
||||
end else begin
|
||||
let lbl = new_label () in
|
||||
` b.cs {emit_label lbl}\n`;
|
||||
` b.hi {emit_label lbl}\n`;
|
||||
` b {emit_label lbl_call_gc}\n`;
|
||||
`{emit_label lbl}:\n`
|
||||
end;
|
||||
` sub {emit_reg reg_alloc_ptr}, {emit_reg i.res.(0)}, #8\n`;
|
||||
call_gc_sites :=
|
||||
{ gc_lbl = lbl_call_gc;
|
||||
gc_return_lbl = lbl_redo;
|
||||
|
|
|
@ -600,11 +600,11 @@ let emit_instr fallthrough i =
|
|||
def_label lbl_redo;
|
||||
I.mov (sym32 "caml_young_ptr") eax;
|
||||
I.sub (int n) eax;
|
||||
I.mov eax (sym32 "caml_young_ptr");
|
||||
I.cmp (sym32 "caml_young_limit") eax;
|
||||
let lbl_call_gc = new_label() in
|
||||
let lbl_frame = record_frame_label i.live false Debuginfo.none in
|
||||
I.jb (label lbl_call_gc);
|
||||
I.mov eax (sym32 "caml_young_ptr");
|
||||
I.lea (mem32 NONE 4 RAX) (reg i.res.(0));
|
||||
call_gc_sites :=
|
||||
{ gc_lbl = lbl_call_gc;
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
|
||||
(* Emission of PowerPC assembly code *)
|
||||
|
||||
open Misc
|
||||
open Cmm
|
||||
open Arch
|
||||
open Proc
|
||||
|
@ -124,7 +123,7 @@ let emit_gpr = emit_int
|
|||
let emit_reg r =
|
||||
match r.loc with
|
||||
| Reg r -> emit_string (register_name r)
|
||||
| _ -> fatal_error "Emit.emit_reg"
|
||||
| _ -> Misc.fatal_error "Emit.emit_reg"
|
||||
|
||||
(* Output a stack reference *)
|
||||
|
||||
|
@ -132,7 +131,7 @@ let emit_stack r =
|
|||
match r.loc with
|
||||
| Stack s ->
|
||||
let ofs = slot_offset s (register_class r) in `{emit_int ofs}(1)`
|
||||
| _ -> fatal_error "Emit.emit_stack"
|
||||
| _ -> Misc.fatal_error "Emit.emit_stack"
|
||||
|
||||
(* Output the name of a symbol plus an optional offset *)
|
||||
|
||||
|
@ -393,8 +392,38 @@ let name_for_specific = function
|
|||
let function_name = ref ""
|
||||
(* Entry point for tail recursive calls *)
|
||||
let tailrec_entry_point = ref 0
|
||||
(* Label of glue code for calling the GC *)
|
||||
let call_gc_label = ref 0
|
||||
|
||||
module IntSet = Stdlib.Set.Make(Stdlib.Int)
|
||||
module IntMap = Stdlib.Map.Make(Stdlib.Int)
|
||||
|
||||
(* Labels of glue code for calling the GC.
|
||||
There is one label per size allocated. *)
|
||||
let call_gc_labels : label IntMap.t ref = ref IntMap.empty
|
||||
(* size -> label *)
|
||||
|
||||
(* Return the label of the call GC point for the given size *)
|
||||
|
||||
let label_for_call_gc ?label_after_call_gc sz =
|
||||
match IntMap.find_opt sz !call_gc_labels with
|
||||
| Some lbl -> lbl
|
||||
| None ->
|
||||
let lbl =
|
||||
match label_after_call_gc with Some l -> l | None -> new_label() in
|
||||
call_gc_labels := IntMap.add sz lbl !call_gc_labels;
|
||||
lbl
|
||||
|
||||
(* Number of call GC points *)
|
||||
|
||||
let num_call_gc instr =
|
||||
let rec loop i cg =
|
||||
match i.desc with
|
||||
| Lend -> IntSet.cardinal cg
|
||||
| Lop (Ialloc {bytes = sz}) -> loop i.next (IntSet.add sz cg)
|
||||
(* The following should never be seen, since this function is run
|
||||
before branch relaxation. *)
|
||||
| Lop (Ispecific (Ialloc_far _)) -> assert false
|
||||
| _ -> loop i.next cg
|
||||
in loop instr IntSet.empty
|
||||
|
||||
(* Relaxation of branches that exceed the span of a relative branch. *)
|
||||
|
||||
|
@ -560,7 +589,7 @@ let emit_instr i =
|
|||
| {loc = Stack _; typ = Float}, {loc = Reg _} ->
|
||||
` lfd {emit_reg dst}, {emit_stack src}\n`
|
||||
| (_, _) ->
|
||||
fatal_error "Emit: Imove"
|
||||
Misc.fatal_error "Emit: Imove"
|
||||
end
|
||||
| Lop(Iconst_int n) ->
|
||||
if is_native_immediate n then
|
||||
|
@ -751,28 +780,20 @@ let emit_instr i =
|
|||
| Double | Double_u -> "stfd" in
|
||||
emit_load_store storeinstr addr i.arg 1 i.arg.(0)
|
||||
| Lop(Ialloc { bytes = n; label_after_call_gc; }) ->
|
||||
if !call_gc_label = 0 then begin
|
||||
match label_after_call_gc with
|
||||
| None -> call_gc_label := new_label ()
|
||||
| Some label -> call_gc_label := label
|
||||
end;
|
||||
let call_gc_lbl = label_for_call_gc ?label_after_call_gc n in
|
||||
` addi 31, 31, {emit_int(-n)}\n`;
|
||||
` {emit_string cmplg} 31, 30\n`;
|
||||
` addi {emit_reg i.res.(0)}, 31, {emit_int size_addr}\n`;
|
||||
` bltl {emit_label !call_gc_label}\n`;
|
||||
` bltl {emit_label call_gc_lbl}\n`;
|
||||
(* Exactly 4 instructions after the beginning of the alloc sequence *)
|
||||
record_frame i.live false Debuginfo.none
|
||||
| Lop(Ispecific(Ialloc_far { bytes = n; label_after_call_gc; })) ->
|
||||
if !call_gc_label = 0 then begin
|
||||
match label_after_call_gc with
|
||||
| None -> call_gc_label := new_label ()
|
||||
| Some label -> call_gc_label := label
|
||||
end;
|
||||
let call_gc_lbl = label_for_call_gc ?label_after_call_gc n in
|
||||
let lbl = new_label() in
|
||||
` addi 31, 31, {emit_int(-n)}\n`;
|
||||
` {emit_string cmplg} 31, 30\n`;
|
||||
` bge {emit_label lbl}\n`;
|
||||
` bl {emit_label !call_gc_label}\n`;
|
||||
` bl {emit_label call_gc_lbl}\n`;
|
||||
(* Exactly 4 instructions after the beginning of the alloc sequence *)
|
||||
record_frame i.live false Debuginfo.none;
|
||||
`{emit_label lbl}: addi {emit_reg i.res.(0)}, 31, {emit_int size_addr}\n`
|
||||
|
@ -1009,7 +1030,7 @@ let fundecl fundecl =
|
|||
function_name := fundecl.fun_name;
|
||||
tailrec_entry_point := fundecl.fun_tailrec_entry_point_label;
|
||||
stack_offset := 0;
|
||||
call_gc_label := 0;
|
||||
call_gc_labels := IntMap.empty;
|
||||
float_literals := [];
|
||||
jumptables := []; jumptables_lbl := -1;
|
||||
begin match abi with
|
||||
|
@ -1041,14 +1062,30 @@ let fundecl fundecl =
|
|||
end;
|
||||
emit_debug_info fundecl.fun_dbg;
|
||||
cfi_startproc();
|
||||
(* On this target, there is at most one "out of line" code block per
|
||||
function: a single "call GC" point. It comes immediately after the
|
||||
function's body. *)
|
||||
BR.relax fundecl.fun_body ~max_out_of_line_code_offset:0;
|
||||
let num_call_gc = num_call_gc fundecl.fun_body in
|
||||
let max_out_of_line_code_offset = max (num_call_gc - 1) 0 in
|
||||
BR.relax fundecl.fun_body ~max_out_of_line_code_offset;
|
||||
emit_all fundecl.fun_body;
|
||||
(* Emit the glue code to call the GC *)
|
||||
if !call_gc_label > 0 then begin
|
||||
`{emit_label !call_gc_label}:\n`;
|
||||
assert (IntMap.cardinal !call_gc_labels = num_call_gc);
|
||||
if num_call_gc > 0 then begin
|
||||
(* Replace sizes by deltas with next size *)
|
||||
let rec delta_encode = function
|
||||
| (sz1, lbl1) :: ((sz2, _) :: _ as l) ->
|
||||
(sz1 - sz2, lbl1) :: delta_encode l
|
||||
| ([] | [(_,_)]) as l -> l in
|
||||
(* Enumerate the GC call points by decreasing size. This is not
|
||||
necessary for correctness, but it is nice for two reasons:
|
||||
1- all deltas are positive, making the generated code
|
||||
easier to read, and
|
||||
2- smaller allocation sizes, which are more frequent, execute
|
||||
fewer instructions before calling the GC. *)
|
||||
let delta_lbl_list =
|
||||
delta_encode (List.rev (IntMap.bindings !call_gc_labels)) in
|
||||
List.iter
|
||||
(fun (delta, lbl) ->
|
||||
`{emit_label lbl}: addi 31, 31, {emit_int delta}\n`)
|
||||
delta_lbl_list;
|
||||
match abi with
|
||||
| ELF32 ->
|
||||
` b {emit_symbol "caml_call_gc"}\n`
|
||||
|
|
|
@ -429,10 +429,11 @@ let emit_instr i =
|
|||
gc_return_lbl = lbl_redo;
|
||||
gc_frame_lbl = lbl_frame } :: !call_gc_sites;
|
||||
`{emit_label lbl_redo}:`;
|
||||
` lay %r11, {emit_int(-n)}(%r11)\n`;
|
||||
` clgr %r11, %r10\n`;
|
||||
` brcl 4, {emit_label lbl_call_gc}\n`; (* less than *)
|
||||
` la {emit_reg i.res.(0)}, 8(%r11)\n`
|
||||
` lay {emit_reg i.res.(0)}, {emit_int(-n+8)}(%r11)\n`;
|
||||
` clgr {emit_reg i.res.(0)}, %r10\n`;
|
||||
` brcl 12, {emit_label lbl_call_gc}\n`;
|
||||
(* less than or equal *)
|
||||
` lay %r11, -8({emit_reg i.res.(0)})\n`
|
||||
|
||||
| Lop(Iintop Imulh) ->
|
||||
(* Hacker's Delight section 8.3:
|
||||
|
|
|
@ -397,6 +397,7 @@ LBL(caml_alloc1):
|
|||
jb LBL(100)
|
||||
ret
|
||||
LBL(100):
|
||||
addq $16, %r15
|
||||
RECORD_STACK_FRAME(0)
|
||||
ENTER_FUNCTION
|
||||
/* subq $8, %rsp; CFI_ADJUST (8); */
|
||||
|
@ -414,6 +415,7 @@ LBL(caml_alloc2):
|
|||
jb LBL(101)
|
||||
ret
|
||||
LBL(101):
|
||||
addq $24, %r15
|
||||
RECORD_STACK_FRAME(0)
|
||||
ENTER_FUNCTION
|
||||
/* subq $8, %rsp; CFI_ADJUST (8); */
|
||||
|
@ -431,6 +433,7 @@ LBL(caml_alloc3):
|
|||
jb LBL(102)
|
||||
ret
|
||||
LBL(102):
|
||||
addq $32, %r15
|
||||
RECORD_STACK_FRAME(0)
|
||||
ENTER_FUNCTION
|
||||
/* subq $8, %rsp; CFI_ADJUST (8) */
|
||||
|
@ -450,6 +453,7 @@ LBL(caml_allocN):
|
|||
addq $8, %rsp; CFI_ADJUST (-8) /* drop desired size */
|
||||
ret
|
||||
LBL(103):
|
||||
addq 0(%rsp), %r15
|
||||
CFI_ADJUST(8)
|
||||
RECORD_STACK_FRAME(8)
|
||||
#ifdef WITH_FRAME_POINTERS
|
||||
|
@ -467,6 +471,27 @@ LBL(103):
|
|||
jmp LBL(caml_allocN)
|
||||
CFI_ENDPROC
|
||||
|
||||
/* Reset the allocation pointer and invoke the GC */
|
||||
|
||||
FUNCTION(G(caml_call_gc1))
|
||||
CFI_STARTPROC
|
||||
addq $16, %r15
|
||||
jmp GCALL(caml_call_gc)
|
||||
CFI_ENDPROC
|
||||
|
||||
FUNCTION(G(caml_call_gc2))
|
||||
CFI_STARTPROC
|
||||
addq $24, %r15
|
||||
jmp GCALL(caml_call_gc)
|
||||
CFI_ENDPROC
|
||||
|
||||
FUNCTION(G(caml_call_gc3))
|
||||
CFI_STARTPROC
|
||||
addq $32, %r15
|
||||
jmp GCALL(caml_call_gc)
|
||||
CFI_ENDPROC
|
||||
|
||||
|
||||
/* Call a C function from OCaml */
|
||||
|
||||
FUNCTION(G(caml_c_call))
|
||||
|
|
|
@ -150,6 +150,7 @@ caml_alloc1:
|
|||
jb L100
|
||||
ret
|
||||
L100:
|
||||
add r15, 16
|
||||
mov rax, [rsp + 0]
|
||||
mov caml_last_return_address, rax
|
||||
lea rax, [rsp + 8]
|
||||
|
@ -167,6 +168,7 @@ caml_alloc2:
|
|||
jb L101
|
||||
ret
|
||||
L101:
|
||||
add r15, 24
|
||||
mov rax, [rsp + 0]
|
||||
mov caml_last_return_address, rax
|
||||
lea rax, [rsp + 8]
|
||||
|
@ -184,6 +186,7 @@ caml_alloc3:
|
|||
jb L102
|
||||
ret
|
||||
L102:
|
||||
add r15, 32
|
||||
mov rax, [rsp + 0]
|
||||
mov caml_last_return_address, rax
|
||||
lea rax, [rsp + 8]
|
||||
|
@ -201,6 +204,7 @@ caml_allocN:
|
|||
jb L103
|
||||
ret
|
||||
L103:
|
||||
add r15, rax
|
||||
push rax ; save desired size
|
||||
mov rax, [rsp + 8]
|
||||
mov caml_last_return_address, rax
|
||||
|
@ -210,6 +214,26 @@ L103:
|
|||
pop rax ; recover desired size
|
||||
jmp caml_allocN
|
||||
|
||||
; Reset the allocation pointer and invoke the GC
|
||||
|
||||
PUBLIC caml_call_gc1
|
||||
ALIGN 16
|
||||
caml_call_gc1:
|
||||
add r15, 16
|
||||
jmp caml_call_gc
|
||||
|
||||
PUBLIC caml_call_gc2
|
||||
ALIGN 16
|
||||
caml_call_gc2:
|
||||
add r15, 24
|
||||
jmp caml_call_gc
|
||||
|
||||
PUBLIC caml_call_gc3
|
||||
ALIGN 16
|
||||
caml_call_gc3:
|
||||
add r15, 32
|
||||
jmp caml_call_gc
|
||||
|
||||
; Call a C function from OCaml
|
||||
|
||||
PUBLIC caml_c_call
|
||||
|
|
|
@ -163,7 +163,8 @@ caml_alloc1:
|
|||
cmp alloc_ptr, alloc_limit
|
||||
bcc 1f
|
||||
bx lr
|
||||
1: /* Record return address */
|
||||
1: add alloc_ptr, alloc_ptr, 8
|
||||
/* Record return address */
|
||||
ldr r7, =caml_last_return_address
|
||||
str lr, [r7]
|
||||
/* Call GC (preserves r7) */
|
||||
|
@ -185,7 +186,8 @@ caml_alloc2:
|
|||
cmp alloc_ptr, alloc_limit
|
||||
bcc 1f
|
||||
bx lr
|
||||
1: /* Record return address */
|
||||
1: add alloc_ptr, alloc_ptr, 12
|
||||
/* Record return address */
|
||||
ldr r7, =caml_last_return_address
|
||||
str lr, [r7]
|
||||
/* Call GC (preserves r7) */
|
||||
|
@ -208,7 +210,8 @@ caml_alloc3:
|
|||
cmp alloc_ptr, alloc_limit
|
||||
bcc 1f
|
||||
bx lr
|
||||
1: /* Record return address */
|
||||
1: add alloc_ptr, alloc_ptr, 16
|
||||
/* Record return address */
|
||||
ldr r7, =caml_last_return_address
|
||||
str lr, [r7]
|
||||
/* Call GC (preserves r7) */
|
||||
|
@ -230,7 +233,8 @@ caml_allocN:
|
|||
cmp alloc_ptr, alloc_limit
|
||||
bcc 1f
|
||||
bx lr
|
||||
1: /* Record return address */
|
||||
1: add alloc_ptr, alloc_ptr, r7
|
||||
/* Record return address */
|
||||
ldr r12, =caml_last_return_address
|
||||
str lr, [r12]
|
||||
/* Call GC (preserves r7) */
|
||||
|
|
|
@ -185,7 +185,8 @@ caml_alloc1:
|
|||
cmp ALLOC_PTR, ALLOC_LIMIT
|
||||
b.lo 2f
|
||||
ret
|
||||
2: stp x29, x30, [sp, -16]!
|
||||
2: add ALLOC_PTR, ALLOC_PTR, #16
|
||||
stp x29, x30, [sp, -16]!
|
||||
CFI_ADJUST(16)
|
||||
/* Record the lowest address of the caller's stack frame. This is the
|
||||
address immediately above the pair of words (x29 and x30) we just
|
||||
|
@ -217,7 +218,8 @@ caml_alloc2:
|
|||
cmp ALLOC_PTR, ALLOC_LIMIT
|
||||
b.lo 2f
|
||||
ret
|
||||
2: stp x29, x30, [sp, -16]!
|
||||
2: add ALLOC_PTR, ALLOC_PTR, #24
|
||||
stp x29, x30, [sp, -16]!
|
||||
CFI_ADJUST(16)
|
||||
/* Record the lowest address of the caller's stack frame.
|
||||
See comment above. */
|
||||
|
@ -245,7 +247,8 @@ caml_alloc3:
|
|||
cmp ALLOC_PTR, ALLOC_LIMIT
|
||||
b.lo 2f
|
||||
ret
|
||||
2: stp x29, x30, [sp, -16]!
|
||||
2: add ALLOC_PTR, ALLOC_PTR, #32
|
||||
stp x29, x30, [sp, -16]!
|
||||
CFI_ADJUST(16)
|
||||
/* Record the lowest address of the caller's stack frame.
|
||||
See comment above. */
|
||||
|
@ -273,7 +276,8 @@ caml_allocN:
|
|||
cmp ALLOC_PTR, ALLOC_LIMIT
|
||||
b.lo 2f
|
||||
ret
|
||||
2: stp x29, x30, [sp, -16]!
|
||||
2: add ALLOC_PTR, ALLOC_PTR, ARG
|
||||
stp x29, x30, [sp, -16]!
|
||||
CFI_ADJUST(16)
|
||||
/* Record the lowest address of the caller's stack frame.
|
||||
See comment above. */
|
||||
|
|
|
@ -113,9 +113,9 @@ FUNCTION(caml_alloc1)
|
|||
CFI_STARTPROC
|
||||
movl G(caml_young_ptr), %eax
|
||||
subl $8, %eax
|
||||
movl %eax, G(caml_young_ptr)
|
||||
cmpl G(caml_young_limit), %eax
|
||||
jb LBL(100)
|
||||
movl %eax, G(caml_young_ptr)
|
||||
ret
|
||||
LBL(100):
|
||||
movl 0(%esp), %eax
|
||||
|
@ -132,9 +132,9 @@ FUNCTION(caml_alloc2)
|
|||
CFI_STARTPROC
|
||||
movl G(caml_young_ptr), %eax
|
||||
subl $12, %eax
|
||||
movl %eax, G(caml_young_ptr)
|
||||
cmpl G(caml_young_limit), %eax
|
||||
jb LBL(101)
|
||||
movl %eax, G(caml_young_ptr)
|
||||
ret
|
||||
LBL(101):
|
||||
movl 0(%esp), %eax
|
||||
|
@ -151,9 +151,9 @@ FUNCTION(caml_alloc3)
|
|||
CFI_STARTPROC
|
||||
movl G(caml_young_ptr), %eax
|
||||
subl $16, %eax
|
||||
movl %eax, G(caml_young_ptr)
|
||||
cmpl G(caml_young_limit), %eax
|
||||
jb LBL(102)
|
||||
movl %eax, G(caml_young_ptr)
|
||||
ret
|
||||
LBL(102):
|
||||
movl 0(%esp), %eax
|
||||
|
@ -178,7 +178,6 @@ LBL(103):
|
|||
subl G(caml_young_ptr), %eax /* eax = - size */
|
||||
negl %eax /* eax = size */
|
||||
pushl %eax; CFI_ADJUST(4) /* save desired size */
|
||||
subl %eax, G(caml_young_ptr) /* must update young_ptr */
|
||||
movl 4(%esp), %eax
|
||||
movl %eax, G(caml_last_return_address)
|
||||
leal 8(%esp), %eax
|
||||
|
|
|
@ -74,9 +74,9 @@ L105: push ebp
|
|||
_caml_alloc1:
|
||||
mov eax, _caml_young_ptr
|
||||
sub eax, 8
|
||||
mov _caml_young_ptr, eax
|
||||
cmp eax, _caml_young_limit
|
||||
jb L100
|
||||
mov _caml_young_ptr, eax
|
||||
ret
|
||||
L100: mov eax, [esp]
|
||||
mov _caml_last_return_address, eax
|
||||
|
@ -89,9 +89,9 @@ L100: mov eax, [esp]
|
|||
_caml_alloc2:
|
||||
mov eax, _caml_young_ptr
|
||||
sub eax, 12
|
||||
mov _caml_young_ptr, eax
|
||||
cmp eax, _caml_young_limit
|
||||
jb L101
|
||||
mov _caml_young_ptr, eax
|
||||
ret
|
||||
L101: mov eax, [esp]
|
||||
mov _caml_last_return_address, eax
|
||||
|
@ -104,9 +104,9 @@ L101: mov eax, [esp]
|
|||
_caml_alloc3:
|
||||
mov eax, _caml_young_ptr
|
||||
sub eax, 16
|
||||
mov _caml_young_ptr, eax
|
||||
cmp eax, _caml_young_limit
|
||||
jb L102
|
||||
mov _caml_young_ptr, eax
|
||||
ret
|
||||
L102: mov eax, [esp]
|
||||
mov _caml_last_return_address, eax
|
||||
|
@ -126,7 +126,6 @@ _caml_allocN:
|
|||
L103: sub eax, _caml_young_ptr ; eax = - size
|
||||
neg eax ; eax = size
|
||||
push eax ; save desired size
|
||||
sub _caml_young_ptr, eax ; must update young_ptr
|
||||
mov eax, [esp+4]
|
||||
mov _caml_last_return_address, eax
|
||||
lea eax, [esp+8]
|
||||
|
|
|
@ -18,9 +18,7 @@
|
|||
type stat =
|
||||
{ minor_words : float;
|
||||
(** Number of words allocated in the minor heap since
|
||||
the program was started. This number is accurate in
|
||||
byte-code programs, but only an approximation in programs
|
||||
compiled to native code. *)
|
||||
the program was started. *)
|
||||
|
||||
promoted_words : float;
|
||||
(** Number of words allocated in the minor heap that
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
pr7798.ml
|
|
@ -0,0 +1,57 @@
|
|||
(* TEST
|
||||
* bytecode
|
||||
* native
|
||||
* native
|
||||
ocamlopt_flags = "-compact"
|
||||
*)
|
||||
|
||||
type mut2 = { mutable p: int; mutable q:int }
|
||||
type mut3 = { mutable s: int; mutable t:int; mutable u:int }
|
||||
|
||||
type mut_record =
|
||||
{ mutable a : int;
|
||||
mutable b : int;
|
||||
mutable c : int;
|
||||
mutable d : int;
|
||||
mutable e : int;
|
||||
mutable f : int; }
|
||||
|
||||
let go () =
|
||||
let pre_before = Gc.minor_words () in
|
||||
let before = Gc.minor_words () in
|
||||
let alloc_per_minor_words = int_of_float (before -. pre_before) in
|
||||
if Sys.backend_type = Sys.Native then assert (alloc_per_minor_words = 0);
|
||||
let allocs = ref alloc_per_minor_words in
|
||||
let n = 1_000_000 in
|
||||
for i = 1 to n do
|
||||
Sys.opaque_identity (ref i)
|
||||
|> ignore;
|
||||
allocs := !allocs + 2;
|
||||
done;
|
||||
for i = 1 to n do
|
||||
Sys.opaque_identity { p = i; q = i }
|
||||
|> ignore;
|
||||
allocs := !allocs + 3;
|
||||
done;
|
||||
for i = 1 to n do
|
||||
Sys.opaque_identity { s = i; t = i; u = i }
|
||||
|> ignore;
|
||||
allocs := !allocs + 4;
|
||||
done;
|
||||
for i = 1 to n do
|
||||
Sys.opaque_identity { a = i; b = i; c = i; d = i; e = i; f = i }
|
||||
|> ignore;
|
||||
allocs := !allocs + 7;
|
||||
if i mod (n/3) == 0 then Gc.full_major ();
|
||||
done;
|
||||
for i = 1 to n do
|
||||
Sys.opaque_identity (Array.make 8 i)
|
||||
|> ignore;
|
||||
allocs := !allocs + 9;
|
||||
if i mod (n/3) == 0 then Gc.compact ();
|
||||
done;
|
||||
let after = Gc.minor_words () in
|
||||
let measured_allocs = int_of_float (after -. before) - alloc_per_minor_words in
|
||||
Printf.printf "%d\n" (measured_allocs - !allocs)
|
||||
|
||||
let () = go ()
|
|
@ -0,0 +1 @@
|
|||
0
|
Loading…
Reference in New Issue