diff --git a/Changes b/Changes index 5b1f59232..ff2f10d4f 100644 --- a/Changes +++ b/Changes @@ -106,9 +106,8 @@ Working version - #8634: Statistical memory profiling provided by the Gc.Memprof module. - Incomplete version: does not sample - - objects allocated in the minor heap in native mode, - - objects allocated by de-marshalling. + Incomplete version: does not sample objects allocated in the minor + heap in native mode (Jacques-Henri Jourdan, review by Stephen Dolan, Gabriel Scherer and Damien Doligez) diff --git a/runtime/.depend b/runtime/.depend index da5364c78..f1c9ddb50 100644 --- a/runtime/.depend +++ b/runtime/.depend @@ -149,7 +149,8 @@ intern_b.$(O): intern.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/callback.h caml/config.h caml/custom.h caml/fail.h caml/gc.h \ caml/intext.h caml/io.h caml/io.h caml/md5.h caml/memory.h caml/gc.h \ caml/major_gc.h caml/freelist.h caml/minor_gc.h caml/address_class.h \ - caml/domain.h caml/mlvalues.h caml/misc.h caml/reverse.h + caml/domain.h caml/mlvalues.h caml/misc.h caml/reverse.h caml/memprof.h \ + caml/roots.h caml/memory.h interp_b.$(O): interp.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h caml/domain_state.h caml/domain_state.tbl \ caml/backtrace.h caml/exec.h caml/callback.h caml/debugger.h caml/fail.h \ @@ -509,7 +510,8 @@ intern_bd.$(O): intern.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/callback.h caml/config.h caml/custom.h caml/fail.h caml/gc.h \ caml/intext.h caml/io.h caml/io.h caml/md5.h caml/memory.h caml/gc.h \ caml/major_gc.h caml/freelist.h caml/minor_gc.h caml/address_class.h \ - caml/domain.h caml/mlvalues.h caml/misc.h caml/reverse.h + caml/domain.h caml/mlvalues.h caml/misc.h caml/reverse.h caml/memprof.h \ + caml/roots.h caml/memory.h interp_bd.$(O): interp.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h caml/domain_state.h caml/domain_state.tbl \ caml/backtrace.h caml/exec.h caml/callback.h caml/debugger.h caml/fail.h \ @@ -864,7 +866,8 @@ intern_bi.$(O): intern.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/callback.h caml/config.h caml/custom.h caml/fail.h caml/gc.h \ caml/intext.h caml/io.h caml/io.h caml/md5.h caml/memory.h caml/gc.h \ caml/major_gc.h caml/freelist.h caml/minor_gc.h caml/address_class.h \ - caml/domain.h caml/mlvalues.h caml/misc.h caml/reverse.h + caml/domain.h caml/mlvalues.h caml/misc.h caml/reverse.h caml/memprof.h \ + caml/roots.h caml/memory.h interp_bi.$(O): interp.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h caml/domain_state.h caml/domain_state.tbl \ caml/backtrace.h caml/exec.h caml/callback.h caml/debugger.h caml/fail.h \ @@ -1219,7 +1222,8 @@ intern_bpic.$(O): intern.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/callback.h caml/config.h caml/custom.h caml/fail.h caml/gc.h \ caml/intext.h caml/io.h caml/io.h caml/md5.h caml/memory.h caml/gc.h \ caml/major_gc.h caml/freelist.h caml/minor_gc.h caml/address_class.h \ - caml/domain.h caml/mlvalues.h caml/misc.h caml/reverse.h + caml/domain.h caml/mlvalues.h caml/misc.h caml/reverse.h caml/memprof.h \ + caml/roots.h caml/memory.h interp_bpic.$(O): interp.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h caml/domain_state.h caml/domain_state.tbl \ caml/backtrace.h caml/exec.h caml/callback.h caml/debugger.h caml/fail.h \ @@ -1571,7 +1575,8 @@ intern_n.$(O): intern.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/callback.h caml/config.h caml/custom.h caml/fail.h caml/gc.h \ caml/intext.h caml/io.h caml/io.h caml/md5.h caml/memory.h caml/gc.h \ caml/major_gc.h caml/freelist.h caml/minor_gc.h caml/address_class.h \ - caml/domain.h caml/mlvalues.h caml/misc.h caml/reverse.h + caml/domain.h caml/mlvalues.h caml/misc.h caml/reverse.h caml/memprof.h \ + caml/roots.h caml/memory.h interp_n.$(O): interp.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h caml/domain_state.h caml/domain_state.tbl \ caml/backtrace.h caml/exec.h caml/callback.h caml/debugger.h caml/fail.h \ @@ -1928,7 +1933,8 @@ intern_nd.$(O): intern.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/callback.h caml/config.h caml/custom.h caml/fail.h caml/gc.h \ caml/intext.h caml/io.h caml/io.h caml/md5.h caml/memory.h caml/gc.h \ caml/major_gc.h caml/freelist.h caml/minor_gc.h caml/address_class.h \ - caml/domain.h caml/mlvalues.h caml/misc.h caml/reverse.h + caml/domain.h caml/mlvalues.h caml/misc.h caml/reverse.h caml/memprof.h \ + caml/roots.h caml/memory.h interp_nd.$(O): interp.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h caml/domain_state.h caml/domain_state.tbl \ caml/backtrace.h caml/exec.h caml/callback.h caml/debugger.h caml/fail.h \ @@ -2280,7 +2286,8 @@ intern_ni.$(O): intern.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/callback.h caml/config.h caml/custom.h caml/fail.h caml/gc.h \ caml/intext.h caml/io.h caml/io.h caml/md5.h caml/memory.h caml/gc.h \ caml/major_gc.h caml/freelist.h caml/minor_gc.h caml/address_class.h \ - caml/domain.h caml/mlvalues.h caml/misc.h caml/reverse.h + caml/domain.h caml/mlvalues.h caml/misc.h caml/reverse.h caml/memprof.h \ + caml/roots.h caml/memory.h interp_ni.$(O): interp.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h caml/domain_state.h caml/domain_state.tbl \ caml/backtrace.h caml/exec.h caml/callback.h caml/debugger.h caml/fail.h \ @@ -2632,7 +2639,8 @@ intern_npic.$(O): intern.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/callback.h caml/config.h caml/custom.h caml/fail.h caml/gc.h \ caml/intext.h caml/io.h caml/io.h caml/md5.h caml/memory.h caml/gc.h \ caml/major_gc.h caml/freelist.h caml/minor_gc.h caml/address_class.h \ - caml/domain.h caml/mlvalues.h caml/misc.h caml/reverse.h + caml/domain.h caml/mlvalues.h caml/misc.h caml/reverse.h caml/memprof.h \ + caml/roots.h caml/memory.h interp_npic.$(O): interp.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h caml/domain_state.h caml/domain_state.tbl \ caml/backtrace.h caml/exec.h caml/callback.h caml/debugger.h caml/fail.h \ diff --git a/runtime/caml/memprof.h b/runtime/caml/memprof.h index bde99ee0a..c1d8b61fe 100644 --- a/runtime/caml/memprof.h +++ b/runtime/caml/memprof.h @@ -28,6 +28,7 @@ extern void caml_memprof_handle_postponed(); extern void caml_memprof_track_alloc_shr(value block); extern void caml_memprof_track_young(tag_t tag, uintnat wosize, int from_caml); +extern void caml_memprof_track_interned(header_t* block, header_t* blockend); extern void caml_memprof_renew_minor_sample(void); extern value* caml_memprof_young_trigger; diff --git a/runtime/intern.c b/runtime/intern.c index 9f83cdda9..80f29a1ce 100644 --- a/runtime/intern.c +++ b/runtime/intern.c @@ -34,6 +34,7 @@ #include "caml/mlvalues.h" #include "caml/misc.h" #include "caml/reverse.h" +#include "caml/memprof.h" static unsigned char * intern_src; /* Reading pointer in block holding input data. */ @@ -659,8 +660,9 @@ static void intern_alloc(mlsize_t whsize, mlsize_t num_objects, CAMLassert(intern_obj_table == NULL); } -static void intern_add_to_heap(mlsize_t whsize) +static header_t* intern_add_to_heap(mlsize_t whsize) { + header_t* res = NULL; /* Add new heap chunk to heap if needed */ if (intern_extra_block != NULL) { /* If heap chunk not filled totally, build free block at end */ @@ -675,11 +677,32 @@ static void intern_add_to_heap(mlsize_t whsize) } caml_allocated_words += Wsize_bsize ((char *) intern_dest - intern_extra_block); - caml_add_to_heap(intern_extra_block); + if(caml_add_to_heap(intern_extra_block) != 0) { + intern_cleanup(); + caml_raise_out_of_memory(); + } + res = (header_t*)intern_extra_block; intern_extra_block = NULL; // To prevent intern_cleanup freeing it - } else { + } else if(intern_block != 0) { /* [intern_block = 0] when [whsize = 0] */ + res = Hp_val(intern_block); intern_block = 0; // To prevent intern_cleanup rewriting its header } + return res; +} + +static value intern_end(value res, mlsize_t whsize) { + header_t *block = intern_add_to_heap(whsize); + header_t *blockend = intern_dest; + + /* Free everything */ + intern_cleanup(); + + /* Memprof tracking has to be done here, because unmarshalling can + still fail until now. */ + if(block != NULL) + caml_memprof_track_interned(block, blockend); + + return caml_check_urgent_gc(res); } /* Parsing the header */ @@ -776,16 +799,16 @@ static value caml_input_val_core(struct channel *chan, int outside_heap) intern_alloc(h.whsize, h.num_objects, outside_heap); /* Fill it in */ intern_rec(&res); - if (!outside_heap) { - intern_add_to_heap(h.whsize); - } else { + if (!outside_heap) + return intern_end(res, h.whsize); + else { caml_disown_for_heap(intern_extra_block); intern_extra_block = NULL; intern_block = 0; + /* Free everything */ + intern_cleanup(); + return caml_check_urgent_gc(res); } - /* Free everything */ - intern_cleanup(); - return caml_check_urgent_gc(res); } value caml_input_val(struct channel* chan) @@ -835,10 +858,7 @@ CAMLexport value caml_input_val_from_bytes(value str, intnat ofs) intern_src = &Byte_u(str, ofs + h.header_len); /* If a GC occurred */ /* Fill it in */ intern_rec(&obj); - intern_add_to_heap(h.whsize); - /* Free everything */ - intern_cleanup(); - CAMLreturn (caml_check_urgent_gc(obj)); + CAMLreturn (intern_end(obj, h.whsize)); } CAMLprim value caml_input_value_from_string(value str, value ofs) @@ -858,10 +878,7 @@ static value input_val_from_block(struct marshal_header * h) intern_alloc(h->whsize, h->num_objects, 0); /* Fill it in */ intern_rec(&obj); - intern_add_to_heap(h->whsize); - /* Free internal data structures */ - intern_cleanup(); - return caml_check_urgent_gc(obj); + return (intern_end(obj, h->whsize)); } CAMLexport value caml_input_value_from_malloc(char * data, intnat ofs) diff --git a/runtime/memprof.c b/runtime/memprof.c index 53f067311..5bbb3b9dd 100644 --- a/runtime/memprof.c +++ b/runtime/memprof.c @@ -179,7 +179,7 @@ CAMLprim value caml_memprof_set(value v) enum ml_alloc_kind { Minor = Val_long(0), Major = Val_long(1), - Serialized = Val_long(2) + Unmarshalled = Val_long(2) }; /* When we call do_callback, we suspend/resume sampling. In order to @@ -287,13 +287,13 @@ static void purge_postponed_queue(void) block is allocated, but not yet initialized, so that the heap invariants are broken. */ static void register_postponed_callback(value block, uintnat occurrences, - enum ml_alloc_kind kind) + enum ml_alloc_kind kind, + value* callstack) { - value callstack; struct postponed_block* new_hd; if (occurrences == 0) return; - callstack = capture_callstack_postponed(); - if (callstack == 0) return; /* OOM */ + if (*callstack == 0) *callstack = capture_callstack_postponed(); + if (*callstack == 0) return; /* OOM */ new_hd = postponed_next(postponed_hd); if (new_hd == postponed_tl) { @@ -319,7 +319,7 @@ static void register_postponed_callback(value block, uintnat occurrences, } postponed_hd->block = block; - postponed_hd->callstack = callstack; + postponed_hd->callstack = *callstack; postponed_hd->occurrences = occurrences; postponed_hd->kind = kind; postponed_hd = new_hd; @@ -371,11 +371,12 @@ void caml_memprof_scan_roots(scanning_action f) { void caml_memprof_track_alloc_shr(value block) { + value callstack = 0; CAMLassert(Is_in_heap(block)); /* This test also makes sure memprof is initialized. */ if (lambda == 0 || caml_memprof_suspended) return; register_postponed_callback( - block, mt_generate_binom(Whsize_val(block)), Major); + block, mt_generate_binom(Whsize_val(block)), Major, &callstack); } /* Shifts the next sample in the minor heap by [n] words. Essentially, @@ -438,8 +439,9 @@ void caml_memprof_track_young(tag_t tag, uintnat wosize, int from_caml) - Caml_state->young_ptr) + 1; if (!from_caml) { + value callstack = 0; register_postponed_callback(Val_hp(Caml_state->young_ptr), occurrences, - Minor); + Minor, &callstack); caml_memprof_renew_minor_sample(); CAMLreturn0; } @@ -493,3 +495,39 @@ void caml_memprof_track_young(tag_t tag, uintnat wosize, int from_caml) CAMLreturn0; } + +void caml_memprof_track_interned(header_t* block, header_t* blockend) { + header_t *p; + value callstack = 0; + + if(lambda == 0 || caml_memprof_suspended) + return; + + /* We have to select the sampled blocks before sampling them, + because sampling may trigger GC, and then blocks can escape from + [block, blockend[. So we use the postponing machinery for + selecting blocks. [intern.c] will call [check_urgent_gc] which + will call [caml_memprof_handle_postponed] in turn. */ + p = block; + while(1) { + uintnat next_sample = mt_generate_geom(); + header_t *next_sample_p, *next_p; + if(next_sample > blockend - p) + break; + /* [next_sample_p] is the block *following* the next sampled + block! */ + next_sample_p = p + next_sample; + + while(1) { + next_p = p + Whsize_hp(p); + if(next_p >= next_sample_p) break; + p = next_p; + } + + register_postponed_callback( + Val_hp(p), mt_generate_binom(next_p - next_sample_p) + 1, + Unmarshalled, &callstack); + + p = next_p; + } +} diff --git a/stdlib/gc.ml b/stdlib/gc.ml index 28beaf66c..dfa6443ba 100644 --- a/stdlib/gc.ml +++ b/stdlib/gc.ml @@ -124,7 +124,7 @@ module Memprof = type alloc_kind = | Minor | Major - | Serialized + | Unmarshalled type sample_info = { n_samples: int; kind: alloc_kind; tag: int; diff --git a/stdlib/gc.mli b/stdlib/gc.mli index f706dca23..476ef1190 100644 --- a/stdlib/gc.mli +++ b/stdlib/gc.mli @@ -401,12 +401,11 @@ module Memprof : type alloc_kind = | Minor | Major - | Serialized + | Unmarshalled (** Allocation kinds - [Minor] : the allocation took place in the minor heap. - [Major] : the allocation took place in the major heap. - - [Serialized] : the allocation happened during a - deserialization. *) + - [Unmarshalled] : the allocation happened while unmarshalling. *) type sample_info = { n_samples: int; diff --git a/testsuite/tests/statmemprof/intern.byte.reference b/testsuite/tests/statmemprof/intern.byte.reference new file mode 100644 index 000000000..5c0836234 --- /dev/null +++ b/testsuite/tests/statmemprof/intern.byte.reference @@ -0,0 +1,13 @@ +check_nosample +check_ephe_full_major +check_no_nested +check_distrib 2 3000 3 0.000010 +check_distrib 2 3000 1 0.000100 +check_distrib 2 2000 1 0.010000 +check_distrib 2 2000 1 0.900000 +check_distrib 300000 300000 20 0.100000 +check_callstack +Raised by primitive operation at file "intern.ml", line 32, characters 14-35 +Called from file "intern.ml", line 168, characters 2-25 +Called from file "intern.ml", line 174, characters 9-27 +OK ! diff --git a/testsuite/tests/statmemprof/intern.ml b/testsuite/tests/statmemprof/intern.ml new file mode 100644 index 000000000..035643655 --- /dev/null +++ b/testsuite/tests/statmemprof/intern.ml @@ -0,0 +1,177 @@ +(* TEST + flags = "-g" + * bytecode + reference = "${test_source_directory}/intern.byte.reference" + * native + reference = "${test_source_directory}/intern.opt.reference" + compare_programs = "false" +*) + +open Gc.Memprof + +type t = Dummy of int (* Skip tag 0. *) | I of int | II of int * int | Cons of t +let rec t_of_len = function + | len when len <= 1 -> assert false + | 2 -> I 1 + | 3 -> II (2, 3) + | len -> Cons (t_of_len (len - 2)) + +let marshalled_data = Hashtbl.create 17 +let[@inline never] get_marshalled_data len : t = + Marshal.from_string (Hashtbl.find marshalled_data len) 0 +let precompute_marshalled_data lo hi = + for len = lo to hi do + if not (Hashtbl.mem marshalled_data len) then + Hashtbl.add marshalled_data len (Marshal.to_string (t_of_len len) []) + done + +let root = ref [] +let[@inline never] do_intern lo hi cnt keep = + for j = 0 to cnt-1 do + for i = lo to hi do + root := get_marshalled_data i :: !root + done; + if not keep then root := [] + done + +let check_nosample () = + Printf.printf "check_nosample\n%!"; + precompute_marshalled_data 2 3000; + start { + sampling_rate = 0.; + callstack_size = 10; + callback = fun _ -> + Printf.printf "Callback called with sampling_rate = 0\n"; + assert(false) + }; + do_intern 2 3000 1 false + +let () = check_nosample () + +let check_ephe_full_major () = + Printf.printf "check_ephe_full_major\n%!"; + precompute_marshalled_data 2 3000; + let ephes = ref [] in + start { + sampling_rate = 0.01; + callstack_size = 10; + callback = fun _ -> + let res = Ephemeron.K1.create () in + ephes := res :: !ephes; + Some res + }; + do_intern 2 3000 1 true; + stop (); + List.iter (fun e -> assert (Ephemeron.K1.check_key e)) !ephes; + Gc.full_major (); + List.iter (fun e -> assert (Ephemeron.K1.check_key e)) !ephes; + root := []; + Gc.full_major (); + List.iter (fun e -> assert (not (Ephemeron.K1.check_key e))) !ephes + +let () = check_ephe_full_major () + +let check_no_nested () = + Printf.printf "check_no_nested\n%!"; + precompute_marshalled_data 2 300; + let in_callback = ref false in + start { + (* FIXME: we should use 1. to make sure the block is sampled, + but the runtime does an infinite loop in native mode in this + case. This bug will go away when the sampling of natively + allocated will be correctly implemented. *) + sampling_rate = 0.5; + callstack_size = 10; + callback = fun _ -> + assert (not !in_callback); + in_callback := true; + do_intern 100 200 1 false; + in_callback := false; + None + }; + do_intern 100 200 1 false; + stop () + +let () = check_no_nested () + +let check_distrib lo hi cnt rate = + Printf.printf "check_distrib %d %d %d %f\n%!" lo hi cnt rate; + precompute_marshalled_data lo hi; + let smp = ref 0 in + start { + sampling_rate = rate; + callstack_size = 10; + callback = fun info -> + (* We also allocate the list constructor in the minor heap. *) + if info.kind = Unmarshalled then begin + begin match info.tag, info.size with + | 1, 1 | 2, 2 | 3, 1 -> () + | _ -> assert false + end; + assert (info.n_samples > 0); + smp := !smp + info.n_samples + end; + None + }; + do_intern lo hi cnt false; + stop (); + + (* The probability distribution of the number of samples follows a + binomial distribution of parameters tot_alloc and rate. Given + that tot_alloc*rate and tot_alloc*(1-rate) are large (i.e., > + 100), this distribution is approximately equal to a normal + distribution. We compute a 1e-8 confidence interval for !smp + using quantiles of the normal distribution, and check that we are + in this confidence interval. *) + let tot_alloc = cnt*(lo+hi)*(hi-lo+1)/2 in + assert (float tot_alloc *. rate > 100. && + float tot_alloc *. (1. -. rate) > 100.); + let mean = float tot_alloc *. rate in + let stddev = sqrt (float tot_alloc *. rate *. (1. -. rate)) in + (* This assertion has probability to fail close to 1e-8. *) + assert (abs_float (mean -. float !smp) <= stddev *. 5.7) + +let () = + check_distrib 2 3000 3 0.00001; + check_distrib 2 3000 1 0.0001; + check_distrib 2 2000 1 0.01; + check_distrib 2 2000 1 0.9; + check_distrib 300000 300000 20 0.1 + +(* FIXME : in bytecode mode, the function [caml_get_current_callstack_impl], + which is supposed to capture the current call stack, does not have access + to the current value of [pc]. Therefore, depending on how the C call is + performed, we may miss the first call stack slot in the captured backtraces. + This is the reason why the reference file is different in native and + bytecode modes. + + Note that [Printexc.get_callstack] does not suffer from this problem, because + this function is actually an automatically generated stub which performs th + C call. This is because [Printexc.get_callstack] is not declared as external + in the mli file. *) + +let[@inline never] check_callstack () = + Printf.printf "check_callstack\n%!"; + precompute_marshalled_data 2 300; + let callstack = ref None in + start { + (* FIXME: we should use 1. to make sure the block is sampled, + but the runtime does an infinite loop in native mode in this + case. This bug will go away when the sampling of natively + allocated will be correctly implemented. *) + sampling_rate = 0.5; + callstack_size = 10; + callback = fun info -> + if info.kind = Unmarshalled then callstack := Some info.callstack; + None + }; + do_intern 2 300 1 false; + stop (); + match !callstack with + | None -> assert false + | Some cs -> Printexc.print_raw_backtrace stdout cs + +let () = check_callstack () + +let () = + Printf.printf "OK !\n" diff --git a/testsuite/tests/statmemprof/intern.opt.reference b/testsuite/tests/statmemprof/intern.opt.reference new file mode 100644 index 000000000..43666c600 --- /dev/null +++ b/testsuite/tests/statmemprof/intern.opt.reference @@ -0,0 +1,14 @@ +check_nosample +check_ephe_full_major +check_no_nested +check_distrib 2 3000 3 0.000010 +check_distrib 2 3000 1 0.000100 +check_distrib 2 2000 1 0.010000 +check_distrib 2 2000 1 0.900000 +check_distrib 300000 300000 20 0.100000 +check_callstack +Raised by primitive operation at file "marshal.ml", line 61, characters 9-35 +Called from file "intern.ml", line 32, characters 14-35 +Called from file "intern.ml", line 168, characters 2-25 +Called from file "intern.ml", line 174, characters 9-27 +OK ! diff --git a/testsuite/tests/statmemprof/ocamltests b/testsuite/tests/statmemprof/ocamltests index 761380f93..76d7ec969 100644 --- a/testsuite/tests/statmemprof/ocamltests +++ b/testsuite/tests/statmemprof/ocamltests @@ -2,3 +2,4 @@ arrays_in_major.ml arrays_in_minor.ml lists_in_minor.ml exception_callback.ml +intern.ml