From cad494b3990ee2fbe8e8443d54fb6ac4e3454374 Mon Sep 17 00:00:00 2001
From: Jacques-Henri Jourdan
Date: Tue, 9 Jun 2020 17:05:32 +0200
Subject: [PATCH 01/12] Whitespace, typos.

---
 runtime/memprof.c | 61 +++++++++++++++++++++++++++++------------------
 1 file changed, 38 insertions(+), 23 deletions(-)

diff --git a/runtime/memprof.c b/runtime/memprof.c
index 0ae851ac5..053983824 100644
--- a/runtime/memprof.c
+++ b/runtime/memprof.c
@@ -85,14 +85,16 @@ static intnat callstack_buffer_len = 0;
 
 /**** Statistical sampling ****/
 
-Caml_inline uint64_t splitmix64_next(uint64_t* x) {
+Caml_inline uint64_t splitmix64_next(uint64_t* x)
+{
   uint64_t z = (*x += 0x9E3779B97F4A7C15ull);
   z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ull;
   z = (z ^ (z >> 27)) * 0x94D049BB133111EBull;
   return z ^ (z >> 31);
 }
 
-static void xoshiro_init(void) {
+static void xoshiro_init(void)
+{
   int i;
   uint64_t splitmix64_state = 42;
   rand_pos = RAND_BLOCK_SIZE;
@@ -106,7 +108,8 @@ static void xoshiro_init(void) {
   }
 }
 
-Caml_inline uint32_t xoshiro_next(int i) {
+Caml_inline uint32_t xoshiro_next(int i)
+{
   uint32_t res = xoshiro_state[0][i] + xoshiro_state[3][i];
   uint32_t t = xoshiro_state[1][i] << 9;
   xoshiro_state[2][i] ^= xoshiro_state[0][i];
@@ -122,7 +125,8 @@ Caml_inline uint32_t xoshiro_next(int i) {
 /* Computes [log((y+0.5)/2^32)], up to a relatively good precision,
    and guarantee that the result is negative.
    The average absolute error is very close to 0. */
-Caml_inline float log_approx(uint32_t y) {
+Caml_inline float log_approx(uint32_t y)
+{
   union { float f; int32_t i; } u;
   float exp, x;
   u.f = y + 0.5f; /* We convert y to a float ... */
@@ -155,7 +159,8 @@ Caml_inline float log_approx(uint32_t y) {
 #ifdef SUPPORTS_TREE_VECTORIZE
 __attribute__((optimize("tree-vectorize")))
 #endif
-static void rand_batch(void) {
+static void rand_batch(void)
+{
   int i;
 
   /* Instead of using temporary buffers, we could use one big loop,
@@ -167,23 +172,23 @@ static void rand_batch(void) {
   CAMLassert(lambda > 0.);
 
   /* Shuffle the xoshiro samplers, and generate uniform variables in
     A. */
-  for(i = 0; i < RAND_BLOCK_SIZE; i++)
+  for (i = 0; i < RAND_BLOCK_SIZE; i++)
     A[i] = xoshiro_next(i);
 
   /* Generate exponential random variables by computing logarithms.
     We do not use math.h library functions, which are slow and
     prevent compiler from using SIMD instructions. */
-  for(i = 0; i < RAND_BLOCK_SIZE; i++)
+  for (i = 0; i < RAND_BLOCK_SIZE; i++)
     B[i] = 1 + log_approx(A[i]) * one_log1m_lambda;
 
   /* We do the final flooring for generating geometric variables.
     Compilers are unlikely to use SIMD instructions for this loop,
     because it involves a conditional and variables of different
     sizes (32 and 64 bits). */
-  for(i = 0; i < RAND_BLOCK_SIZE; i++) {
+  for (i = 0; i < RAND_BLOCK_SIZE; i++) {
     double f = B[i];
     CAMLassert (f >= 1);
-    if(f > (double)Max_long) rand_geom_buff[i] = Max_long;
+    if (f > (double)Max_long) rand_geom_buff[i] = Max_long;
     else rand_geom_buff[i] = (uintnat)f;
   }
 
@@ -196,7 +201,7 @@ static uintnat rand_geom(void)
 {
   uintnat res;
   CAMLassert(lambda > 0.);
-  if(rand_pos == RAND_BLOCK_SIZE) rand_batch();
+  if (rand_pos == RAND_BLOCK_SIZE) rand_batch();
   res = rand_geom_buff[rand_pos++];
   CAMLassert(1 <= res && res <= Max_long);
   return res;
@@ -360,7 +365,8 @@ static struct tracking_state {
    large.
    Returns 1 if reallocation succeeded --[trackst.alloc_len] is at
    least [trackst.len]--, and 0 otherwise. */
-static int realloc_trackst(void) {
+static int realloc_trackst(void)
+{
   uintnat new_alloc_len;
   struct tracked* new_entries;
   if (trackst.len <= trackst.alloc_len &&
@@ -417,7 +423,8 @@ static void mark_deleted(uintnat t_idx)
 /* The return value is an exception or [Val_unit] iff [*t_idx] is set to
    [Invalid_index]. In this case, the entry is deleted.
    Otherwise, the return value is a [Some(...)] block. */
-Caml_inline value run_callback_exn(uintnat *t_idx, value cb, value param) {
+Caml_inline value run_callback_exn(uintnat *t_idx, value cb, value param)
+{
   struct tracked* t = &trackst.entries[*t_idx];
   value res;
   CAMLassert(!t->callback_running && t->idx_ptr == NULL);
@@ -541,15 +548,17 @@ static void flush_deleted(void)
   realloc_trackst();
 }
 
-static void check_action_pending(void) {
+static void check_action_pending(void)
+{
   if (!suspended && trackst.callback < trackst.len)
     caml_set_action_pending();
 }
 
-void caml_memprof_set_suspended(int s) {
+void caml_memprof_set_suspended(int s)
+{
   suspended = s;
   caml_memprof_renew_minor_sample();
-  if(!s) check_action_pending();
+  if (!s) check_action_pending();
 }
 
 /* In case of a thread context switch during a callback, this can be
@@ -573,7 +582,7 @@ value caml_memprof_handle_postponed_exn(void)
 void caml_memprof_oldify_young_roots(void)
 {
   uintnat i;
-  /* This loop should always have a small number of iteration (when
+  /* This loop should always have a small number of iterations (when
      compared to the size of the minor heap), because the young
     pointer should always be close to the end of the array. Indeed,
     it is only moved back when returning from a callback triggered by
@@ -887,7 +896,8 @@ void caml_memprof_track_young(uintnat wosize, int from_caml,
   return;
 }
 
-void caml_memprof_track_interned(header_t* block, header_t* blockend) {
+void caml_memprof_track_interned(header_t* block, header_t* blockend)
+{
   header_t *p;
   value callstack = 0;
   int is_young = Is_young(Val_hp(block));
@@ -921,7 +931,8 @@ void caml_memprof_track_interned(header_t* block, header_t* blockend) {
 
 /**** Interface with the OCaml code. ****/
 
-static void caml_memprof_init(void) {
+static void caml_memprof_init(void)
+{
   init = 1;
   xoshiro_init();
 }
@@ -996,26 +1007,30 @@ CAMLprim value caml_memprof_stop(value unit)
 
 /**** Interface with systhread. ****/
 
-CAMLexport void caml_memprof_init_th_ctx(struct caml_memprof_th_ctx* ctx) {
+CAMLexport void caml_memprof_init_th_ctx(struct caml_memprof_th_ctx* ctx)
+{
   ctx->suspended = 0;
   ctx->callback_running = 0;
 }
 
-CAMLexport void caml_memprof_stop_th_ctx(struct caml_memprof_th_ctx* ctx) {
+CAMLexport void caml_memprof_stop_th_ctx(struct caml_memprof_th_ctx* ctx)
+{
   /* Make sure that no memprof callback is being executed in this
     thread. If so, memprof data structures may have pointers to the
     thread's stack. */
-  if(ctx->callback_running)
+  if (ctx->callback_running)
     caml_fatal_error("Thread.exit called from a memprof callback.");
 }
 
-CAMLexport void caml_memprof_save_th_ctx(struct caml_memprof_th_ctx* ctx) {
+CAMLexport void caml_memprof_save_th_ctx(struct caml_memprof_th_ctx* ctx)
+{
   ctx->suspended = suspended;
   ctx->callback_running = callback_running;
 }
 
 CAMLexport void caml_memprof_restore_th_ctx
-  (const struct caml_memprof_th_ctx* ctx) {
+  (const struct caml_memprof_th_ctx* ctx)
+{
   callback_running = ctx->callback_running;
   caml_memprof_set_suspended(ctx->suspended);
 }

From b371c213968aa52c8632d7aefde9e9ed89c51c23 Mon Sep 17 00:00:00 2001
From: Jacques-Henri Jourdan
Date: Fri, 26 Jun 2020 12:08:37 +0200
Subject: [PATCH 02/12] Memprof: fix rounding error.

---
 runtime/memprof.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/runtime/memprof.c b/runtime/memprof.c
index 053983824..bdef6ed04 100644
--- a/runtime/memprof.c
+++ b/runtime/memprof.c
@@ -188,7 +188,9 @@ static void rand_batch(void)
   for (i = 0; i < RAND_BLOCK_SIZE; i++) {
     double f = B[i];
     CAMLassert (f >= 1);
-    if (f > (double)Max_long) rand_geom_buff[i] = Max_long;
+    /* [Max_long+1] is a power of two => no rounding in the test. */
+    if (f >= Max_long+1)
+      rand_geom_buff[i] = Max_long;
     else rand_geom_buff[i] = (uintnat)f;
   }
 

From d1800821df5e1679bdd7f7e4a9ea7340ffb7a344 Mon Sep 17 00:00:00 2001
From: Jacques-Henri Jourdan
Date: Wed, 15 Apr 2020 15:39:28 +0200
Subject: [PATCH 03/12] Gc.Memprof.stop no longer tries to run pending
 callbacks.

This is only effective in native mode, since function calls in
bytecode mode will trigger polling and hence run pending callbacks.
---
 runtime/memprof.c                                | 5 -----
 stdlib/gc.mli                                    | 7 +++----
 testsuite/tests/statmemprof/minor_no_postpone.ml | 5 +++--
 3 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/runtime/memprof.c b/runtime/memprof.c
index bdef6ed04..8ae9ffae6 100644
--- a/runtime/memprof.c
+++ b/runtime/memprof.c
@@ -977,11 +977,6 @@ CAMLprim value caml_memprof_stop(value unit)
   if (!started) caml_failwith("Gc.Memprof.stop: not started.");
 
-  /* This call to [caml_memprof_stop] will discard all the previously
-     tracked blocks. We try one last time to call the postponed
-     callbacks. */
-  caml_raise_if_exception(caml_memprof_handle_postponed_exn());
-
   /* Discard the tracked blocks. */
   for (i = 0; i < trackst.len; i++)
     if (trackst.entries[i].idx_ptr != NULL)

diff --git a/stdlib/gc.mli b/stdlib/gc.mli
index 4b1e2f783..4d69e2089 100644
--- a/stdlib/gc.mli
+++ b/stdlib/gc.mli
@@ -536,11 +536,10 @@ module Memprof :
     val stop : unit -> unit
     (** Stop the sampling. Fails if sampling is not active.
 
-        This function does not allocate memory, but tries to run the
-        postponed callbacks for already allocated memory blocks (of
-        course, these callbacks may allocate).
+        This function does not allocate memory.
 
-        All the already tracked blocks are discarded.
+        All the already tracked blocks are discarded. If there are
+        pending postponed callbacks, they may be discarded.
 
         Calling [stop] when a callback is running can lead to
        callbacks not being called even though some events happened. *)
diff --git a/testsuite/tests/statmemprof/minor_no_postpone.ml b/testsuite/tests/statmemprof/minor_no_postpone.ml
index 9d9ecd791..fcb94cf81 100644
--- a/testsuite/tests/statmemprof/minor_no_postpone.ml
+++ b/testsuite/tests/statmemprof/minor_no_postpone.ml
@@ -32,5 +32,6 @@ let () =
   ignore (Sys.opaque_identity (alloc_stub ()));
   assert(not !callback_done);
   callback_ok := true;
-  stop ();
-  assert(!callback_done)
+  ignore (Sys.opaque_identity (ref ()));
+  assert(!callback_done);
+  stop ()

From 11411635bceaf95b47d90521054ae38edcd7fff9 Mon Sep 17 00:00:00 2001
From: Jacques-Henri Jourdan
Date: Wed, 15 Apr 2020 13:33:06 +0200
Subject: [PATCH 04/12] Memprof: refactor realloc_trackst.

---
 runtime/memprof.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/runtime/memprof.c b/runtime/memprof.c
index 8ae9ffae6..2a3e934dc 100644
--- a/runtime/memprof.c
+++ b/runtime/memprof.c
@@ -365,17 +365,18 @@ static struct tracking_state {
 
 /* Reallocate the [trackst] array if it is either too small or too
    large.
+   [grow] is the number of free cells needed.
    Returns 1 if reallocation succeeded --[trackst.alloc_len] is at
    least [trackst.len]--, and 0 otherwise. */
-static int realloc_trackst(void)
+static int realloc_trackst(uintnat grow)
 {
-  uintnat new_alloc_len;
+  uintnat new_alloc_len, new_len = trackst.len + grow;
   struct tracked* new_entries;
-  if (trackst.len <= trackst.alloc_len &&
-      (4*trackst.len >= trackst.alloc_len ||
+  if (new_len <= trackst.alloc_len &&
+      (4*new_len >= trackst.alloc_len ||
        trackst.alloc_len == MIN_TRACKST_ALLOC_LEN))
     return 1;
-  new_alloc_len = trackst.len * 2;
+  new_alloc_len = new_len * 2;
   if (new_alloc_len < MIN_TRACKST_ALLOC_LEN)
     new_alloc_len = MIN_TRACKST_ALLOC_LEN;
   new_entries = caml_stat_resize_noexc(trackst.entries,
@@ -391,11 +392,9 @@ Caml_inline uintnat new_tracked(uintnat n_samples, uintnat wosize,
                                 value block, value user_data)
 {
   struct tracked *t;
-  trackst.len++;
-  if (!realloc_trackst()) {
-    trackst.len--;
+  if (!realloc_trackst(1))
     return Invalid_index;
-  }
+  trackst.len++;
   t = &trackst.entries[trackst.len - 1];
   t->block = block;
   t->n_samples = n_samples;
@@ -547,7 +546,7 @@ static void flush_deleted(void)
   trackst.delete = trackst.len = j;
   CAMLassert(trackst.callback <= trackst.len);
   CAMLassert(trackst.young <= trackst.len);
-  realloc_trackst();
+  realloc_trackst(0);
 }
 
 static void check_action_pending(void)

From 13a874cf3f0c87ada00f8dab4122f0d536462b05 Mon Sep 17 00:00:00 2001
From: Jacques-Henri Jourdan
Date: Mon, 8 Jun 2020 18:25:57 +0200
Subject: [PATCH 05/12] Memprof: get rid of the tracking_state struct, and
 create an entry_array struct, which only contains information specific to
 the array.

---
 runtime/memprof.c | 214 +++++++++++++++++++++++-----------------------
 1 file changed, 109 insertions(+), 105 deletions(-)

diff --git a/runtime/memprof.c b/runtime/memprof.c
index 2a3e934dc..a12a122f7 100644
--- a/runtime/memprof.c
+++ b/runtime/memprof.c
@@ -344,46 +344,46 @@ struct tracked {
 
 /* When an entry is deleted, its index is replaced by that integer. */
 #define Invalid_index (~(uintnat)0)
-
-static struct tracking_state {
-  struct tracked* entries;
-  /* The allocated capacity of the entries array */
-  uintnat alloc_len;
-  /* The number of active entries. (len <= alloc_len) */
-  uintnat len;
+/* A resizable array of entries */
+struct entry_array {
+  struct tracked* t;
+  uintnat min_alloc_len, alloc_len, len;
   /* Before this position, the [block] and [user_data] fields point to
-     the major heap (young <= len). */
-  uintnat young;
-  /* There are no pending callbacks before this position (callback <= len). */
-  uintnat callback;
-  /* There are no blocks to be deleted before this position */
-  uintnat delete;
-} trackst;
+     the major heap ([young <= len]). */
+  uintnat young_idx;
+  /* There are no blocks to be deleted before this position
+     ([delete_idx <= entrieslen]). */
+  uintnat delete_idx;
+};
 
-#define MIN_TRACKST_ALLOC_LEN 128
+#define MIN_ENTRIES_ALLOC_LEN 128
 
+static struct entry_array entries =
+  { NULL, MIN_ENTRIES_ALLOC_LEN, 0, 0, 0, 0 };
 
-/* Reallocate the [trackst] array if it is either too small or too
+/* There are no pending callbacks before this position
+   ([callback_idx <= len]). */
+static uintnat callback_idx;
+
+/* Reallocate the [ea] array if it is either too small or too
    large.
    [grow] is the number of free cells needed.
-   Returns 1 if reallocation succeeded --[trackst.alloc_len] is at
-   least [trackst.len]--, and 0 otherwise. */
-static int realloc_trackst(uintnat grow)
+   Returns 1 if reallocation succeeded --[ea->alloc_len] is at
+   least [ea->len+grow]--, and 0 otherwise. */
+static int realloc_entries(struct entry_array* ea, uintnat grow)
 {
-  uintnat new_alloc_len, new_len = trackst.len + grow;
-  struct tracked* new_entries;
-  if (new_len <= trackst.alloc_len &&
-      (4*new_len >= trackst.alloc_len ||
-       trackst.alloc_len == MIN_TRACKST_ALLOC_LEN))
+  uintnat new_alloc_len, new_len = ea->len + grow;
+  struct tracked* new_t;
+  if (new_len <= ea->alloc_len &&
+      (4*new_len >= ea->alloc_len || ea->alloc_len == ea->min_alloc_len))
     return 1;
   new_alloc_len = new_len * 2;
-  if (new_alloc_len < MIN_TRACKST_ALLOC_LEN)
-    new_alloc_len = MIN_TRACKST_ALLOC_LEN;
-  new_entries = caml_stat_resize_noexc(trackst.entries,
-                                       new_alloc_len * sizeof(struct tracked));
-  if (new_entries == NULL) return 0;
-  trackst.entries = new_entries;
-  trackst.alloc_len = new_alloc_len;
+  if (new_alloc_len < ea->min_alloc_len)
+    new_alloc_len = ea->min_alloc_len;
+  new_t = caml_stat_resize_noexc(ea->t, new_alloc_len * sizeof(struct tracked));
+  if (new_t == NULL) return 0;
+  ea->t = new_t;
+  ea->alloc_len = new_alloc_len;
   return 1;
 }
 
@@ -392,10 +392,10 @@ Caml_inline uintnat new_tracked(uintnat n_samples, uintnat wosize,
                                 value block, value user_data)
 {
   struct tracked *t;
-  if (!realloc_trackst(1))
+  if (!realloc_entries(&entries, 1))
     return Invalid_index;
-  trackst.len++;
-  t = &trackst.entries[trackst.len - 1];
+  entries.len++;
+  t = &entries.t[entries.len - 1];
   t->block = block;
   t->n_samples = n_samples;
   t->wosize = wosize;
@@ -408,16 +408,16 @@ Caml_inline uintnat new_tracked(uintnat n_samples, uintnat wosize,
   t->cb_alloc_called = t->cb_promote_called = t->cb_dealloc_called = 0;
   t->deleted = 0;
   t->callback_running = 0;
-  return trackst.len - 1;
+  return entries.len - 1;
 }
 
 static void mark_deleted(uintnat t_idx)
 {
-  struct tracked* t = &trackst.entries[t_idx];
+  struct tracked* t = &entries.t[t_idx];
   t->deleted = 1;
   t->user_data = Val_unit;
   t->block = Val_unit;
-  if (t_idx < trackst.delete) trackst.delete = t_idx;
+  if (t_idx < entries.delete_idx) entries.delete_idx = t_idx;
   CAMLassert(t->idx_ptr == NULL);
 }
 
 /* The return value is an exception or [Val_unit] iff [*t_idx] is set to
    [Invalid_index]. In this case, the entry is deleted.
    Otherwise, the return value is a [Some(...)] block. */
 Caml_inline value run_callback_exn(uintnat *t_idx, value cb, value param)
 {
-  struct tracked* t = &trackst.entries[*t_idx];
+  struct tracked* t = &entries.t[*t_idx];
   value res;
   CAMLassert(!t->callback_running && t->idx_ptr == NULL);
   CAMLassert(lambda > 0.);
@@ -437,10 +437,10 @@ Caml_inline value run_callback_exn(uintnat *t_idx, value cb, value param)
   callback_running = 0;
   /* The call above can modify [*t_idx] and thus invalidate [t]. */
   if (*t_idx == Invalid_index) {
-    /* Make sure this entry has not been removed by [caml_memprof_set] */
+    /* Make sure this entry has not been removed by [caml_memprof_stop] */
     return Val_unit;
   }
-  t = &trackst.entries[*t_idx];
+  t = &entries.t[*t_idx];
   t->idx_ptr = NULL;
   t->callback_running = 0;
   if (Is_exception_result(res) || res == Val_unit) {
@@ -455,8 +455,8 @@ Caml_inline value run_callback_exn(uintnat *t_idx, value cb, value param)
 /* Run all the needed callbacks for a given entry.
    In case of a thread context switch during a callback, this can be
    called in a reetrant way.
-   If [*t_idx] equals [trackst.callback], then this function
-   increments [trackst.callback].
+   If [*t_idx] equals [callback_idx], then this function
+   increments [callback_idx].
    The index of the entry may change. It is set to [Invalid_index] if
    the entry is discarded.
    Returns:
   - An exception result if the callback raised an exception
   - Val_long(0) == Val_unit == None otherwise
 */
 static value handle_entry_callbacks_exn(uintnat* t_idx)
 {
   value sample_info, res, user_data; /* No need to make these roots */
-  struct tracked* t = &trackst.entries[*t_idx];
-  if (*t_idx == trackst.callback) trackst.callback++;
+  struct tracked* t = &entries.t[*t_idx];
+  if (*t_idx == callback_idx) callback_idx++;
 
   if (t->deleted || t->callback_running) return Val_unit;
 
@@ -489,11 +489,11 @@ static value handle_entry_callbacks_exn(uintnat* t_idx)
       return res;
     CAMLassert(!Is_exception_result(res) && Is_block(res) && Tag_val(res) == 0
                && Wosize_val(res) == 1);
-    t = &trackst.entries[*t_idx];
+    t = &entries.t[*t_idx];
     t->user_data = Field(res, 0);
     if (Is_block(t->user_data) && Is_young(t->user_data) &&
-        *t_idx < trackst.young)
-      trackst.young = *t_idx;
+        *t_idx < entries.young_idx)
+      entries.young_idx = *t_idx;
   }
 
   if (t->promoted && !t->cb_promote_called) {
@@ -505,11 +505,11 @@ static value handle_entry_callbacks_exn(uintnat* t_idx)
       return res;
     CAMLassert(!Is_exception_result(res) && Is_block(res) && Tag_val(res) == 0
                && Wosize_val(res) == 1);
-    t = &trackst.entries[*t_idx];
+    t = &entries.t[*t_idx];
     t->user_data = Field(res, 0);
    if (Is_block(t->user_data) && Is_young(t->user_data) &&
-        *t_idx < trackst.young)
-      trackst.young = *t_idx;
+        *t_idx < entries.young_idx)
+      entries.young_idx = *t_idx;
   }
 
   if (t->deallocated && !t->cb_dealloc_called) {
@@ -528,30 +528,34 @@ static value handle_entry_callbacks_exn(uintnat* t_idx)
   return Val_unit;
 }
 
-/* Remove any deleted entries, updating callback and young */
-static void flush_deleted(void)
+/* Remove any deleted entries from the entries array, updating
+   [ea->young_idx] and [callback_idx] if [ea == &entries]. */
+static void flush_deleted(struct entry_array* ea)
 {
-  uintnat i = trackst.delete, j = i;
-  while (i < trackst.len) {
-    if (!trackst.entries[i].deleted) {
-      if (trackst.entries[i].idx_ptr != NULL)
-        *trackst.entries[i].idx_ptr = j;
-      trackst.entries[j] = trackst.entries[i];
+  uintnat i, j;
+
+  if (ea == NULL) return;
+
+  j = i = ea->delete_idx;
+  while (i < ea->len) {
+    if (!ea->t[i].deleted) {
+      if (ea->t[i].idx_ptr != NULL) *ea->t[i].idx_ptr = j;
+      ea->t[j] = ea->t[i];
       j++;
     }
     i++;
-    if (trackst.young == i) trackst.young = j;
-    if (trackst.callback == i) trackst.callback = j;
+    if (ea->young_idx == i) ea->young_idx = j;
+    if (ea == &entries && callback_idx == i) callback_idx = j;
   }
-  trackst.delete = trackst.len = j;
-  CAMLassert(trackst.callback <= trackst.len);
-  CAMLassert(trackst.young <= trackst.len);
-  realloc_trackst(0);
+  ea->delete_idx = ea->len = j;
+  CAMLassert(ea != &entries || callback_idx <= ea->len);
+  CAMLassert(ea->young_idx <= ea->len);
+  realloc_entries(ea, 0);
 }
 
 static void check_action_pending(void)
 {
-  if (!suspended && trackst.callback < trackst.len)
+  if (!suspended && callback_idx < entries.len)
     caml_set_action_pending();
 }
 
@@ -567,16 +571,16 @@ void caml_memprof_set_suspended(int s)
 value caml_memprof_handle_postponed_exn(void)
 {
   value res = Val_unit;
-  if (suspended || trackst.callback >= trackst.len) return res;
+  if (suspended || callback_idx >= entries.len) return res;
 
   caml_memprof_set_suspended(1);
-  while (trackst.callback < trackst.len) {
-    uintnat i = trackst.callback;
+  while (callback_idx < entries.len) {
+    uintnat i = callback_idx;
     res = handle_entry_callbacks_exn(&i);
     if (Is_exception_result(res)) break;
   }
   caml_memprof_set_suspended(0);
-  flush_deleted();
+  flush_deleted(&entries);
   return res;
 }
 
@@ -584,15 +588,15 @@ void caml_memprof_oldify_young_roots(void)
 {
   uintnat i;
   /* This loop should always have a small number of iterations (when
-     compared to the size of the minor heap), because the young
+     compared to the size of the minor heap), because the young_idx
      pointer should always be close to the end of the array. Indeed,
     it is only moved back when returning from a callback triggered by
     allocation or promotion, which can only happen for blocks
-     allocated recently, which are close to the end of the trackst
+     allocated recently, which are close to the end of the entries
     array. */
-  for (i = trackst.young; i < trackst.len; i++)
-    caml_oldify_one(trackst.entries[i].user_data,
-                    &trackst.entries[i].user_data);
+  for (i = entries.young_idx; i < entries.len; i++)
+    caml_oldify_one(entries.t[i].user_data,
+                    &entries.t[i].user_data);
 }
 
 void caml_memprof_minor_update(void)
 {
   uintnat i;
   /* See comment in [caml_memprof_oldify_young_roots] for the number
     of iterations of this loop. */
-  for (i = trackst.young; i < trackst.len; i++) {
-    struct tracked *t = &trackst.entries[i];
+  for (i = entries.young_idx; i < entries.len; i++) {
+    struct tracked *t = &entries.t[i];
     CAMLassert(Is_block(t->block) || t->deleted || t->deallocated ||
                Is_placeholder(t->block));
     if (Is_block(t->block) && Is_young(t->block)) {
@@ -617,25 +621,25 @@ void caml_memprof_minor_update(void)
       }
     }
   }
-  if (trackst.callback > trackst.young) {
-    trackst.callback = trackst.young;
+  if (callback_idx > entries.young_idx) {
+    callback_idx = entries.young_idx;
     check_action_pending();
   }
-  trackst.young = trackst.len;
+  entries.young_idx = entries.len;
 }
 
 void caml_memprof_do_roots(scanning_action f)
 {
   uintnat i;
-  for (i = 0; i < trackst.len; i++)
-    f(trackst.entries[i].user_data, &trackst.entries[i].user_data);
+  for (i = 0; i < entries.len; i++)
+    f(entries.t[i].user_data, &entries.t[i].user_data);
 }
 
 void caml_memprof_update_clean_phase(void)
 {
   uintnat i;
-  for (i = 0; i < trackst.len; i++) {
-    struct tracked *t = &trackst.entries[i];
+  for (i = 0; i < entries.len; i++) {
+    struct tracked *t = &entries.t[i];
     if (Is_block(t->block) && !Is_young(t->block)) {
       CAMLassert(Is_in_heap(t->block));
       CAMLassert(!t->alloc_young || t->promoted);
@@ -645,15 +649,15 @@ void caml_memprof_update_clean_phase(void)
       }
     }
   }
-  trackst.callback = 0;
+  callback_idx = 0;
   check_action_pending();
 }
 
 void caml_memprof_invert_tracked(void)
 {
   uintnat i;
-  for (i = 0; i < trackst.len; i++)
-    caml_invert_root(trackst.entries[i].block, &trackst.entries[i].block);
+  for (i = 0; i < entries.len; i++)
+    caml_invert_root(entries.t[i].block, &entries.t[i].block);
 }
 
 /**** Sampling procedures ****/
@@ -806,10 +810,10 @@ void caml_memprof_track_young(uintnat wosize, int from_caml,
     }
     idx_tab[0] = first_idx;
     if (idx_tab[0] != Invalid_index)
-      trackst.entries[idx_tab[0]].idx_ptr = &idx_tab[0];
+      entries.t[idx_tab[0]].idx_ptr = &idx_tab[0];
   }
 
-  /* Usually, trackst.entries[...].idx_ptr is owned by the thread
+  /* Usually, entries.t[...].idx_ptr is owned by the thread
      running a callback for the entry, if any. Here, we take
     ownership of idx_ptr until the end of the function.
@@ -819,7 +823,7 @@ void caml_memprof_track_young(uintnat wosize, int from_caml,
          do not run on Placeholder values */
       idx_ptr = &idx_tab[allocs_sampled];
       *idx_ptr = t_idx;
-      trackst.entries[*idx_ptr].idx_ptr = idx_ptr;
+      entries.t[*idx_ptr].idx_ptr = idx_ptr;
       allocs_sampled++;
     }
   }
@@ -831,27 +835,27 @@ void caml_memprof_track_young(uintnat wosize, int from_caml,
     pending flag, but we need to make sure that the action pending
     flag is not set systematically, which is to be expected, since
     [new_tracked] created a new block without updating
-     [trackst.callback]. Fortunately, [handle_entry_callback_exn]
-     increments [trackst.callback] if it is equal to [t_idx]. */
+     [callback_idx]. Fortunately, [handle_entry_callback_exn]
+     increments [callback_idx] if it is equal to [t_idx]. */
 
   /* This condition happens either in the case of an exception or if
     one of the callbacks returned [None]. If these cases happen
     frequently, then we need to call [flush_deleted] somewhere to
     prevent a leak. */
   if (has_delete)
-    flush_deleted();
+    flush_deleted(&entries);
 
   if (Is_exception_result(res)) {
     for (i = 0; i < allocs_sampled; i++)
       if (idx_tab[i] != Invalid_index) {
-        struct tracked* t = &trackst.entries[idx_tab[i]];
+        struct tracked* t = &entries.t[idx_tab[i]];
        /* The allocations are cancelled because of the exception,
           but this callback has already been called. We simulate a
          deallocation. */
        t->block = Val_unit;
        t->deallocated = 1;
-        if (trackst.callback > idx_tab[i]) {
-          trackst.callback = idx_tab[i];
+        if (callback_idx > idx_tab[i]) {
+          callback_idx = idx_tab[i];
          check_action_pending();
        }
      }
@@ -882,11 +886,11 @@ void caml_memprof_track_young(uintnat wosize, int from_caml,
         (uninitialized) block. This is correct because the allocation
         and initialization happens right after returning from
         [caml_memprof_track_young]. */
-      struct tracked *t = &trackst.entries[idx_tab[i]];
+      struct tracked *t = &entries.t[idx_tab[i]];
       t->block = Val_hp(Caml_state->young_ptr + Offs_placeholder(t->block));
       t->idx_ptr = NULL;
       CAMLassert(t->cb_alloc_called);
-      if (idx_tab[i] < trackst.young) trackst.young = idx_tab[i];
+      if (idx_tab[i] < entries.young_idx) entries.young_idx = idx_tab[i];
     }
   }
   if (idx_tab != &first_idx) caml_stat_free(idx_tab);
@@ -977,14 +981,14 @@ CAMLprim value caml_memprof_stop(value unit)
   if (!started) caml_failwith("Gc.Memprof.stop: not started.");
 
   /* Discard the tracked blocks. */
-  for (i = 0; i < trackst.len; i++)
-    if (trackst.entries[i].idx_ptr != NULL)
-      *trackst.entries[i].idx_ptr = Invalid_index;
-  trackst.len = 0;
-  trackst.callback = trackst.young = trackst.delete = 0;
-  caml_stat_free(trackst.entries);
-  trackst.entries = NULL;
-  trackst.alloc_len = 0;
+  for (i = 0; i < entries.len; i++)
+    if (entries.t[i].idx_ptr != NULL)
+      *entries.t[i].idx_ptr = Invalid_index;
+  entries.len = 0;
+  callback_idx = entries.young_idx = entries.delete_idx = 0;
+  caml_stat_free(entries.t);
+  entries.t = NULL;
+  entries.alloc_len = 0;
 
   lambda = 0;
   // Reset the memprof trigger in order to make sure we won't enter

From ea52bec84b35611a2cdf25d0ca6e9794e980815b Mon Sep 17 00:00:00 2001
From: Jacques-Henri Jourdan
Date: Fri, 9 Oct 2020 16:26:38 +0200
Subject: [PATCH 06/12] Memprof: thread-local context is now stored as a
 pointer in systhread's data structures.
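In outline, the new interface is used as follows (a sketch only; the
authoritative declarations and call sites are in the diff below):

    /* Sketch, not part of the patch: each thread owns one context. */
    struct caml_memprof_th_ctx *ctx = caml_memprof_new_th_ctx();
    ...
    caml_memprof_leave_thread();     /* the thread is scheduled out */
    caml_memprof_enter_thread(ctx);  /* the thread is scheduled in  */
    ...
    caml_memprof_delete_th_ctx(ctx); /* the thread terminates       */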
Reasons:
- Better abstraction in memprof.h
- Simpler save/restore functions
- We introduce a current-thread context, so that we don't need a
  special case for the current thread
---
 otherlibs/systhreads/st_stubs.c | 11 ++---
 runtime/caml/memprof.h          | 13 +++---
 runtime/memprof.c               | 77 ++++++++++++++++++---------------
 3 files changed, 54 insertions(+), 47 deletions(-)

diff --git a/otherlibs/systhreads/st_stubs.c b/otherlibs/systhreads/st_stubs.c
index 8d6b93742..6164bb425 100644
--- a/otherlibs/systhreads/st_stubs.c
+++ b/otherlibs/systhreads/st_stubs.c
@@ -93,7 +93,7 @@ struct caml_thread_struct {
   int backtrace_pos;       /* Saved Caml_state->backtrace_pos */
   backtrace_slot * backtrace_buffer; /* Saved Caml_state->backtrace_buffer */
   value backtrace_last_exn; /* Saved Caml_state->backtrace_last_exn (root) */
-  struct caml_memprof_th_ctx memprof_ctx;
+  struct caml_memprof_th_ctx *memprof_ctx;
 };
 
 typedef struct caml_thread_struct * caml_thread_t;
@@ -196,7 +196,7 @@ Caml_inline void caml_thread_save_runtime_state(void)
   curr_thread->backtrace_pos = Caml_state->backtrace_pos;
   curr_thread->backtrace_buffer = Caml_state->backtrace_buffer;
   curr_thread->backtrace_last_exn = Caml_state->backtrace_last_exn;
-  caml_memprof_save_th_ctx(&curr_thread->memprof_ctx);
+  caml_memprof_leave_thread();
 }
 
 Caml_inline void caml_thread_restore_runtime_state(void)
@@ -219,7 +219,7 @@ Caml_inline void caml_thread_restore_runtime_state(void)
   Caml_state->backtrace_pos = curr_thread->backtrace_pos;
   Caml_state->backtrace_buffer = curr_thread->backtrace_buffer;
   Caml_state->backtrace_last_exn = curr_thread->backtrace_last_exn;
-  caml_memprof_restore_th_ctx(&curr_thread->memprof_ctx);
+  caml_memprof_enter_thread(curr_thread->memprof_ctx);
 }
 
 /* Hooks for caml_enter_blocking_section and caml_leave_blocking_section */
@@ -349,7 +349,7 @@ static caml_thread_t caml_thread_new_info(void)
   th->backtrace_pos = 0;
   th->backtrace_buffer = NULL;
   th->backtrace_last_exn = Val_unit;
-  caml_memprof_init_th_ctx(&th->memprof_ctx);
+  th->memprof_ctx = caml_memprof_new_th_ctx();
   return th;
 }
 
@@ -450,6 +450,7 @@ CAMLprim value caml_thread_initialize(value unit)   /* ML */
 #ifdef NATIVE_CODE
   curr_thread->exit_buf = &caml_termination_jmpbuf;
 #endif
+  curr_thread->memprof_ctx = &caml_memprof_main_ctx;
   /* The stack-related fields will be filled in at the next
      caml_enter_blocking_section */
   /* Associate the thread descriptor with the thread */
@@ -499,7 +500,7 @@ static void caml_thread_stop(void)
      below uses accurate information. */
   caml_thread_save_runtime_state();
   /* Tell memprof that this thread is terminating. */
-  caml_memprof_stop_th_ctx(&curr_thread->memprof_ctx);
+  caml_memprof_delete_th_ctx(curr_thread->memprof_ctx);
   /* Signal that the thread has terminated */
   caml_threadstatus_terminate(Terminated(curr_thread->descr));
   /* Remove th from the doubly-linked list of threads and free its info block */

diff --git a/runtime/caml/memprof.h b/runtime/caml/memprof.h
index 7625273c8..2e325baba 100644
--- a/runtime/caml/memprof.h
+++ b/runtime/caml/memprof.h
@@ -40,13 +40,12 @@ extern void caml_memprof_do_roots(scanning_action f);
 extern void caml_memprof_update_clean_phase(void);
 extern void caml_memprof_invert_tracked(void);
 
-struct caml_memprof_th_ctx {
-  int suspended, callback_running;
-};
-CAMLextern void caml_memprof_init_th_ctx(struct caml_memprof_th_ctx*);
-CAMLextern void caml_memprof_stop_th_ctx(struct caml_memprof_th_ctx*);
-CAMLextern void caml_memprof_save_th_ctx(struct caml_memprof_th_ctx*);
-CAMLextern void caml_memprof_restore_th_ctx(const struct caml_memprof_th_ctx*);
+CAMLextern struct caml_memprof_th_ctx caml_memprof_main_ctx;
+
+CAMLextern struct caml_memprof_th_ctx* caml_memprof_new_th_ctx(void);
+CAMLextern void caml_memprof_leave_thread(void);
+CAMLextern void caml_memprof_enter_thread(struct caml_memprof_th_ctx*);
+CAMLextern void caml_memprof_delete_th_ctx(struct caml_memprof_th_ctx*);
 
 #endif
 
diff --git a/runtime/memprof.c b/runtime/memprof.c
index a12a122f7..9f7a8c205 100644
--- a/runtime/memprof.c
+++ b/runtime/memprof.c
@@ -40,20 +40,11 @@ static uint32_t rand_pos;
 /* [lambda] is the mean number of samples for each allocated word
    (including block headers). */
 static double lambda = 0;
- /* Precomputed value of [1/log(1-lambda)], for fast sampling of
-    geometric distribution.
-    Dummy if [lambda = 0]. */
+/* Precomputed value of [1/log(1-lambda)], for fast sampling of
+   geometric distribution.
+   Dummy if [lambda = 0]. */
 static float one_log1m_lambda;
 
-/* [suspended] is used for masking memprof callbacks when
-   a callback is running or when an uncaught exception handler is
-   called. */
-static int suspended = 0;
-
-/* [callback_running] is used to trigger a fatal error whenever
-   [Thread.exit] is called from a callback. */
-static int callback_running = 0;
-
 static intnat callstack_size;
 
 /* accessors for the OCaml type [Gc.Memprof.tracker],
@@ -66,6 +57,19 @@ static intnat callstack_size;
 
 static value tracker;
 
+/* Structure for thread-local variables. */
+struct caml_memprof_th_ctx {
+  /* [suspended] is used for masking memprof callbacks when
+     a callback is running or when an uncaught exception handler is
+     called. */
+  int suspended;
+
+  /* [callback_running] is used to trigger a fatal error whenever
+     [Thread.exit] is called from a callback. */
+  int callback_running;
+} caml_memprof_main_ctx = { 0, 0 };
+static struct caml_memprof_th_ctx* local = &caml_memprof_main_ctx;
+
 /* Pointer to the word following the next sample in the minor
    heap. Equals [Caml_state->young_alloc_start] if no sampling is
    planned in the current minor heap.
@@ -263,14 +267,14 @@ static value capture_callstack_postponed()
 /* In this version, we are allowed to call the GC, so we use
    [caml_alloc], which is more efficient since it uses the minor
   heap.
-   Should be called with [suspended == 1] */
+   Should be called with [local->suspended == 1] */
 static value capture_callstack(int alloc_idx)
 {
   value res;
   intnat callstack_len =
     caml_collect_current_callstack(&callstack_buffer, &callstack_buffer_len,
                                    callstack_size, alloc_idx);
-  CAMLassert(suspended);
+  CAMLassert(local->suspended);
   res = caml_alloc(callstack_len, 0);
   memcpy(Op_val(res), callstack_buffer, sizeof(value) * callstack_len);
   if (callstack_buffer_len > 256 &&
       callstack_buffer_len > callstack_len * 8) {
@@ -352,7 +356,7 @@ struct entry_array {
      the major heap ([young <= len]). */
   uintnat young_idx;
   /* There are no blocks to be deleted before this position
-     ([delete_idx <= entrieslen]). */
+     ([delete_idx <= len]). */
   uintnat delete_idx;
 };
 
@@ -361,8 +365,8 @@ struct entry_array {
 static struct entry_array entries =
   { NULL, MIN_ENTRIES_ALLOC_LEN, 0, 0, 0, 0 };
 
-/* There are no pending callbacks before this position
-   ([callback_idx <= len]). */
+/* There are no pending callbacks in [entries] before this
+   position ([callback_idx <= entries.len]). */
 static uintnat callback_idx;
 
@@ -431,10 +435,10 @@ Caml_inline value run_callback_exn(uintnat *t_idx, value cb, value param)
   CAMLassert(!t->callback_running && t->idx_ptr == NULL);
   CAMLassert(lambda > 0.);
 
-  callback_running = t->callback_running = 1;
+  local->callback_running = t->callback_running = 1;
   t->idx_ptr = t_idx;
   res = caml_callback_exn(cb, param);
-  callback_running = 0;
+  local->callback_running = 0;
   /* The call above can modify [*t_idx] and thus invalidate [t]. */
   if (*t_idx == Invalid_index) {
     /* Make sure this entry has not been removed by [caml_memprof_stop] */
     return Val_unit;
   }
@@ -555,13 +559,13 @@ static void flush_deleted(struct entry_array* ea)
 
 static void check_action_pending(void)
 {
-  if (!suspended && callback_idx < entries.len)
+  if (!local->suspended && callback_idx < entries.len)
     caml_set_action_pending();
 }
 
 void caml_memprof_set_suspended(int s)
 {
-  suspended = s;
+  local->suspended = s;
   caml_memprof_renew_minor_sample();
   if (!s) check_action_pending();
 }
@@ -571,7 +575,7 @@ void caml_memprof_set_suspended(int s)
 value caml_memprof_handle_postponed_exn(void)
 {
   value res = Val_unit;
-  if (suspended || callback_idx >= entries.len) return res;
+  if (local->suspended || callback_idx >= entries.len) return res;
 
   caml_memprof_set_suspended(1);
   while (callback_idx < entries.len) {
@@ -668,7 +672,7 @@ void caml_memprof_track_alloc_shr(value block)
   value callstack = 0;
   CAMLassert(Is_in_heap(block));
 
-  if (lambda == 0 || suspended) return;
+  if (lambda == 0 || local->suspended) return;
 
   n_samples = rand_binom(Whsize_val(block));
   if (n_samples == 0) return;
@@ -700,8 +704,7 @@ static void shift_sample(uintnat n)
    geometric distribution. */
 void caml_memprof_renew_minor_sample(void)
 {
-
-  if (lambda == 0 || suspended) /* No trigger in the current minor heap. */
+  if (lambda == 0 || local->suspended) /* No trigger in the current minor heap. */
     caml_memprof_young_trigger = Caml_state->young_alloc_start;
   else {
     uintnat geom = rand_geom();
@@ -734,7 +737,7 @@ void caml_memprof_track_young(uintnat wosize, int from_caml,
      equal to [Caml_state->young_alloc_start]. But this function is only
     called with [Caml_state->young_alloc_start <= Caml_state->young_ptr <
     caml_memprof_young_trigger], which is contradictory. */
-  CAMLassert(!suspended && lambda > 0);
+  CAMLassert(!local->suspended && lambda > 0);
 
   if (!from_caml) {
     unsigned n_samples = 1 +
@@ -907,7 +910,7 @@ void caml_memprof_track_interned(header_t* block, header_t* blockend)
   value callstack = 0;
   int is_young = Is_young(Val_hp(block));
 
-  if (lambda == 0 || suspended) return;
+  if (lambda == 0 || local->suspended) return;
 
   p = block;
   while (1) {
@@ -1007,30 +1010,34 @@ CAMLprim value caml_memprof_stop(value unit)
 
 /**** Interface with systhread. ****/
 
-CAMLexport void caml_memprof_init_th_ctx(struct caml_memprof_th_ctx* ctx)
+CAMLexport struct caml_memprof_th_ctx* caml_memprof_new_th_ctx()
 {
+  struct caml_memprof_th_ctx* ctx =
+    caml_stat_alloc(sizeof(struct caml_memprof_th_ctx));
   ctx->suspended = 0;
   ctx->callback_running = 0;
+  return ctx;
 }
 
-CAMLexport void caml_memprof_stop_th_ctx(struct caml_memprof_th_ctx* ctx)
+CAMLexport void caml_memprof_delete_th_ctx(struct caml_memprof_th_ctx* ctx)
 {
   /* Make sure that no memprof callback is being executed in this
     thread. If so, memprof data structures may have pointers to the
     thread's stack. */
   if (ctx->callback_running)
     caml_fatal_error("Thread.exit called from a memprof callback.");
+  if (ctx == local) local = NULL;
+  if (ctx != &caml_memprof_main_ctx) caml_stat_free(ctx);
 }
 
-CAMLexport void caml_memprof_save_th_ctx(struct caml_memprof_th_ctx* ctx)
+CAMLexport void caml_memprof_leave_thread(void)
 {
-  ctx->suspended = suspended;
-  ctx->callback_running = callback_running;
+  local = NULL;
 }
 
-CAMLexport void caml_memprof_restore_th_ctx
-  (const struct caml_memprof_th_ctx* ctx)
+CAMLexport void caml_memprof_enter_thread(struct caml_memprof_th_ctx* ctx)
 {
-  callback_running = ctx->callback_running;
+  CAMLassert(local == NULL);
+  local = ctx;
   caml_memprof_set_suspended(ctx->suspended);
 }

From 18c0f9556040b9d3210425915faea2b4f46e91c2 Mon Sep 17 00:00:00 2001
From: Jacques-Henri Jourdan
Date: Fri, 12 Jun 2020 14:36:05 +0200
Subject: [PATCH 07/12] Memprof: refactor [run_callback_exn] so that it also
 stores the value returned by the callback.

---
 runtime/memprof.c | 49 +++++++++++++++++------------------------------
 1 file changed, 18 insertions(+), 31 deletions(-)

diff --git a/runtime/memprof.c b/runtime/memprof.c
index 9f7a8c205..aeb75107a 100644
--- a/runtime/memprof.c
+++ b/runtime/memprof.c
@@ -329,9 +329,9 @@ struct tracked {
   /* Whether a callback is currently running for this entry. */
   unsigned int callback_running : 1;
 
-  /* Pointer to the [t_idx] variable in the [run_callback] frame which
+  /* Pointer to the [t_idx] variable in the [run_callback_exn] frame which
      is currently running the callback for this entry. This is needed
-     to make [run_callback] reetrant, in the case it is called
+     to make [run_callback_exn] reetrant, in the case it is called
      simultaneously by several threads. */
   uintnat* idx_ptr;
 };
@@ -425,9 +425,6 @@ static void mark_deleted(uintnat t_idx)
   CAMLassert(t->idx_ptr == NULL);
 }
 
-/* The return value is an exception or [Val_unit] iff [*t_idx] is set to
-   [Invalid_index]. In this case, the entry is deleted.
-   Otherwise, the return value is a [Some(...)] block. */
 Caml_inline value run_callback_exn(uintnat *t_idx, value cb, value param)
 {
   struct tracked* t = &entries.t[*t_idx];
   value res;
   CAMLassert(!t->callback_running && t->idx_ptr == NULL);
@@ -437,6 +434,7 @@ Caml_inline value run_callback_exn(uintnat *t_idx, value cb, value param)
 
   local->callback_running = t->callback_running = 1;
   t->idx_ptr = t_idx;
+  t->user_data = Val_unit; /* Release root. */
   res = caml_callback_exn(cb, param);
   local->callback_running = 0;
   /* The call above can modify [*t_idx] and thus invalidate [t]. */
@@ -452,8 +450,17 @@ Caml_inline value run_callback_exn(uintnat *t_idx, value cb, value param)
        this entry. */
     mark_deleted(*t_idx);
     *t_idx = Invalid_index;
+    return res;
+  } else {
+    /* Callback returned [Some _]. Store the value in [user_data]. */
+    CAMLassert(!Is_exception_result(res) && Is_block(res) && Tag_val(res) == 0
+               && Wosize_val(res) == 1);
+    t->user_data = Field(res, 0);
+    if (Is_block(t->user_data) && Is_young(t->user_data) &&
+        *t_idx < entries.young_idx)
+      entries.young_idx = *t_idx;
+    return Val_unit;
   }
-  return res;
 }
 
 /* Run all the needed callbacks for a given entry.
@@ -469,7 +476,7 @@ Caml_inline value run_callback_exn(uintnat *t_idx, value cb, value param)
 */
 static value handle_entry_callbacks_exn(uintnat* t_idx)
 {
-  value sample_info, res, user_data; /* No need to make these roots */
+  value sample_info, res; /* No need to make these roots */
   struct tracked* t = &entries.t[*t_idx];
   if (*t_idx == callback_idx) callback_idx++;
 
@@ -485,47 +492,27 @@ static value handle_entry_callbacks_exn(uintnat* t_idx)
     Field(sample_info, 1) = Val_long(t->wosize);
     Field(sample_info, 2) = Val_long(t->unmarshalled);
     Field(sample_info, 3) = t->user_data;
-    t->user_data = Val_unit;
     res = run_callback_exn(t_idx,
       t->alloc_young ? Alloc_minor(tracker) : Alloc_major(tracker),
       sample_info);
-    if (*t_idx == Invalid_index)
-      return res;
-    CAMLassert(!Is_exception_result(res) && Is_block(res) && Tag_val(res) == 0
-               && Wosize_val(res) == 1);
+    if (*t_idx == Invalid_index) return res;
     t = &entries.t[*t_idx];
-    t->user_data = Field(res, 0);
-    if (Is_block(t->user_data) && Is_young(t->user_data) &&
-        *t_idx < entries.young_idx)
-      entries.young_idx = *t_idx;
   }
 
   if (t->promoted && !t->cb_promote_called) {
     t->cb_promote_called = 1;
-    user_data = t->user_data;
-    t->user_data = Val_unit;
-    res = run_callback_exn(t_idx, Promote(tracker), user_data);
-    if (*t_idx == Invalid_index)
-      return res;
-    CAMLassert(!Is_exception_result(res) && Is_block(res) && Tag_val(res) == 0
-               && Wosize_val(res) == 1);
+    res = run_callback_exn(t_idx, Promote(tracker), t->user_data);
+    if (*t_idx == Invalid_index) return res;
     t = &entries.t[*t_idx];
-    t->user_data = Field(res, 0);
-    if (Is_block(t->user_data) && Is_young(t->user_data) &&
-        *t_idx < entries.young_idx)
-      entries.young_idx = *t_idx;
   }
 
   if (t->deallocated && !t->cb_dealloc_called) {
     value cb = (t->promoted || !t->alloc_young) ?
       Dealloc_major(tracker) : Dealloc_minor(tracker);
     t->cb_dealloc_called = 1;
-    user_data = t->user_data;
-    t->user_data = Val_unit;
-    res = run_callback_exn(t_idx, cb, user_data);
+    res = run_callback_exn(t_idx, cb, t->user_data);
     /* [t] is invalid, but we do no longer use it. */
     CAMLassert(*t_idx == Invalid_index);
-    CAMLassert(Is_exception_result(res) || res == Val_unit);
     return res;
   }
 
   return Val_unit;
 }

From f83d71830dce28e5aa82310169d6fa59a0a54013 Mon Sep 17 00:00:00 2001
From: Jacques-Henri Jourdan
Date: Fri, 12 Jun 2020 16:40:46 +0200
Subject: [PATCH 08/12] Memprof: provide the guarantee that an allocation
 callback is always run in the same thread where the allocation takes place.

This is done by using a local entry array for each thread, containing
tracked blocks whose allocation callback has not yet been called.

This allows some simplification in the code running callbacks for
young allocations (see the sketch below).
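As a sketch of the resulting data-structure split (illustration only;
the authoritative definitions are in runtime/memprof.c below):

    /* Per-thread: entries whose allocation callback has not run yet. */
    struct caml_memprof_th_ctx {
      int suspended, callback_running;
      struct entry_array entries;
    };

    /* Shared by all threads: entries whose allocation callback has
       completed; their promotion/deallocation callbacks may run from
       any thread. */
    static struct entry_array entries_global;

[caml_memprof_handle_postponed_exn] first runs the allocation callback
for each entry of the local array, then transfers the entry to
[entries_global].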
Indeed, since the entry array is local to one thread, we know for sure
that it cannot be modified during a callback, and therefore we no
longer need to remember the indices of the corresponding new entries.
---
 otherlibs/systhreads/st_stubs.c               |  16 +-
 otherlibs/systhreads/thread.ml                |  11 +-
 runtime/caml/memprof.h                        |   3 +
 runtime/memprof.c                             | 636 ++++++++++--------
 runtime/minor_gc.c                            |   3 +-
 runtime/startup_aux.c                         |   1 -
 stdlib/gc.mli                                 |   3 +
 testsuite/tests/backtrace/callstack.reference |   2 +-
 .../tests/statmemprof/blocking_in_callback.ml |  40 +-
 .../tests/statmemprof/moved_while_blocking.ml |  76 +++
 .../moved_while_blocking.reference            |  13 +
 11 files changed, 495 insertions(+), 309 deletions(-)
 create mode 100644 testsuite/tests/statmemprof/moved_while_blocking.ml
 create mode 100644 testsuite/tests/statmemprof/moved_while_blocking.reference

diff --git a/otherlibs/systhreads/st_stubs.c b/otherlibs/systhreads/st_stubs.c
index 6164bb425..5fb73ce6d 100644
--- a/otherlibs/systhreads/st_stubs.c
+++ b/otherlibs/systhreads/st_stubs.c
@@ -152,9 +152,7 @@ static void (*prev_scan_roots_hook) (scanning_action);
 
 static void caml_thread_scan_roots(scanning_action action)
 {
-  caml_thread_t th;
-
-  th = curr_thread;
+  caml_thread_t th = curr_thread;
   do {
     (*action)(th->descr, &th->descr);
     (*action)(th->backtrace_last_exn, &th->backtrace_last_exn);
@@ -174,6 +172,17 @@ static void caml_thread_scan_roots(scanning_action action)
   if (prev_scan_roots_hook != NULL) (*prev_scan_roots_hook)(action);
 }
 
+/* Hook for iterating over Memprof's entries arrays */
+
+static void memprof_ctx_iter(th_ctx_action f, void* data)
+{
+  caml_thread_t th = curr_thread;
+  do {
+    f(th->memprof_ctx, data);
+    th = th->next;
+  } while (th != curr_thread);
+}
+
 /* Saving and restoring runtime state in curr_thread */
 
 Caml_inline void caml_thread_save_runtime_state(void)
@@ -469,6 +478,7 @@ CAMLprim value caml_thread_initialize(value unit)   /* ML */
   caml_channel_mutex_unlock_exn = caml_io_mutex_unlock_exn;
   prev_stack_usage_hook = caml_stack_usage_hook;
   caml_stack_usage_hook = caml_thread_stack_usage;
+  caml_memprof_th_ctx_iter_hook = memprof_ctx_iter;
   /* Set up fork() to reinitialize the thread machinery in the child
      (PR#4577) */
   st_atfork(caml_thread_reinitialize);

diff --git a/otherlibs/systhreads/thread.ml b/otherlibs/systhreads/thread.ml
index bf47d75b0..8a7569200 100644
--- a/otherlibs/systhreads/thread.ml
+++ b/otherlibs/systhreads/thread.ml
@@ -27,20 +27,27 @@ external yield : unit -> unit = "caml_thread_yield"
 external self : unit -> t = "caml_thread_self" [@@noalloc]
 external id : t -> int = "caml_thread_id" [@@noalloc]
 external join : t -> unit = "caml_thread_join"
-external exit : unit -> unit = "caml_thread_exit"
+external exit_stub : unit -> unit = "caml_thread_exit"
 
 (* For new, make sure the function passed to thread_new never
    raises an exception. *)
+let[@inline never] check_memprof_cb () = ref ()
+
 let create fn arg =
   thread_new
     (fun () ->
       try
-        fn arg; ()
+        fn arg;
+        ignore (Sys.opaque_identity (check_memprof_cb ()))
       with exn ->
              flush stdout; flush stderr;
              thread_uncaught_exception exn)
 
+let exit () =
+  ignore (Sys.opaque_identity (check_memprof_cb ()));
+  exit_stub ()
+
 (* Thread.kill is currently not implemented due to problems with
    cleanup handlers on several platforms *)

diff --git a/runtime/caml/memprof.h b/runtime/caml/memprof.h
index 2e325baba..6f8ec75a2 100644
--- a/runtime/caml/memprof.h
+++ b/runtime/caml/memprof.h
@@ -47,6 +47,9 @@ CAMLextern void caml_memprof_leave_thread(void);
 CAMLextern void caml_memprof_enter_thread(struct caml_memprof_th_ctx*);
 CAMLextern void caml_memprof_delete_th_ctx(struct caml_memprof_th_ctx*);
 
+typedef void (*th_ctx_action)(struct caml_memprof_th_ctx*, void*);
+extern void (*caml_memprof_th_ctx_iter_hook)(th_ctx_action, void*);
+
 #endif
 
 #endif /* CAML_MEMPROF_H */

diff --git a/runtime/memprof.c b/runtime/memprof.c
index aeb75107a..a76990cda 100644
--- a/runtime/memprof.c
+++ b/runtime/memprof.c
@@ -57,6 +57,90 @@ static intnat callstack_size;
 
 static value tracker;
 
+struct tracked {
+  /* Memory block being sampled. This is a weak GC root. */
+  value block;
+
+  /* Number of samples in this block. */
+  uintnat n_samples;
+
+  /* The size of this block. */
+  uintnat wosize;
+
+  /* The value returned by the previous callback for this block, or
+     the callstack if the alloc callback has not been called yet.
+     This is a strong GC root. */
+  value user_data;
+
+  /* Whether this block has been initially allocated in the minor heap. */
+  unsigned int alloc_young : 1;
+
+  /* Whether this block comes from unmarshalling. */
+  unsigned int unmarshalled : 1;
+
+  /* Whether this block has been promoted. Implies [alloc_young]. */
+  unsigned int promoted : 1;
+
+  /* Whether this block has been deallocated. */
+  unsigned int deallocated : 1;
+
+  /* Whether the allocation callback has been called depends on
+     whether the entry is in [local->entries] or in [entries_global]. */
+
+  /* Whether the promotion callback has been called. */
+  unsigned int cb_promote_called : 1;
+
+  /* Whether the deallocation callback has been called. */
+  unsigned int cb_dealloc_called : 1;
+
+  /* Whether this entry is deleted. */
+  unsigned int deleted : 1;
+
+  /* Whether a callback is currently running for this entry. */
+  unsigned int callback_running : 1;
+
+  /* Pointer to the [t_idx] variable in the [run_callback_exn] frame which
+     is currently running the callback for this entry. This is needed
+     to make [run_callback_exn] reetrant, in the case it is called
+     simultaneously by several threads. */
+  uintnat* idx_ptr;
+};
+
+/* During the alloc callback for a minor allocation, the block being
+   sampled is not yet allocated. Instead, we place in the block field
+   a value computed with the following macro: */
+#define Placeholder_magic 0x04200000
+#define Placeholder_offs(offset) (Val_long(offset + Placeholder_magic))
+#define Offs_placeholder(block) (Long_val(block) & 0xFFFF)
+#define Is_placeholder(block) \
+  (Is_long(block) && (Long_val(block) & ~(uintnat)0xFFFF) == Placeholder_magic)
+
+/* When an entry is deleted, its index is replaced by that integer. */
+#define Invalid_index (~(uintnat)0)
+
+/* A resizable array of entries */
+struct entry_array {
+  struct tracked* t;
+  uintnat min_alloc_len, alloc_len, len;
+  /* Before this position, the [block] and [user_data] fields point to
+     the major heap ([young <= len]). */
+  uintnat young_idx;
+  /* There are no blocks to be deleted before this position
+     ([delete_idx <= len]). */
+  uintnat delete_idx;
+};
+
+#define MIN_ENTRIES_LOCAL_ALLOC_LEN 16
+#define MIN_ENTRIES_GLOBAL_ALLOC_LEN 128
+
+/* Entries for other blocks. This variable is shared accross threads. */
+static struct entry_array entries_global =
+  { NULL, MIN_ENTRIES_GLOBAL_ALLOC_LEN, 0, 0, 0, 0 };
+
+/* There are no pending callbacks in [entries_global] before this
+   position ([callback_idx <= entries_global.len]). */
+static uintnat callback_idx;
+
 /* Structure for thread-local variables. */
 struct caml_memprof_th_ctx {
   /* [suspended] is used for masking memprof callbacks when
     a callback is running or when an uncaught exception handler is
     called. */
   int suspended;
 
   /* [callback_running] is used to trigger a fatal error whenever
     [Thread.exit] is called from a callback. */
   int callback_running;
-} caml_memprof_main_ctx = { 0, 0 };
+
+  /* Entries for blocks whose alloc callback has not yet been called. */
+  struct entry_array entries;
+} caml_memprof_main_ctx =
+  { 0, 0, { NULL, MIN_ENTRIES_LOCAL_ALLOC_LEN, 0, 0, 0, 0 } };
 static struct caml_memprof_th_ctx* local = &caml_memprof_main_ctx;
 
 /* Pointer to the word following the next sample in the minor
@@ -285,89 +373,7 @@ static value capture_callstack(int alloc_idx)
   return res;
 }
 
-/**** Data structures for tracked blocks. ****/
-
-struct tracked {
-  /* Memory block being sampled. This is a weak GC root. */
-  value block;
-
-  /* Number of samples in this block. */
-  uintnat n_samples;
-
-  /* The size of this block. */
-  uintnat wosize;
-
-  /* The value returned by the previous callback for this block, or
-     the callstack if the alloc callback has not been called yet.
-     This is a strong GC root. */
-  value user_data;
-
-  /* Whether this block has been initially allocated in the minor heap. */
-  unsigned int alloc_young : 1;
-
-  /* Whether this block comes from unmarshalling. */
-  unsigned int unmarshalled : 1;
-
-  /* Whether this block has been promoted. Implies [alloc_young]. */
-  unsigned int promoted : 1;
-
-  /* Whether this block has been deallocated. */
-  unsigned int deallocated : 1;
-
-  /* Whether the allocation callback has been called. */
-  unsigned int cb_alloc_called : 1;
-
-  /* Whether the promotion callback has been called. */
-  unsigned int cb_promote_called : 1;
-
-  /* Whether the deallocation callback has been called. */
-  unsigned int cb_dealloc_called : 1;
-
-  /* Whether this entry is deleted. */
-  unsigned int deleted : 1;
-
-  /* Whether a callback is currently running for this entry. */
-  unsigned int callback_running : 1;
-
-  /* Pointer to the [t_idx] variable in the [run_callback_exn] frame which
-     is currently running the callback for this entry. This is needed
-     to make [run_callback_exn] reetrant, in the case it is called
-     simultaneously by several threads. */
-  uintnat* idx_ptr;
-};
-
-/* During the alloc callback for a minor allocation, the block being
-   sampled is not yet allocated. Instead, we place in the block field
-   a value computed with the following macro: */
-#define Placeholder_magic 0x04200000
-#define Placeholder_offs(offset) (Val_long(offset + Placeholder_magic))
-#define Offs_placeholder(block) (Long_val(block) & 0xFFFF)
-#define Is_placeholder(block) \
-  (Is_long(block) && (Long_val(block) & ~(uintnat)0xFFFF) == Placeholder_magic)
-
-/* When an entry is deleted, its index is replaced by that integer. */
-#define Invalid_index (~(uintnat)0)
-
-/* A resizable array of entries */
-struct entry_array {
-  struct tracked* t;
-  uintnat min_alloc_len, alloc_len, len;
-  /* Before this position, the [block] and [user_data] fields point to
-     the major heap ([young <= len]). */
-  uintnat young_idx;
-  /* There are no blocks to be deleted before this position
-     ([delete_idx <= len]). */
-  uintnat delete_idx;
-};
-
-#define MIN_ENTRIES_ALLOC_LEN 128
-
-static struct entry_array entries =
-  { NULL, MIN_ENTRIES_ALLOC_LEN, 0, 0, 0, 0 };
-
-/* There are no pending callbacks in [entries] before this
-   position ([callback_idx <= entries.len]). */
-static uintnat callback_idx;
+/**** Managing data structures for tracked blocks. ****/
 
 /* Reallocate the [ea] array if it is either too small or too
    large.
@@ -396,10 +402,10 @@ Caml_inline uintnat new_tracked(uintnat n_samples, uintnat wosize,
                                 value block, value user_data)
 {
   struct tracked *t;
-  if (!realloc_entries(&entries, 1))
+  if (!realloc_entries(&local->entries, 1))
     return Invalid_index;
-  entries.len++;
-  t = &entries.t[entries.len - 1];
+  local->entries.len++;
+  t = &local->entries.t[local->entries.len - 1];
   t->block = block;
   t->n_samples = n_samples;
   t->wosize = wosize;
@@ -409,47 +415,47 @@ Caml_inline uintnat new_tracked(uintnat n_samples, uintnat wosize,
   t->unmarshalled = is_unmarshalled;
   t->promoted = 0;
   t->deallocated = 0;
-  t->cb_alloc_called = t->cb_promote_called = t->cb_dealloc_called = 0;
+  t->cb_promote_called = t->cb_dealloc_called = 0;
   t->deleted = 0;
   t->callback_running = 0;
-  return entries.len - 1;
+  return local->entries.len - 1;
 }
 
-static void mark_deleted(uintnat t_idx)
+static void mark_deleted(struct entry_array* ea, uintnat t_idx)
 {
-  struct tracked* t = &entries.t[t_idx];
+  struct tracked* t = &ea->t[t_idx];
   t->deleted = 1;
   t->user_data = Val_unit;
   t->block = Val_unit;
-  if (t_idx < entries.delete_idx) entries.delete_idx = t_idx;
+  if (t_idx < ea->delete_idx) ea->delete_idx = t_idx;
   CAMLassert(t->idx_ptr == NULL);
 }
 
-Caml_inline value run_callback_exn(uintnat *t_idx, value cb, value param)
+Caml_inline value run_callback_exn(
+  struct entry_array* ea, uintnat t_idx, value cb, value param)
 {
-  struct tracked* t = &entries.t[*t_idx];
+  struct tracked* t = &ea->t[t_idx];
   value res;
   CAMLassert(!t->callback_running && t->idx_ptr == NULL);
   CAMLassert(lambda > 0.);
 
   local->callback_running = t->callback_running = 1;
-  t->idx_ptr = t_idx;
+  t->idx_ptr = &t_idx;
   t->user_data = Val_unit; /* Release root. */
   res = caml_callback_exn(cb, param);
   local->callback_running = 0;
-  /* The call above can modify [*t_idx] and thus invalidate [t]. */
-  if (*t_idx == Invalid_index) {
+  /* The call above can modify [t_idx] and thus invalidate [t]. */
+  if (t_idx == Invalid_index) {
     /* Make sure this entry has not been removed by [caml_memprof_stop] */
     return Val_unit;
   }
-  t = &entries.t[*t_idx];
+  t = &ea->t[t_idx];
   t->idx_ptr = NULL;
   t->callback_running = 0;
   if (Is_exception_result(res) || res == Val_unit) {
     /* Callback raised an exception or returned None or (), discard
       this entry. */
-    mark_deleted(*t_idx);
-    *t_idx = Invalid_index;
+    mark_deleted(ea, t_idx);
     return res;
   } else {
     /* Callback returned [Some _]. Store the value in [user_data]. */
     CAMLassert(!Is_exception_result(res) && Is_block(res) && Tag_val(res) == 0
                && Wosize_val(res) == 1);
     t->user_data = Field(res, 0);
     if (Is_block(t->user_data) && Is_young(t->user_data) &&
-        *t_idx < entries.young_idx)
-      entries.young_idx = *t_idx;
+        t_idx < ea->young_idx)
+      ea->young_idx = t_idx;
+
+    // If the following condition are met:
+    //   - we are running a promotion callback,
+    //   - the corresponding block is deallocated,
+    //   - another thread is running callbacks in
+    //     [caml_memprof_handle_postponed_exn],
+    // then [callback_idx] may have moved forward during this callback,
+    // which means that we may forget to run the deallocation callback.
+    // Hence, we reset [callback_idx] if appropriate.
+    if (ea == &entries_global && t->deallocated && !t->cb_dealloc_called &&
+        callback_idx > t_idx)
+      callback_idx = t_idx;
+
     return Val_unit;
   }
 }
 
-/* Run all the needed callbacks for a given entry.
-   In case of a thread context switch during a callback, this can be
-   called in a reetrant way.
-   If [*t_idx] equals [callback_idx], then this function
-   increments [callback_idx].
-   The index of the entry may change. It is set to [Invalid_index] if
-   the entry is discarded.
-   Returns:
-   - An exception result if the callback raised an exception
-   - Val_long(0) == Val_unit == None otherwise
-*/
-static value handle_entry_callbacks_exn(uintnat* t_idx)
+/* Run the allocation callback for a given entry of the local entries array.
+   This assumes that the corresponding [deleted] and
+   [callback_running] fields of the entry are both set to 0.
+   Reentrancy is not a problem for this function, since other threads
+   will use a different array for entries.
+   The index of the entry will not change, except if [caml_memprof_stop] is
+   called .
+   Returns:
+   - An exception result if the callback raised an exception
+   - Val_long(0) == Val_unit == None otherwise
+*/
+static value run_alloc_callback_exn(uintnat t_idx)
 {
-  value sample_info, res; /* No need to make these roots */
-  struct tracked* t = &entries.t[*t_idx];
-  if (*t_idx == callback_idx) callback_idx++;
-
-  if (t->deleted || t->callback_running) return Val_unit;
-
-  if (!t->cb_alloc_called) {
-    t->cb_alloc_called = 1;
-    CAMLassert(Is_block(t->block)
-               || Is_placeholder(t->block)
-               || t->deallocated);
-    sample_info = caml_alloc_small(4, 0);
-    Field(sample_info, 0) = Val_long(t->n_samples);
-    Field(sample_info, 1) = Val_long(t->wosize);
-    Field(sample_info, 2) = Val_long(t->unmarshalled);
-    Field(sample_info, 3) = t->user_data;
-    res = run_callback_exn(t_idx,
-      t->alloc_young ? Alloc_minor(tracker) : Alloc_major(tracker),
-      sample_info);
-    if (*t_idx == Invalid_index) return res;
-    t = &entries.t[*t_idx];
-  }
-
-  if (t->promoted && !t->cb_promote_called) {
-    t->cb_promote_called = 1;
-    res = run_callback_exn(t_idx, Promote(tracker), t->user_data);
-    if (*t_idx == Invalid_index) return res;
-    t = &entries.t[*t_idx];
-  }
-
-  if (t->deallocated && !t->cb_dealloc_called) {
-    value cb = (t->promoted || !t->alloc_young) ?
-      Dealloc_major(tracker) : Dealloc_minor(tracker);
-    t->cb_dealloc_called = 1;
-    res = run_callback_exn(t_idx, cb, t->user_data);
-    /* [t] is invalid, but we do no longer use it. */
*/
-    CAMLassert(*t_idx == Invalid_index);
-    return res;
-  }
-
-  return Val_unit;
+  CAMLassert(Is_block(t->block) || Is_placeholder(t->block) || t->deallocated);
+  sample_info = caml_alloc_small(4, 0);
+  Field(sample_info, 0) = Val_long(t->n_samples);
+  Field(sample_info, 1) = Val_long(t->wosize);
+  Field(sample_info, 2) = Val_long(t->unmarshalled);
+  Field(sample_info, 3) = t->user_data;
+  return run_callback_exn(&local->entries, t_idx,
+    t->alloc_young ? Alloc_minor(tracker) : Alloc_major(tracker), sample_info);
 }
 
-/* Remove any deleted entries from the entries array, updating
-   [ea->young_idx] and [callback_idx] if [ea == &entries]. */
+/* Remove any deleted entries from [ea], updating [ea->young_idx] and
+   [callback_idx] if [ea == &entries_global]. */
 static void flush_deleted(struct entry_array* ea)
 {
   uintnat i, j;
@@ -536,17 +525,18 @@ static void flush_deleted(struct entry_array* ea)
     }
     i++;
     if (ea->young_idx == i) ea->young_idx = j;
-    if (ea == &entries && callback_idx == i) callback_idx = j;
+    if (ea == &entries_global && callback_idx == i) callback_idx = j;
   }
   ea->delete_idx = ea->len = j;
-  CAMLassert(ea != &entries || callback_idx <= ea->len);
+  CAMLassert(ea != &entries_global || callback_idx <= ea->len);
   CAMLassert(ea->young_idx <= ea->len);
   realloc_entries(ea, 0);
 }
 
 static void check_action_pending(void)
 {
-  if (!local->suspended && callback_idx < entries.len)
+  if (local->suspended) return;
+  if (callback_idx < entries_global.len || local->entries.len > 0)
     caml_set_action_pending();
 }
 
@@ -562,41 +552,101 @@ void caml_memprof_set_suspended(int s)
 
 value caml_memprof_handle_postponed_exn(void)
 {
   value res = Val_unit;
-  if (local->suspended || callback_idx >= entries.len) return res;
+  uintnat i;
+  if (local->suspended) return Val_unit;
+  if (callback_idx >= entries_global.len && local->entries.len == 0)
+    return Val_unit;
   caml_memprof_set_suspended(1);
-  while (callback_idx < entries.len) {
-    uintnat i = callback_idx;
-    res = handle_entry_callbacks_exn(&i);
-    if (Is_exception_result(res)) break;
+
+  for (i = 0; i < local->entries.len; i++) {
+    /* We are the only thread allowed to modify [local->entries], so
+       the indices cannot shift. */
+    res = run_alloc_callback_exn(i);
+    if (Is_exception_result(res)) goto end;
+    if (realloc_entries(&entries_global, 1))
+      /* Transfer the entry to the global array. */
+      entries_global.t[entries_global.len++] = local->entries.t[i];
+    mark_deleted(&local->entries, i);
   }
+
+  while (callback_idx < entries_global.len) {
+    struct tracked* t = &entries_global.t[callback_idx];
+
+    if (t->deleted || t->callback_running) {
+      /* This entry is not ready. Ignore it. */
+      callback_idx++;
+    } else if (t->promoted && !t->cb_promote_called) {
+      t->cb_promote_called = 1;
+      res = run_callback_exn(&entries_global, callback_idx, Promote(tracker),
+                             t->user_data);
+      if (Is_exception_result(res)) goto end;
+    } else if (t->deallocated && !t->cb_dealloc_called) {
+      value cb = (t->promoted || !t->alloc_young) ?
+        Dealloc_major(tracker) : Dealloc_minor(tracker);
+      t->cb_dealloc_called = 1;
+      res = run_callback_exn(&entries_global, callback_idx, cb, t->user_data);
+      if (Is_exception_result(res)) goto end;
+    } else {
+      /* There is nothing more to do with this entry. */
+      callback_idx++;
+    }
+  }
+
+ end:
+  flush_deleted(&local->entries);
+  flush_deleted(&entries_global);
+  /* We need to reset the suspended flag *after* flushing
+     [local->entries] to make sure the flag is not set back to 1.
*/ caml_memprof_set_suspended(0); - flush_deleted(&entries); return res; } -void caml_memprof_oldify_young_roots(void) +/**** Handling weak and strong roots when the GC runs. ****/ + +typedef void (*ea_action)(struct entry_array*, void*); +struct call_on_entry_array_data { ea_action f; void *data; }; +static void call_on_entry_array(struct caml_memprof_th_ctx* ctx, void *data) +{ + struct call_on_entry_array_data* closure = data; + closure->f(&ctx->entries, closure->data); +} + +static void entry_arrays_iter(ea_action f, void *data) +{ + struct call_on_entry_array_data closure = { f, data }; + f(&entries_global, data); + caml_memprof_th_ctx_iter_hook(call_on_entry_array, &closure); +} + +static void entry_array_oldify_young_roots(struct entry_array *ea, void *data) { uintnat i; + (void)data; /* This loop should always have a small number of iterations (when compared to the size of the minor heap), because the young_idx pointer should always be close to the end of the array. Indeed, it is only moved back when returning from a callback triggered by allocation or promotion, which can only happen for blocks - allocated recently, which are close to the end of the entries - array. */ - for (i = entries.young_idx; i < entries.len; i++) - caml_oldify_one(entries.t[i].user_data, - &entries.t[i].user_data); + allocated recently, which are close to the end of the + [entries_global] array. */ + for (i = ea->young_idx; i < ea->len; i++) + caml_oldify_one(ea->t[i].user_data, &ea->t[i].user_data); } -void caml_memprof_minor_update(void) +void caml_memprof_oldify_young_roots(void) +{ + entry_arrays_iter(entry_array_oldify_young_roots, NULL); +} + +static void entry_array_minor_update(struct entry_array *ea, void *data) { uintnat i; - /* See comment in [caml_memprof_oldify_young_roots] for the number + (void)data; + /* See comment in [entry_array_oldify_young_roots] for the number of iterations of this loop. */ - for (i = entries.young_idx; i < entries.len; i++) { - struct tracked *t = &entries.t[i]; + for (i = ea->young_idx; i < ea->len; i++) { + struct tracked *t = &ea->t[i]; CAMLassert(Is_block(t->block) || t->deleted || t->deallocated || Is_placeholder(t->block)); if (Is_block(t->block) && Is_young(t->block)) { @@ -612,25 +662,40 @@ void caml_memprof_minor_update(void) } } } - if (callback_idx > entries.young_idx) { - callback_idx = entries.young_idx; + ea->young_idx = ea->len; +} + +void caml_memprof_minor_update(void) +{ + if (callback_idx > entries_global.young_idx) { + /* The entries after [entries_global.young_idx] will possibly get + promoted. Hence, there might be pending promotion callbacks. 
*/ + callback_idx = entries_global.young_idx; check_action_pending(); } - entries.young_idx = entries.len; + + entry_arrays_iter(entry_array_minor_update, NULL); +} + +static void entry_array_do_roots(struct entry_array *ea, void* data) +{ + scanning_action f = data; + uintnat i; + for (i = 0; i < ea->len; i++) + f(ea->t[i].user_data, &ea->t[i].user_data); } void caml_memprof_do_roots(scanning_action f) { - uintnat i; - for (i = 0; i < entries.len; i++) - f(entries.t[i].user_data, &entries.t[i].user_data); + entry_arrays_iter(entry_array_do_roots, f); } -void caml_memprof_update_clean_phase(void) +static void entry_array_clean_phase(struct entry_array *ea, void* data) { uintnat i; - for (i = 0; i < entries.len; i++) { - struct tracked *t = &entries.t[i]; + (void)data; + for (i = 0; i < ea->len; i++) { + struct tracked *t = &ea->t[i]; if (Is_block(t->block) && !Is_young(t->block)) { CAMLassert(Is_in_heap(t->block)); CAMLassert(!t->alloc_young || t->promoted); @@ -640,15 +705,26 @@ void caml_memprof_update_clean_phase(void) } } } +} + +void caml_memprof_update_clean_phase(void) +{ + entry_arrays_iter(entry_array_clean_phase, NULL); callback_idx = 0; check_action_pending(); } -void caml_memprof_invert_tracked(void) +static void entry_array_invert(struct entry_array *ea, void *data) { uintnat i; - for (i = 0; i < entries.len; i++) - caml_invert_root(entries.t[i].block, &entries.t[i].block); + (void)data; + for (i = 0; i < ea->len; i++) + caml_invert_root(ea->t[i].block, &ea->t[i].block); +} + +void caml_memprof_invert_tracked(void) +{ + entry_arrays_iter(entry_array_invert, NULL); } /**** Sampling procedures ****/ @@ -691,7 +767,8 @@ static void shift_sample(uintnat n) geometric distribution. */ void caml_memprof_renew_minor_sample(void) { - if (lambda == 0 || local->suspended) /* No trigger in the current minor heap. */ + if (lambda == 0 || local->suspended) + /* No trigger in the current minor heap. */ caml_memprof_young_trigger = Caml_state->young_alloc_start; else { uintnat geom = rand_geom(); @@ -712,12 +789,8 @@ void caml_memprof_track_young(uintnat wosize, int from_caml, { uintnat whsize = Whsize_wosize(wosize); value callstack, res = Val_unit; - int alloc_idx = 0, i, allocs_sampled = 0, has_delete = 0; + int alloc_idx = 0, i, allocs_sampled = 0; intnat alloc_ofs, trigger_ofs; - /* usually, only one allocation is sampled, even when the block contains - multiple combined allocations. So, we delay allocating the full - sampled_allocs array until we discover we actually need two entries */ - uintnat first_idx, *idx_tab = &first_idx; double saved_lambda = lambda; /* If this condition is false, then [caml_memprof_young_trigger] should be @@ -771,15 +844,18 @@ void caml_memprof_track_young(uintnat wosize, int from_caml, trigger_ofs -= rand_geom(); } if (n_samples > 0) { - uintnat *idx_ptr, t_idx; + uintnat t_idx; + int stopped; callstack = capture_callstack(alloc_idx); t_idx = new_tracked(n_samples, alloc_wosz, 0, 1, Placeholder_offs(alloc_ofs), callstack); if (t_idx == Invalid_index) continue; - res = handle_entry_callbacks_exn(&t_idx); - if (t_idx == Invalid_index) { - has_delete = 1; + res = run_alloc_callback_exn(t_idx); + /* Has [caml_memprof_stop] been called during the callback? */ + stopped = local->entries.len == 0; + if (stopped) { + allocs_sampled = 0; if (saved_lambda != lambda) { /* [lambda] changed during the callback. We need to refresh [trigger_ofs]. 
*/
@@ -788,102 +864,78 @@
       }
     }
     if (Is_exception_result(res)) break;
-      if (t_idx == Invalid_index) continue;
-
-      if (allocs_sampled == 1) {
-        /* Found a second sampled allocation! Allocate a buffer for them */
-        idx_tab = caml_stat_alloc_noexc(sizeof(uintnat) * nallocs);
-        if (idx_tab == NULL) {
-          alloc_ofs = 0;
-          idx_tab = &first_idx;
-          break;
-        }
-        idx_tab[0] = first_idx;
-        if (idx_tab[0] != Invalid_index)
-          entries.t[idx_tab[0]].idx_ptr = &idx_tab[0];
-      }
-
-      /* Usually, entries.t[...].idx_ptr is owned by the thread
-         running a callback for the entry, if any. Here, we take ownership
-         of idx_ptr until the end of the function.
-
-         This does not conflict with the usual use of idx_ptr because no
-         callbacks can run on this entry until the end of the function:
-         the allocation callback has already run and the other callbacks
-         do not run on Placeholder values */
-      idx_ptr = &idx_tab[allocs_sampled];
-      *idx_ptr = t_idx;
-      entries.t[*idx_ptr].idx_ptr = idx_ptr;
-      allocs_sampled++;
+      if (!stopped) allocs_sampled++;
     }
   }
 
   CAMLassert(alloc_ofs == 0 || Is_exception_result(res));
   CAMLassert(allocs_sampled <= nallocs);
 
-  caml_memprof_set_suspended(0);
-  /* [caml_memprof_set_suspended] will attempt to set the action
-     pending flag, but we need to make sure that the action pending
-     flag is not set systematically, which is to be expected, since
-     [new_tracked] created a new block without updating
-     [callback_idx]. Fortunately, [handle_entry_callback_exn]
-     increments [callback_idx] if it is equal to [t_idx]. */
-  /* This condition happens either in the case of an exception or if
-     one of the callbacks returned [None]. If these cases happen
-     frequently, then we need to call [flush_deleted] somewhere to
-     prevent a leak. */
-  if (has_delete)
-    flush_deleted(&entries);
+  if (!Is_exception_result(res)) {
+    /* The callbacks did not raise. The allocation will take place.
+       We now restore the minor heap in the state needed by
+       [Alloc_small_aux]. */
+    if (Caml_state->young_ptr - whsize < Caml_state->young_trigger) {
+      CAML_EV_COUNTER(EV_C_FORCE_MINOR_MEMPROF, 1);
+      caml_gc_dispatch();
+    }
 
-  if (Is_exception_result(res)) {
-    for (i = 0; i < allocs_sampled; i++)
-      if (idx_tab[i] != Invalid_index) {
-        struct tracked* t = &entries.t[idx_tab[i]];
-        /* The allocations are cancelled because of the exception,
-           but this callback has already been called. We simulate a
-           deallocation. */
-        t->block = Val_unit;
-        t->deallocated = 1;
-        if (callback_idx > idx_tab[i]) {
-          callback_idx = idx_tab[i];
-          check_action_pending();
-        }
-      }
-    if (idx_tab != &first_idx) caml_stat_free(idx_tab);
-    caml_raise(Extract_exception(res));
+    /* Re-allocate the blocks in the minor heap. We should not call the
+       GC after this. */
+    Caml_state->young_ptr -= whsize;
+
+    /* Make sure this block is not going to be sampled again. */
+    shift_sample(whsize);
   }
 
-  /* We can now restore the minor heap in the state needed by
-     [Alloc_small_aux]. */
-  if (Caml_state->young_ptr - whsize < Caml_state->young_trigger) {
-    CAML_EV_COUNTER(EV_C_FORCE_MINOR_MEMPROF, 1);
-    caml_gc_dispatch();
-  }
-
-  /* Re-allocate the blocks in the minor heap. We should not call the
-     GC after this. */
-  Caml_state->young_ptr -= whsize;
-
-  /* Make sure this block is not going to be sampled again. */
-  shift_sample(whsize);
+  /* Since [local->entries] is local to the current thread, we know for
+     sure that the allocated entries are the [allocs_sampled] last entries of
+     [local->entries].
*/
   for (i = 0; i < allocs_sampled; i++) {
-    if (idx_tab[i] != Invalid_index) {
-      /* If the execution of the callback has succeeded, then we start the
-         tracking of this block..
+    uintnat idx = local->entries.len-allocs_sampled+i;
+    if (!local->entries.t[idx].deleted) {
+      if (realloc_entries(&entries_global, 1)) {
+        /* Transfer the entry to the global array. */
+        struct tracked* t = &entries_global.t[entries_global.len];
+        entries_global.len++;
+        *t = local->entries.t[idx];
 
-      Subtlety: we are actually writing [t->block] with an invalid
-      (uninitialized) block. This is correct because the allocation
-      and initialization happens right after returning from
-      [caml_memprof_track_young]. */
-      struct tracked *t = &entries.t[idx_tab[i]];
-      t->block = Val_hp(Caml_state->young_ptr + Offs_placeholder(t->block));
-      t->idx_ptr = NULL;
-      CAMLassert(t->cb_alloc_called);
-      if (idx_tab[i] < entries.young_idx) entries.young_idx = idx_tab[i];
+        if (Is_exception_result(res)) {
+          /* The allocations are cancelled because of the exception,
+             but this callback has already been called. We simulate a
+             deallocation. */
+          t->block = Val_unit;
+          t->deallocated = 1;
+        } else {
+          /* If the execution of the callback has succeeded, then we start the
+             tracking of this block..
+
+             Subtlety: we are actually writing [t->block] with an invalid
+             (uninitialized) block. This is correct because the allocation
+             and initialization happens right after returning from
+             [caml_memprof_track_young]. */
+          t->block = Val_hp(Caml_state->young_ptr + Offs_placeholder(t->block));
+
+          /* We make sure that the action pending flag is not set
+             systematically, which is to be expected, since we created
+             a new block in the global entry array, but this new block
+             does not need promotion or deallocation callback. */
+          if (callback_idx == entries_global.len - 1)
+            callback_idx = entries_global.len;
+        }
+      }
+      mark_deleted(&local->entries, idx);
     }
   }
-  if (idx_tab != &first_idx) caml_stat_free(idx_tab);
+
+  flush_deleted(&local->entries);
+  /* We need to reset the suspended flag *after* flushing
+     [local->entries] to make sure the flag is not set back to 1. */
+  caml_memprof_set_suspended(0);
+
+  if (Is_exception_result(res))
+    caml_raise(Extract_exception(res));
 
   /* /!\ Since the heap is in an invalid state before initialization,
      very little heap operations are allowed until then. */
@@ -964,21 +1016,25 @@ CAMLprim value caml_memprof_start(value lv, value szv, value tracker_param)
   CAMLreturn(Val_unit);
 }
 
-CAMLprim value caml_memprof_stop(value unit)
+static void entry_array_discard(struct entry_array* ea, void* data)
 {
   uintnat i;
+  (void)data;
+  for (i = 0; i < ea->len; i++)
+    if (ea->t[i].idx_ptr != NULL)
+      *ea->t[i].idx_ptr = Invalid_index;
+  ea->alloc_len = ea->len = ea->young_idx = ea->delete_idx = 0;
+  caml_stat_free(ea->t);
+  ea->t = NULL;
+}
 
+CAMLprim value caml_memprof_stop(value unit)
+{
   if (!started) caml_failwith("Gc.Memprof.stop: not started.");
 
   /* Discard the tracked blocks. */
-  for (i = 0; i < entries.len; i++)
-    if (entries.t[i].idx_ptr != NULL)
-      *entries.t[i].idx_ptr = Invalid_index;
-  entries.len = 0;
-  callback_idx = entries.young_idx = entries.delete_idx = 0;
-  caml_stat_free(entries.t);
-  entries.t = NULL;
-  entries.alloc_len = 0;
+  entry_arrays_iter(entry_array_discard, NULL);
+  callback_idx = 0;
 
   lambda = 0;
   // Reset the memprof trigger in order to make sure we won't enter
@@ -997,12 +1053,23 @@
 
 /**** Interface with systhread.
****/ +static void th_ctx_iter_default(th_ctx_action f, void* data) { + f(local, data); +} + +CAMLexport void (*caml_memprof_th_ctx_iter_hook)(th_ctx_action, void*) + = th_ctx_iter_default; + CAMLexport struct caml_memprof_th_ctx* caml_memprof_new_th_ctx() { struct caml_memprof_th_ctx* ctx = caml_stat_alloc(sizeof(struct caml_memprof_th_ctx)); ctx->suspended = 0; ctx->callback_running = 0; + ctx->entries.t = NULL; + ctx->entries.min_alloc_len = MIN_ENTRIES_LOCAL_ALLOC_LEN; + ctx->entries.alloc_len = ctx->entries.len = 0; + ctx->entries.young_idx = ctx->entries.delete_idx = 0; return ctx; } @@ -1014,6 +1081,7 @@ CAMLexport void caml_memprof_delete_th_ctx(struct caml_memprof_th_ctx* ctx) if (ctx->callback_running) caml_fatal_error("Thread.exit called from a memprof callback."); if (ctx == local) local = NULL; + caml_stat_free(ctx->entries.t); if (ctx != &caml_memprof_main_ctx) caml_stat_free(ctx); } diff --git a/runtime/minor_gc.c b/runtime/minor_gc.c index 478bf46f3..76e4d5c98 100644 --- a/runtime/minor_gc.c +++ b/runtime/minor_gc.c @@ -454,8 +454,7 @@ void caml_empty_minor_heap (void) extern uintnat caml_instr_alloc_jump; #endif /*CAML_INSTR*/ -/* Do a minor collection or a slice of major collection, call finalisation - functions, etc. +/* Do a minor collection or a slice of major collection, etc. Leave enough room in the minor heap to allocate at least one object. Guaranteed not to call any OCaml callback. */ diff --git a/runtime/startup_aux.c b/runtime/startup_aux.c index ff6c8fba3..3782697c5 100644 --- a/runtime/startup_aux.c +++ b/runtime/startup_aux.c @@ -28,7 +28,6 @@ #endif #include "caml/osdeps.h" #include "caml/startup_aux.h" -#include "caml/memprof.h" #ifdef _WIN32 diff --git a/stdlib/gc.mli b/stdlib/gc.mli index 4d69e2089..1af290f38 100644 --- a/stdlib/gc.mli +++ b/stdlib/gc.mli @@ -494,6 +494,9 @@ module Memprof : to keep for minor blocks, and ['major] the type of metadata for major blocks. + When using threads, it is guaranteed that allocation callbacks are + always run in the thread where the allocation takes place. + If an allocation-tracking or promotion-tracking function returns [None], memprof stops tracking the corresponding value. 
*) diff --git a/testsuite/tests/backtrace/callstack.reference b/testsuite/tests/backtrace/callstack.reference index 62593d199..38ca17d94 100644 --- a/testsuite/tests/backtrace/callstack.reference +++ b/testsuite/tests/backtrace/callstack.reference @@ -12,4 +12,4 @@ Raised by primitive operation at Callstack.f0 in file "callstack.ml", line 11, c Called from Callstack.f1 in file "callstack.ml", line 12, characters 27-32 Called from Callstack.f2 in file "callstack.ml", line 13, characters 27-32 Called from Callstack.f3 in file "callstack.ml", line 14, characters 27-32 -Called from Thread.create.(fun) in file "thread.ml", line 39, characters 8-14 +Called from Thread.create.(fun) in file "thread.ml", line 41, characters 8-14 diff --git a/testsuite/tests/statmemprof/blocking_in_callback.ml b/testsuite/tests/statmemprof/blocking_in_callback.ml index d5e8d2ce1..00f49cfc7 100644 --- a/testsuite/tests/statmemprof/blocking_in_callback.ml +++ b/testsuite/tests/statmemprof/blocking_in_callback.ml @@ -6,8 +6,7 @@ include systhreads *) let cnt = ref 0 -let alloc_num = ref 0 -let alloc_tot = 100000 +let alloc_thread = 50000 let (rd1, wr1) = Unix.pipe () let (rd2, wr2) = Unix.pipe () @@ -15,20 +14,26 @@ let (rd2, wr2) = Unix.pipe () let main_thread = Thread.self () let cb_main = ref 0 and cb_other = ref 0 let stopped = ref false -let minor_alloc_callback _ = +let alloc_callback alloc = if !stopped then None else begin - let do_stop = !cb_main + !cb_other >= alloc_tot in - if do_stop then stopped := true; let t = Thread.self () in if t == main_thread then begin + assert (alloc.Gc.Memprof.size < 10 || alloc.Gc.Memprof.size mod 2 = 0); + let do_stop = !cb_main >= alloc_thread in + if do_stop then stopped := true; incr cb_main; + assert (Unix.write wr2 (Bytes.make 1 'a') 0 1 = 1); if not do_stop then assert (Unix.read rd1 (Bytes.make 1 'a') 0 1 = 1) end else begin + assert (alloc.Gc.Memprof.size < 10 || alloc.Gc.Memprof.size mod 2 = 1); + let do_stop = !cb_other >= alloc_thread in + if do_stop then stopped := true; incr cb_other; + assert (Unix.write wr1 (Bytes.make 1 'a') 0 1 = 1); if not do_stop then assert (Unix.read rd2 (Bytes.make 1 'a') 0 1 = 1) @@ -39,31 +44,34 @@ let minor_alloc_callback _ = let mut = Mutex.create () let () = Mutex.lock mut -let rec go () = +let rec go alloc_num tid = Mutex.lock mut; Mutex.unlock mut; - if !alloc_num < alloc_tot then begin - alloc_num := !alloc_num + 1; - Sys.opaque_identity (Bytes.make (Random.int 300) 'a') |> ignore; - go () + if alloc_num < alloc_thread then begin + let len = 2 * (Random.int 200 + 1) + tid in + Sys.opaque_identity (Array.make len 0) |> ignore; + go (alloc_num + 1) tid end else begin cnt := !cnt + 1; if !cnt < 2 then begin Gc.minor (); (* check for callbacks *) Thread.yield (); - go () + go alloc_num tid end else begin Gc.minor () (* check for callbacks *) end end let () = - let t = Thread.create go () in + let t = Thread.create (fun () -> go 0 1) () in Gc.Memprof.(start ~callstack_size:10 ~sampling_rate:1. 
-    { null_tracker with alloc_minor = minor_alloc_callback; });
+    { null_tracker with
+      alloc_minor = alloc_callback;
+      alloc_major = alloc_callback });
   Mutex.unlock mut;
-  go ();
+  go 0 0;
   Thread.join t;
   Gc.Memprof.stop ();
-  assert (abs (!cb_main - !cb_other) <= 1);
-  assert (!cb_main + !cb_other >= alloc_tot)
+  assert (!cb_main >= alloc_thread);
+  assert (!cb_other >= alloc_thread);
+  assert (abs (!cb_main - !cb_other) <= 1)
diff --git a/testsuite/tests/statmemprof/moved_while_blocking.ml b/testsuite/tests/statmemprof/moved_while_blocking.ml
new file mode 100644
index 000000000..8efc172ae
--- /dev/null
+++ b/testsuite/tests/statmemprof/moved_while_blocking.ml
@@ -0,0 +1,76 @@
+(* TEST
+* hassysthreads
+include systhreads
+** bytecode
+** native
+*)
+
+let t2_begin = Atomic.make false
+let t2_promoting = Atomic.make false
+let t2_finish_promote = Atomic.make false
+let t2_done = Atomic.make false
+let t2_quit = Atomic.make false
+let await a =
+  while not (Atomic.get a) do Thread.yield () done
+let set a =
+  Atomic.set a true
+
+(* no-alloc printing to stdout *)
+let say msg =
+  Unix.write Unix.stdout (Bytes.unsafe_of_string msg) 0 (String.length msg) |> ignore
+
+let static_ref = ref 0
+let global = ref static_ref
+let thread_fn () =
+  await t2_begin;
+  say "T2: alloc\n";
+  let r = ref 0 in
+  global := r;
+  say "T2: minor GC\n";
+  Gc.minor ();
+  global := static_ref;
+  say "T2: done\n";
+  set t2_done;
+  await t2_quit
+
+let big = ref [| |]
+
+let fill_big () = big := Array.make 1000 42
+  [@@inline never] (* Prevent flambda from moving the allocated array into
+                      a global root (see #9978). *)
+let empty_big () = big := [| |]
+  [@@inline never]
+
+let () =
+  let th = Thread.create thread_fn () in
+  Gc.Memprof.(start ~sampling_rate:1.
+    { null_tracker with
+      alloc_minor = (fun _ ->
+        say "  minor alloc\n";
+        Some ());
+      alloc_major = (fun _ ->
+        say "  major alloc\n";
+        Some "major block\n");
+      promote = (fun () ->
+        say "  promoting...\n";
+        set t2_promoting;
+        await t2_finish_promote;
+        say "  ...done promoting\n";
+        Some "promoted block\n");
+      dealloc_major = (fun msg ->
+        say "  major dealloc: "; say msg) });
+  say "T1: alloc\n";
+  fill_big ();
+  set t2_begin;
+  await t2_promoting;
+  say "T1: major GC\n";
+  empty_big ();
+  Gc.full_major ();
+  set t2_finish_promote;
+  await t2_done;
+  say "T1: major GC\n";
+  Gc.full_major ();
+  say "T1: done\n";
+  Gc.Memprof.stop ();
+  set t2_quit;
+  Thread.join th
diff --git a/testsuite/tests/statmemprof/moved_while_blocking.reference b/testsuite/tests/statmemprof/moved_while_blocking.reference
new file mode 100644
index 000000000..ef99432b4
--- /dev/null
+++ b/testsuite/tests/statmemprof/moved_while_blocking.reference
@@ -0,0 +1,13 @@
+T1: alloc
+  major alloc
+T2: alloc
+  minor alloc
+T2: minor GC
+  promoting...
+T1: major GC
+  major dealloc: major block
+  ...done promoting
+T2: done
+T1: major GC
+  major dealloc: promoted block
+T1: done
From 6d3065be789a0b9e18c243f8d4d9a981b808c5e9 Mon Sep 17 00:00:00 2001
From: Jacques-Henri Jourdan
Date: Fri, 12 Jun 2020 21:52:14 +0200
Subject: [PATCH 09/12] Memprof: get rid of the idx_ptr pointers.

Instead, we use a thread-local variable [callback_status] which
contains the index of the corresponding entry when a callback is
running. We can do this since there can only be one running callback
at the same time in a given thread.

This lifts the restriction forbidding the call of Thread.exit from a
memprof callback.
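
As a minimal sketch of the behaviour this lifts (not part of the patch;
it assumes the systhreads library is linked, and mirrors the updated
thread_exit_in_callback.ml test further below), a sampling callback may
now call Thread.exit without hitting a fatal error:

    (* Sketch only: exit the sampled worker thread from inside the
       allocation callback; the thread-id guard keeps the main thread
       alive. *)
    let () =
      let main = Thread.id (Thread.self ()) in
      Gc.Memprof.(start ~callstack_size:10 ~sampling_rate:1.
        { null_tracker with
          alloc_minor = (fun _ ->
            if Thread.id (Thread.self ()) <> main then Thread.exit ();
            None) });
      let t =
        Thread.create (fun () -> ignore (Sys.opaque_identity (ref 1))) ()
      in
      Thread.join t;
      Gc.Memprof.stop ()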
--- runtime/memprof.c | 182 ++++++++++-------- stdlib/gc.mli | 5 +- .../statmemprof/thread_exit_in_callback.ml | 22 ++- .../thread_exit_in_callback.reference | 1 - .../thread_exit_in_callback_stub.c | 16 -- 5 files changed, 119 insertions(+), 107 deletions(-) delete mode 100644 testsuite/tests/statmemprof/thread_exit_in_callback.reference delete mode 100644 testsuite/tests/statmemprof/thread_exit_in_callback_stub.c diff --git a/runtime/memprof.c b/runtime/memprof.c index a76990cda..f03febffb 100644 --- a/runtime/memprof.c +++ b/runtime/memprof.c @@ -72,6 +72,10 @@ struct tracked { This is a strong GC root. */ value user_data; + /* The thread currently running a callback for this entry, + or NULL if there is none */ + struct caml_memprof_th_ctx* running; + /* Whether this block has been initially allocated in the minor heap. */ unsigned int alloc_young : 1; @@ -85,7 +89,8 @@ struct tracked { unsigned int deallocated : 1; /* Whether the allocation callback has been called depends on - whether the entry is in [local->entries] or in [entries_global]. */ + whether the entry is in a thread local entry array or in + [entries_global]. */ /* Whether the promotion callback has been called. */ unsigned int cb_promote_called : 1; @@ -95,15 +100,6 @@ struct tracked { /* Whether this entry is deleted. */ unsigned int deleted : 1; - - /* Whether a callback is currently running for this entry. */ - unsigned int callback_running : 1; - - /* Pointer to the [t_idx] variable in the [run_callback_exn] frame which - is currently running the callback for this entry. This is needed - to make [run_callback_exn] reetrant, in the case it is called - simultaneously by several threads. */ - uintnat* idx_ptr; }; /* During the alloc callback for a minor allocation, the block being @@ -115,9 +111,6 @@ struct tracked { #define Is_placeholder(block) \ (Is_long(block) && (Long_val(block) & ~(uintnat)0xFFFF) == Placeholder_magic) -/* When an entry is deleted, its index is replaced by that integer. */ -#define Invalid_index (~(uintnat)0) - /* A resizable array of entries */ struct entry_array { struct tracked* t; @@ -141,6 +134,10 @@ static struct entry_array entries_global = position ([callback_idx <= entries_global.len]). */ static uintnat callback_idx; +#define CB_IDLE -1 +#define CB_LOCAL -2 +#define CB_STOPPED -3 + /* Structure for thread-local variables. */ struct caml_memprof_th_ctx { /* [suspended] is used for masking memprof callbacks when @@ -148,14 +145,22 @@ struct caml_memprof_th_ctx { called. */ int suspended; - /* [callback_running] is used to trigger a fatal error whenever - [Thread.exit] is called from a callback. */ - int callback_running; + /* [callback_status] contains: + - CB_STOPPED if the current thread is running a callback, but + sampling has been stopped using [caml_memprof_stop]; + - The index of the corresponding entry in the [entries_global] + array if the current thread is currently running a promotion or + a deallocation callback; + - CB_LOCAL if the current thread is currently running an + allocation callback; + - CB_IDLE if the current thread is not running any callback. + */ + intnat callback_status; /* Entries for blocks whose alloc callback has not yet been called. 
*/ struct entry_array entries; } caml_memprof_main_ctx = - { 0, 0, { NULL, MIN_ENTRIES_LOCAL_ALLOC_LEN, 0, 0, 0, 0 } }; + { 0, CB_IDLE, { NULL, MIN_ENTRIES_LOCAL_ALLOC_LEN, 0, 0, 0, 0 } }; static struct caml_memprof_th_ctx* local = &caml_memprof_main_ctx; /* Pointer to the word following the next sample in the minor @@ -397,6 +402,8 @@ static int realloc_entries(struct entry_array* ea, uintnat grow) return 1; } +#define Invalid_index (~(uintnat)0) + Caml_inline uintnat new_tracked(uintnat n_samples, uintnat wosize, int is_unmarshalled, int is_young, value block, value user_data) @@ -410,14 +417,13 @@ Caml_inline uintnat new_tracked(uintnat n_samples, uintnat wosize, t->n_samples = n_samples; t->wosize = wosize; t->user_data = user_data; - t->idx_ptr = NULL; + t->running = NULL; t->alloc_young = is_young; t->unmarshalled = is_unmarshalled; t->promoted = 0; t->deallocated = 0; t->cb_promote_called = t->cb_dealloc_called = 0; t->deleted = 0; - t->callback_running = 0; return local->entries.len - 1; } @@ -428,7 +434,6 @@ static void mark_deleted(struct entry_array* ea, uintnat t_idx) t->user_data = Val_unit; t->block = Val_unit; if (t_idx < ea->delete_idx) ea->delete_idx = t_idx; - CAMLassert(t->idx_ptr == NULL); } Caml_inline value run_callback_exn( @@ -436,22 +441,28 @@ Caml_inline value run_callback_exn( { struct tracked* t = &ea->t[t_idx]; value res; - CAMLassert(!t->callback_running && t->idx_ptr == NULL); + CAMLassert(t->running == NULL); CAMLassert(lambda > 0.); - local->callback_running = t->callback_running = 1; - t->idx_ptr = &t_idx; + local->callback_status = ea == &entries_global ? t_idx : CB_LOCAL; + t->running = local; t->user_data = Val_unit; /* Release root. */ res = caml_callback_exn(cb, param); - local->callback_running = 0; - /* The call above can modify [t_idx] and thus invalidate [t]. */ - if (t_idx == Invalid_index) { + if (local->callback_status == CB_STOPPED) { /* Make sure this entry has not been removed by [caml_memprof_stop] */ - return Val_unit; + local->callback_status = CB_IDLE; + return Is_exception_result(res) ? res : Val_unit; } - t = &ea->t[t_idx]; - t->idx_ptr = NULL; - t->callback_running = 0; + /* The call above can move the tracked entry and thus invalidate + [t_idx] and [t]. */ + if (ea == &entries_global) { + CAMLassert(local->callback_status >= 0 && local->callback_status < ea->len); + t_idx = local->callback_status; + t = &ea->t[t_idx]; + } + local->callback_status = CB_IDLE; + CAMLassert(t->running == local); + t->running = NULL; if (Is_exception_result(res) || res == Val_unit) { /* Callback raised an exception or returned None or (), discard this entry. */ @@ -484,7 +495,7 @@ Caml_inline value run_callback_exn( /* Run the allocation callback for a given entry of the local entries array. This assumes that the corresponding [deleted] and - [callback_running] fields of the entry are both set to 0. + [running] fields of the entry are both set to 0. Reentrancy is not a problem for this function, since other threads will use a different array for entries. The index of the entry will not change, except if [caml_memprof_stop] is @@ -505,7 +516,7 @@ static value run_alloc_callback_exn(uintnat t_idx) Field(sample_info, 2) = Val_long(t->unmarshalled); Field(sample_info, 3) = t->user_data; return run_callback_exn(&local->entries, t_idx, - t->alloc_young ? Alloc_minor(tracker) : Alloc_major(tracker), sample_info); + t->alloc_young ? 
Alloc_minor(tracker) : Alloc_major(tracker), sample_info); } /* Remove any deleted entries from [ea], updating [ea->young_idx] and @@ -519,7 +530,9 @@ static void flush_deleted(struct entry_array* ea) j = i = ea->delete_idx; while (i < ea->len) { if (!ea->t[i].deleted) { - if (ea->t[i].idx_ptr != NULL) *ea->t[i].idx_ptr = j; + struct caml_memprof_th_ctx* runner = ea->t[i].running; + if (runner != NULL && runner->callback_status == i) + runner->callback_status = j; ea->t[j] = ea->t[i]; j++; } @@ -561,9 +574,14 @@ value caml_memprof_handle_postponed_exn(void) for (i = 0; i < local->entries.len; i++) { /* We are the only thread allowed to modify [local->entries], so - the indices cannot shift. */ + the indices cannot shift, but it is still possible that + [caml_memprof_stop] got called during the callback, + invalidating all the entries. */ res = run_alloc_callback_exn(i); if (Is_exception_result(res)) goto end; + if (local->entries.len == 0) + goto end; /* [caml_memprof_stop] has been called. */ + if (local->entries.t[i].deleted) continue; if (realloc_entries(&entries_global, 1)) /* Transfer the entry to the global array. */ entries_global.t[entries_global.len++] = local->entries.t[i]; @@ -573,7 +591,7 @@ value caml_memprof_handle_postponed_exn(void) while (callback_idx < entries_global.len) { struct tracked* t = &entries_global.t[callback_idx]; - if (t->deleted || t->callback_running) { + if (t->deleted || t->running != NULL) { /* This entry is not ready. Ignore it. */ callback_idx++; } else if (t->promoted && !t->cb_promote_called) { @@ -894,39 +912,38 @@ void caml_memprof_track_young(uintnat wosize, int from_caml, for (i = 0; i < allocs_sampled; i++) { uintnat idx = local->entries.len-allocs_sampled+i; - if (!local->entries.t[idx].deleted) { - if (realloc_entries(&entries_global, 1)) { - /* Transfer the entry to the global array. */ - struct tracked* t = &entries_global.t[entries_global.len]; - entries_global.len++; - *t = local->entries.t[idx]; + if (local->entries.t[idx].deleted) continue; + if (realloc_entries(&entries_global, 1)) { + /* Transfer the entry to the global array. */ + struct tracked* t = &entries_global.t[entries_global.len]; + entries_global.len++; + *t = local->entries.t[idx]; - if (Is_exception_result(res)) { - /* The allocations are cancelled because of the exception, - but this callback has already been called. We simulate a - deallocation. */ - t->block = Val_unit; - t->deallocated = 1; - } else { - /* If the execution of the callback has succeeded, then we start the - tracking of this block.. + if (Is_exception_result(res)) { + /* The allocations are cancelled because of the exception, + but this callback has already been called. We simulate a + deallocation. */ + t->block = Val_unit; + t->deallocated = 1; + } else { + /* If the execution of the callback has succeeded, then we start the + tracking of this block.. - Subtlety: we are actually writing [t->block] with an invalid - (uninitialized) block. This is correct because the allocation - and initialization happens right after returning from - [caml_memprof_track_young]. */ - t->block = Val_hp(Caml_state->young_ptr + Offs_placeholder(t->block)); + Subtlety: we are actually writing [t->block] with an invalid + (uninitialized) block. This is correct because the allocation + and initialization happens right after returning from + [caml_memprof_track_young]. 
*/
+        t->block = Val_hp(Caml_state->young_ptr + Offs_placeholder(t->block));
 
-          /* We make sure that the action pending flag is not set
-             systematically, which is to be expected, since we created
-             a new block in the global entry array, but this new block
-             does not need promotion or deallocation callback. */
-          if (callback_idx == entries_global.len - 1)
-            callback_idx = entries_global.len;
-        }
+        /* We make sure that the action pending flag is not set
+           systematically, which is to be expected, since we created
+           a new block in the global entry array, but this new block
+           does not need promotion or deallocation callback. */
+        if (callback_idx == entries_global.len - 1)
+          callback_idx = entries_global.len;
       }
-      mark_deleted(&local->entries, idx);
     }
+    mark_deleted(&local->entries, idx);
   }
 
   flush_deleted(&local->entries);
@@ -1016,24 +1033,32 @@ CAMLprim value caml_memprof_start(value lv, value szv, value tracker_param)
   CAMLreturn(Val_unit);
 }
 
-static void entry_array_discard(struct entry_array* ea, void* data)
+static void empty_entry_array(struct entry_array *ea)
+{
+  if (ea != NULL) {
+    ea->alloc_len = ea->len = ea->young_idx = ea->delete_idx = 0;
+    caml_stat_free(ea->t);
+    ea->t = NULL;
+  }
+}
+
+static void th_ctx_memprof_stop(struct caml_memprof_th_ctx* ctx, void* data)
 {
-  uintnat i;
   (void)data;
-  for (i = 0; i < ea->len; i++)
-    if (ea->t[i].idx_ptr != NULL)
-      *ea->t[i].idx_ptr = Invalid_index;
-  ea->alloc_len = ea->len = ea->young_idx = ea->delete_idx = 0;
-  caml_stat_free(ea->t);
-  ea->t = NULL;
+  if (ctx->callback_status != CB_IDLE) ctx->callback_status = CB_STOPPED;
+  empty_entry_array(&ctx->entries);
 }
 
 CAMLprim value caml_memprof_stop(value unit)
 {
   if (!started) caml_failwith("Gc.Memprof.stop: not started.");
 
-  /* Discard the tracked blocks. */
-  entry_arrays_iter(entry_array_discard, NULL);
+  /* Discard the tracked blocks in the global entries array. */
+  empty_entry_array(&entries_global);
+
+  /* Discard the tracked blocks in the local entries array,
+     and set [callback_status] to [CB_STOPPED]. */
+  caml_memprof_th_ctx_iter_hook(th_ctx_memprof_stop, NULL);
+
   callback_idx = 0;
 
   lambda = 0;
@@ -1065,7 +1090,7 @@ CAMLexport struct caml_memprof_th_ctx* caml_memprof_new_th_ctx()
   struct caml_memprof_th_ctx* ctx =
     caml_stat_alloc(sizeof(struct caml_memprof_th_ctx));
   ctx->suspended = 0;
-  ctx->callback_running = 0;
+  ctx->callback_status = CB_IDLE;
   ctx->entries.t = NULL;
   ctx->entries.min_alloc_len = MIN_ENTRIES_LOCAL_ALLOC_LEN;
   ctx->entries.alloc_len = ctx->entries.len = 0;
@@ -1075,11 +1100,10 @@ CAMLexport struct caml_memprof_th_ctx* caml_memprof_new_th_ctx()
 
 CAMLexport void caml_memprof_delete_th_ctx(struct caml_memprof_th_ctx* ctx)
 {
-  /* Make sure that no memprof callback is being executed in this
-     thread. If so, memprof data structures may have pointers to the
-     thread's stack. */
-  if (ctx->callback_running)
-    caml_fatal_error("Thread.exit called from a memprof callback.");
+  if (ctx->callback_status >= 0)
+    /* A callback is running in this thread from the global entries
+       array. We delete the corresponding entry. */
+    mark_deleted(&entries_global, ctx->callback_status);
   if (ctx == local) local = NULL;
   caml_stat_free(ctx->entries.t);
   if (ctx != &caml_memprof_main_ctx) caml_stat_free(ctx);
diff --git a/stdlib/gc.mli b/stdlib/gc.mli
index 1af290f38..fccb211e6 100644
--- a/stdlib/gc.mli
+++ b/stdlib/gc.mli
@@ -531,10 +531,7 @@ module Memprof :
 
         Note that the callback can be postponed slightly after the
        actual event.
The callstack passed to the callback is always
-        accurate, but the program state may have evolved.
-
-        Calling [Thread.exit] in a callback is currently unsafe and can
-        result in undefined behavior. *)
+        accurate, but the program state may have evolved. *)
 
     val stop : unit -> unit
     (** Stop the sampling. Fails if sampling is not active.
diff --git a/testsuite/tests/statmemprof/thread_exit_in_callback.ml b/testsuite/tests/statmemprof/thread_exit_in_callback.ml
index 97c1a3aee..753f7726f 100644
--- a/testsuite/tests/statmemprof/thread_exit_in_callback.ml
+++ b/testsuite/tests/statmemprof/thread_exit_in_callback.ml
@@ -1,18 +1,26 @@
 (* TEST
-modules = "thread_exit_in_callback_stub.c"
-exit_status = "42"
 * hassysthreads
 include systhreads
 ** bytecode
 ** native
 *)
 
-(* We cannot tell Ocamltest that this program is supposed to stop with
-   a fatal error. Instead, we install a fatal error hook and call exit(42) *)
-external install_fatal_error_hook : unit -> unit = "install_fatal_error_hook"
+let _ =
+  let main_thread = Thread.id (Thread.self ()) in
+  Gc.Memprof.(start ~callstack_size:10 ~sampling_rate:1.
+    { null_tracker with alloc_minor = fun _ ->
+          if Thread.id (Thread.self ()) <> main_thread then
+            Thread.exit ();
+          None });
+  let t = Thread.create (fun () ->
+      ignore (Sys.opaque_identity (ref 1));
+      assert false) ()
+  in
+  Thread.join t;
+  Gc.Memprof.stop ()
 
 let _ =
-  install_fatal_error_hook ();
   Gc.Memprof.(start ~callstack_size:10 ~sampling_rate:1.
     { null_tracker with alloc_minor = fun _ -> Thread.exit (); None });
-  ignore (Sys.opaque_identity (ref 1))
+  ignore (Sys.opaque_identity (ref 1));
+  assert false
diff --git a/testsuite/tests/statmemprof/thread_exit_in_callback.reference b/testsuite/tests/statmemprof/thread_exit_in_callback.reference
deleted file mode 100644
index 4d745f0ce..000000000
--- a/testsuite/tests/statmemprof/thread_exit_in_callback.reference
+++ /dev/null
@@ -1 +0,0 @@
-Fatal error hook: Thread.exit called from a memprof callback.
diff --git a/testsuite/tests/statmemprof/thread_exit_in_callback_stub.c b/testsuite/tests/statmemprof/thread_exit_in_callback_stub.c
deleted file mode 100644
index 91ed43cc9..000000000
--- a/testsuite/tests/statmemprof/thread_exit_in_callback_stub.c
+++ /dev/null
@@ -1,16 +0,0 @@
-#include <stdio.h>
-#include "caml/misc.h"
-#include "caml/mlvalues.h"
-
-void fatal_error_hook_exit_3 (char *msg, va_list args) {
-  fprintf(stderr, "Fatal error hook: ");
-  vfprintf(stderr, msg, args);
-  fprintf(stderr, "\n");
-  exit(42);
-}
-
-
-value install_fatal_error_hook (value unit) {
-  caml_fatal_error_hook = fatal_error_hook_exit_3;
-  return Val_unit;
-}
From b9abf833ca733211b2d827b9188c650024dc0aca Mon Sep 17 00:00:00 2001
From: Jacques-Henri Jourdan
Date: Fri, 12 Jun 2020 22:17:06 +0200
Subject: [PATCH 10/12] Memprof/systhreads: call [caml_memprof_delete_th_ctx]
 when threads get killed by fork.
---
 otherlibs/systhreads/st_stubs.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/otherlibs/systhreads/st_stubs.c b/otherlibs/systhreads/st_stubs.c
index 5fb73ce6d..9af8c45fe 100644
--- a/otherlibs/systhreads/st_stubs.c
+++ b/otherlibs/systhreads/st_stubs.c
@@ -403,20 +403,15 @@ static void caml_thread_remove_info(caml_thread_t th)
 
 static void caml_thread_reinitialize(void)
 {
-  caml_thread_t thr, next;
   struct channel * chan;
 
   /* Remove all other threads (now nonexistent)
     from the doubly-linked list of threads */
-  thr = curr_thread->next;
-  while (thr != curr_thread) {
-    next = thr->next;
-    caml_stat_free(thr);
-    thr = next;
+  while (curr_thread->next != curr_thread) {
+    caml_memprof_delete_th_ctx(curr_thread->next->memprof_ctx);
+    caml_thread_remove_info(curr_thread->next);
   }
-  curr_thread->next = curr_thread;
-  curr_thread->prev = curr_thread;
-  all_threads = curr_thread;
+
   /* Reinitialize the master lock machinery,
     just in case the fork happened while other threads were doing
     caml_leave_blocking_section */
From 7e4748cf9cae6094883effafe27a89aef7e26363 Mon Sep 17 00:00:00 2001
From: Jacques-Henri Jourdan
Date: Fri, 26 Jun 2020 09:56:44 +0200
Subject: [PATCH 11/12] Changes.

---
 Changes | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Changes b/Changes
index 9932441fe..ec7fa05c4 100644
--- a/Changes
+++ b/Changes
@@ -180,6 +180,10 @@ OCaml 4.12.0
   (Enguerrand Decorne, KC Sivaramakrishnan, Xavier Leroy, Stephen Dolan,
    David Allsopp, Nicolás Ojeda Bär review by Xavier Leroy, Nicolás Ojeda Bär)
 
+* #9674: Memprof: guarantee that an allocation callback is always run
+  in the same thread where the allocation takes place
+  (Jacques-Henri Jourdan, review by Stephen Dolan)
+
 ### Code generation and optimizations:
 
 - #9551: ocamlc no longer loads DLLs at link time to check that
From a6980b256d6dbe4e6e222e3f878d742d2979dd69 Mon Sep 17 00:00:00 2001
From: Stephen Dolan
Date: Mon, 12 Oct 2020 14:40:47 +0100
Subject: [PATCH 12/12] Add a test for Memprof.stop within a Memprof callback

---
 testsuite/tests/statmemprof/exception_callback.ml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/testsuite/tests/statmemprof/exception_callback.ml b/testsuite/tests/statmemprof/exception_callback.ml
index 55dd5e555..e1589372a 100644
--- a/testsuite/tests/statmemprof/exception_callback.ml
+++ b/testsuite/tests/statmemprof/exception_callback.ml
@@ -16,6 +16,11 @@ let alloc_tracker on_alloc =
    its uncaught exception handler. *)
 let _ = Printexc.record_backtrace false
 
+let () =
+  start ~callstack_size:10 ~sampling_rate:1.
+    (alloc_tracker (fun _ -> stop ()));
+  ignore (Sys.opaque_identity (Array.make 200 0))
+
 let _ =
   start ~callstack_size:10 ~sampling_rate:1.
     (alloc_tracker (fun _ -> failwith "callback failed"));
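
Taken together, the series leaves the Gc.Memprof API unchanged while
strengthening its threading guarantees. A standalone usage sketch (not
part of the patches; sampling rate and callback bodies are illustrative
only): with the guarantee above, each counter is updated from the thread
that performed the sampled allocation.

    (* Sketch: count sampled minor/major allocations until [stop]. *)
    let minor = ref 0 and major = ref 0

    let () =
      Gc.Memprof.(start ~callstack_size:10 ~sampling_rate:0.01
        { null_tracker with
          alloc_minor = (fun _ -> incr minor; Some ());
          alloc_major = (fun _ -> incr major; Some ()) });
      for _ = 1 to 100_000 do
        ignore (Sys.opaque_identity (Array.make (1 + Random.int 100) 0))
      done;
      Gc.Memprof.stop ();
      Printf.printf "sampled: %d minor, %d major\n" !minor !major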