Merge pull request #9279 from stedolan/optimise-callstacks-again

Avoid scanning the stack twice when collecting callstacks in Memprof.
master
Gabriel Scherer 2020-02-05 17:15:14 +01:00 committed by GitHub
commit edee8cea3e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 117 additions and 84 deletions

View File

@ -44,6 +44,9 @@ Working version
- #9230: Memprof support for native allocations.
(Jacques-Henri Jourdan and Stephen Dolan, review by Gabriel Scherer)
- #9279: Memprof optimisation.
(Stephen Dolan, review by Jacques-Henri Jourdan)
### Code generation and optimizations:
- #8637, #8805, #9247: Record debug info for each allocation.

View File

@ -325,12 +325,17 @@ CAMLprim value caml_get_exception_backtrace(value unit)
CAMLreturn(res);
}
CAMLprim value caml_get_current_callstack(value max_frames_value) {
CAMLprim value caml_get_current_callstack(value max_frames_value)
{
CAMLparam1(max_frames_value);
CAMLlocal1(res);
res = caml_alloc(caml_current_callstack_size(Long_val(max_frames_value)), 0);
caml_current_callstack_write(res, -1);
value* callstack = NULL;
intnat callstack_alloc_len = 0;
intnat callstack_len =
caml_collect_current_callstack(&callstack, &callstack_alloc_len,
Long_val(max_frames_value), -1);
res = caml_alloc(callstack_len, 0);
memcpy(Op_val(res), callstack, sizeof(value) * callstack_len);
caml_stat_free(callstack);
CAMLreturn(res);
}

View File

@ -275,34 +275,38 @@ code_t caml_next_frame_pointer(value ** sp, value ** trsp)
return NULL;
}
intnat caml_current_callstack_size(intnat max_frames)
{
intnat trace_size;
value * sp = Caml_state->extern_sp;
value * trsp = Caml_state->trapsp;
for (trace_size = 0; trace_size < max_frames; trace_size++) {
code_t p = caml_next_frame_pointer(&sp, &trsp);
if (p == NULL) break;
}
return trace_size;
}
void caml_current_callstack_write(value trace, int alloc_idx)
#define Default_callstack_size 32
intnat caml_collect_current_callstack(value** ptrace, intnat* plen,
intnat max_frames, int alloc_idx)
{
value * sp = Caml_state->extern_sp;
value * trsp = Caml_state->trapsp;
uintnat trace_pos, trace_size = Wosize_val(trace);
intnat trace_pos = 0;
CAMLassert(alloc_idx == 0 || alloc_idx == -1);
for (trace_pos = 0; trace_pos < trace_size; trace_pos++) {
code_t p = caml_next_frame_pointer(&sp, &trsp);
CAMLassert(p != NULL);
/* [Val_backtrace_slot(...)] is always a long, no need to call
[caml_modify]. */
Field(trace, trace_pos) = Val_backtrace_slot(p);
if (max_frames <= 0) return 0;
if (*plen == 0) {
value* trace =
caml_stat_alloc_noexc(Default_callstack_size * sizeof(value));
if (trace == NULL) return 0;
*ptrace = trace;
*plen = Default_callstack_size;
}
while (trace_pos < max_frames) {
code_t p = caml_next_frame_pointer(&sp, &trsp);
if (p == NULL) break;
if (trace_pos == *plen) {
intnat new_len = *plen * 2;
value * trace = caml_stat_resize_noexc(*ptrace, new_len * sizeof(value));
if (trace == NULL) break;
*ptrace = trace;
*plen = new_len;
}
(*ptrace)[trace_pos++] = Val_backtrace_slot(p);
}
return trace_pos;
}
/* Read the debugging info contained in the current bytecode executable. */

View File

@ -104,23 +104,6 @@ void caml_stash_backtrace(value exn, uintnat pc, char * sp, char * trapsp)
}
}
intnat caml_current_callstack_size(intnat max_frames) {
intnat trace_size = 0;
uintnat pc = Caml_state->last_return_address;
char * sp = Caml_state->bottom_of_stack;
while (1) {
frame_descr * descr = caml_next_frame_descriptor(&pc, &sp);
if (descr == NULL) break;
if (trace_size >= max_frames) break;
++trace_size;
if (sp > Caml_state->top_of_stack) break;
}
return trace_size;
}
/* A backtrace_slot is either a debuginfo or a frame_descr* */
#define Slot_is_debuginfo(s) ((uintnat)(s) & 2)
#define Debuginfo_slot(s) ((debuginfo)((uintnat)(s) - 2))
@ -128,27 +111,49 @@ intnat caml_current_callstack_size(intnat max_frames) {
#define Frame_descr_slot(s) ((frame_descr*)(s))
#define Slot_frame_descr(f) ((backtrace_slot)(f))
static debuginfo debuginfo_extract(frame_descr* d, int alloc_idx);
void caml_current_callstack_write(value trace, int alloc_idx)
#define Default_callstack_size 32
intnat caml_collect_current_callstack(value** ptrace, intnat* plen,
intnat max_frames, int alloc_idx)
{
uintnat pc = Caml_state->last_return_address;
char * sp = Caml_state->bottom_of_stack;
intnat trace_pos = 0, trace_size = Wosize_val(trace);
intnat trace_pos = 0;
if (alloc_idx >= 0 && trace_size > 0) {
if (max_frames <= 0) return 0;
if (*plen == 0) {
value* trace =
caml_stat_alloc_noexc(Default_callstack_size * sizeof(value));
if (trace == NULL) return 0;
*ptrace = trace;
*plen = Default_callstack_size;
}
if (alloc_idx >= 0) {
/* First frame has a Comballoc selector */
frame_descr * descr = caml_next_frame_descriptor(&pc, &sp);
debuginfo info = debuginfo_extract(descr, alloc_idx);
debuginfo info;
if (descr == NULL) return 0;
info = debuginfo_extract(descr, alloc_idx);
CAMLassert(((uintnat)info & 3) == 0);
Field(trace, 0) = Val_backtrace_slot(Slot_debuginfo(info));
trace_pos++;
(*ptrace)[trace_pos++] = Val_backtrace_slot(Slot_debuginfo(info));
}
for (; trace_pos < trace_size; trace_pos++) {
while (trace_pos < max_frames) {
frame_descr * descr = caml_next_frame_descriptor(&pc, &sp);
CAMLassert(descr != NULL && ((uintnat)descr & 3) == 0);
/* [Val_backtrace_slot(...)] is always a long, no need to call
[caml_modify]. */
Field(trace, trace_pos) = Val_backtrace_slot(Slot_frame_descr(descr));
if (descr == NULL) break;
CAMLassert(((uintnat)descr & 3) == 0);
if (trace_pos == *plen) {
intnat new_len = *plen * 2;
value * trace = caml_stat_resize_noexc(*ptrace, new_len * sizeof(value));
if (trace == NULL) break;
*ptrace = trace;
*plen = new_len;
}
(*ptrace)[trace_pos++] = Val_backtrace_slot(Slot_frame_descr(descr));
}
return trace_pos;
}
static debuginfo debuginfo_extract(frame_descr* d, int alloc_idx)

View File

@ -90,33 +90,21 @@ value caml_remove_debug_info(code_t start);
* It defines the [caml_stash_backtrace] function, which is called to quickly
* fill the backtrace buffer by walking the stack when an exception is raised.
*
* It also defines the two following functions, which makes it possible
* to store upto [max_frames_value] frames of the current call
* stack. This is not used in an exception-raising context, but only
* when the user requests to save the trace (hopefully less often), or
* the context of profiling. Instead of using a bounded buffer as
* [caml_stash_backtrace], we first traverse the stack to compute the
* right size, then allocate space for the trace.
* It also defines [caml_collect_current_callstack], which stores up
* to [max_frames] frames of the current call stack into the
* statically allocated buffer [*pbuffer] of length [*plen]. If the
* buffer is not long enough, it will be reallocated. The number of
* frames collected is returned.
*
* The first function, [caml_current_callstack_size] computes the size
* (in words) of the needed buffer, while the second actually writes
* the call stack to the buffer as an object of type
* [raw_backtrace]. It should always be called with a buffer of the
* size predicted by [caml_current_callstack_size]. The reason we use
* two separated functions is to allow using either [caml_alloc] (for
* performance) or [caml_alloc_shr] (when we need to avoid a call to
* the GC, in memprof.c).
*
* The alloc_idx parameter to [caml_current_callstack_write] is used
* to select between the backtraces of different allocation sites which
* were combined by Comballoc. Passing -1 here means the caller doesn't
* care which is chosen.
* The alloc_idx parameter is used to select between the backtraces of
* different allocation sites which were combined by Comballoc.
* Passing -1 here means the caller doesn't care which is chosen.
*
* We use `intnat` for max_frames because, were it only `int`, passing
* `max_int` from the OCaml side would overflow on 64bits machines. */
intnat caml_current_callstack_size(intnat max_frames);
void caml_current_callstack_write(value trace, int alloc_idx);
intnat caml_collect_current_callstack(value** pbuffer, intnat* plen,
intnat max_frames, int alloc_idx);
#endif /* CAML_INTERNALS */

View File

@ -63,6 +63,10 @@ static int init = 0;
/* Whether memprof is started. */
static int started = 0;
/* Buffer used to compute backtraces */
static value* callstack_buffer = NULL;
static intnat callstack_buffer_len = 0;
/**** Statistical sampling ****/
static double mt_generate_uniform(void)
@ -147,10 +151,20 @@ static uintnat mt_generate_binom(uintnat len)
static value capture_callstack_postponed()
{
value res;
uintnat wosize = caml_current_callstack_size(callstack_size);
if (wosize == 0) return Atom(0);
res = caml_alloc_shr_no_track_noexc(wosize, 0);
if (res != 0) caml_current_callstack_write(res, -1);
intnat callstack_len =
caml_collect_current_callstack(&callstack_buffer, &callstack_buffer_len,
callstack_size, -1);
if (callstack_len == 0)
return Atom(0);
res = caml_alloc_shr_no_track_noexc(callstack_len, 0);
if (res == 0)
return Atom(0);
memcpy(Op_val(res), callstack_buffer, sizeof(value) * callstack_len);
if (callstack_buffer_len > 256 && callstack_buffer_len > callstack_len * 8) {
caml_stat_free(callstack_buffer);
callstack_buffer = NULL;
callstack_buffer_len = 0;
}
return res;
}
@ -161,10 +175,17 @@ static value capture_callstack_postponed()
static value capture_callstack(int alloc_idx)
{
value res;
uintnat wosize = caml_current_callstack_size(callstack_size);
intnat callstack_len =
caml_collect_current_callstack(&callstack_buffer, &callstack_buffer_len,
callstack_size, alloc_idx);
CAMLassert(caml_memprof_suspended);
res = caml_alloc(wosize, 0);
caml_current_callstack_write(res, alloc_idx);
res = caml_alloc(callstack_len, 0);
memcpy(Op_val(res), callstack_buffer, sizeof(value) * callstack_len);
if (callstack_buffer_len > 256 && callstack_buffer_len > callstack_len * 8) {
caml_stat_free(callstack_buffer);
callstack_buffer = NULL;
callstack_buffer_len = 0;
}
return res;
}
@ -841,6 +862,9 @@ void caml_memprof_shutdown(void) {
caml_stat_free(trackst.entries);
trackst.entries = NULL;
trackst.alloc_len = 0;
caml_stat_free(callstack_buffer);
callstack_buffer = NULL;
callstack_buffer_len = 0;
}
CAMLprim value caml_memprof_start(value lv, value szv,
@ -926,5 +950,9 @@ CAMLprim value caml_memprof_stop(value unit)
caml_remove_generational_global_root(&callback_dealloc_minor);
caml_remove_generational_global_root(&callback_dealloc_major);
caml_stat_free(callstack_buffer);
callstack_buffer = NULL;
callstack_buffer_len = 0;
return Val_unit;
}