ocaml/asmrun/spacetime.c

1097 lines
34 KiB
C

/**************************************************************************/
/* */
/* OCaml */
/* */
/* Mark Shinwell and Leo White, Jane Street Europe */
/* */
/* Copyright 2013--2016, Jane Street Group, LLC */
/* */
/* All rights reserved. This file is distributed under the terms of */
/* the GNU Lesser General Public License version 2.1, with the */
/* special exception on linking described in the file LICENSE. */
/* */
/**************************************************************************/
#define CAML_INTERNALS
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <math.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include "caml/config.h"
#ifdef HAS_UNISTD
#include <unistd.h>
#endif
#include "caml/alloc.h"
#include "caml/backtrace_prim.h"
#include "caml/fail.h"
#include "caml/gc.h"
#include "caml/intext.h"
#include "caml/major_gc.h"
#include "caml/memory.h"
#include "caml/minor_gc.h"
#include "caml/misc.h"
#include "caml/mlvalues.h"
#include "caml/roots.h"
#include "caml/signals.h"
#include "caml/stack.h"
#include "caml/sys.h"
#include "spacetime.h"
#ifdef WITH_SPACETIME
/* We force "noinline" in certain places to be sure we know how many
frames there will be on the stack. */
#define NOINLINE __attribute__((noinline))
#ifdef HAS_LIBUNWIND
#define UNW_LOCAL_ONLY
#include "libunwind.h"
#endif
static int automatic_snapshots = 0;
static double snapshot_interval = 0.0;
static double next_snapshot_time = 0.0;
static struct channel *snapshot_channel;
static int pid_when_snapshot_channel_opened;
extern value caml_spacetime_debug(value);
static char* start_of_free_node_block;
static char* end_of_free_node_block;
typedef struct per_thread {
value* trie_node_root;
value* finaliser_trie_node_root;
struct per_thread* next;
} per_thread;
/* List of tries corresponding to threads that have been created. */
/* CR-soon mshinwell: just include the main trie in this list. */
static per_thread* per_threads = NULL;
static int num_per_threads = 0;
/* [caml_spacetime_shapes] is defined in the startup file. */
extern uint64_t* caml_spacetime_shapes;
uint64_t** caml_spacetime_static_shape_tables = NULL;
shape_table* caml_spacetime_dynamic_shape_tables = NULL;
static uintnat caml_spacetime_profinfo = (uintnat) 0;
static value caml_spacetime_trie_root = Val_unit;
value* caml_spacetime_trie_node_ptr = &caml_spacetime_trie_root;
static value caml_spacetime_finaliser_trie_root_main_thread = Val_unit;
value* caml_spacetime_finaliser_trie_root
= &caml_spacetime_finaliser_trie_root_main_thread;
/* CR-someday mshinwell: think about thread safety of the manipulation of
this list for multicore */
allocation_point* caml_all_allocation_points = NULL;
static const uintnat chunk_size = 1024 * 1024;
static void reinitialise_free_node_block(void)
{
size_t index;
start_of_free_node_block = (char*) malloc(chunk_size);
end_of_free_node_block = start_of_free_node_block + chunk_size;
for (index = 0; index < chunk_size / sizeof(value); index++) {
((value*) start_of_free_node_block)[index] = Val_unit;
}
}
#ifndef O_BINARY
#define O_BINARY 0
#endif
#if defined (_WIN32) || defined (_WIN64)
extern value val_process_id;
#endif
static uint32_t version_number = 0;
static uint32_t magic_number_base = 0xace00ace;
static void caml_spacetime_write_magic_number_internal(struct channel* chan)
{
value magic_number =
Val_long(((uint64_t) magic_number_base)
| (((uint64_t) version_number) << 32));
Lock(chan);
caml_output_val(chan, magic_number, Val_long(0));
Unlock(chan);
}
CAMLprim value caml_spacetime_write_magic_number(value v_channel)
{
caml_spacetime_write_magic_number_internal(Channel(v_channel));
return Val_unit;
}
static char* automatic_snapshot_dir;
static void open_snapshot_channel(void)
{
int fd;
char filename[8192];
int pid;
#if defined (_WIN32) || defined (_WIN64)
pid = Int_val(val_process_id);
#else
pid = getpid();
#endif
snprintf(filename, 8192, "%s/spacetime-%d", automatic_snapshot_dir, pid);
filename[8191] = '\0';
fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
if (fd == -1) {
automatic_snapshots = 0;
}
else {
snapshot_channel = caml_open_descriptor_out(fd);
pid_when_snapshot_channel_opened = pid;
caml_spacetime_write_magic_number_internal(snapshot_channel);
}
}
static void maybe_reopen_snapshot_channel(void)
{
/* This function should be used before writing to the automatic snapshot
channel. It detects whether we have forked since the channel was opened.
If so, we close the old channel (ignoring any errors just in case the
old fd has been closed, e.g. in a double-fork situation where the middle
process has a loop to manually close all fds and no Spacetime snapshot
was written during that time) and then open a new one. */
int pid;
#if defined (_WIN32) || defined (_WIN64)
pid = Int_val(val_process_id);
#else
pid = getpid();
#endif
if (pid != pid_when_snapshot_channel_opened) {
caml_close_channel(snapshot_channel);
open_snapshot_channel();
}
}
extern void caml_spacetime_automatic_save(void);
void caml_spacetime_initialize(void)
{
/* Note that this is called very early (even prior to GC initialisation). */
char *ap_interval;
reinitialise_free_node_block();
caml_spacetime_static_shape_tables = &caml_spacetime_shapes;
ap_interval = getenv ("OCAML_SPACETIME_INTERVAL");
if (ap_interval != NULL) {
unsigned int interval = 0;
sscanf(ap_interval, "%u", &interval);
if (interval != 0) {
double time;
char cwd[4096];
char* user_specified_automatic_snapshot_dir;
int dir_ok = 1;
user_specified_automatic_snapshot_dir =
getenv("OCAML_SPACETIME_SNAPSHOT_DIR");
if (user_specified_automatic_snapshot_dir == NULL) {
#ifdef HAS_GETCWD
if (getcwd(cwd, sizeof(cwd)) == NULL) {
dir_ok = 0;
}
#else
if (getwd(cwd) == NULL) {
dir_ok = 0;
}
#endif
if (dir_ok) {
automatic_snapshot_dir = strdup(cwd);
}
}
else {
automatic_snapshot_dir =
strdup(user_specified_automatic_snapshot_dir);
}
if (dir_ok) {
automatic_snapshots = 1;
open_snapshot_channel();
if (automatic_snapshots) {
snapshot_interval = interval / 1e3;
time = caml_sys_time_unboxed(Val_unit);
next_snapshot_time = time + snapshot_interval;
atexit(&caml_spacetime_automatic_save);
}
}
}
}
}
void caml_spacetime_register_shapes(void* dynlinked_table)
{
shape_table* table;
table = (shape_table*) malloc(sizeof(shape_table));
if (table == NULL) {
fprintf(stderr, "Out of memory whilst registering shape table");
abort();
}
table->table = (uint64_t*) dynlinked_table;
table->next = caml_spacetime_dynamic_shape_tables;
caml_spacetime_dynamic_shape_tables = table;
}
CAMLprim value caml_spacetime_trie_is_initialized (value v_unit)
{
return (caml_spacetime_trie_root == Val_unit) ? Val_false : Val_true;
}
CAMLprim value caml_spacetime_get_trie_root (value v_unit)
{
return caml_spacetime_trie_root;
}
void caml_spacetime_register_thread(
value* trie_node_root, value* finaliser_trie_node_root)
{
per_thread* thr;
thr = (per_thread*) malloc(sizeof(per_thread));
if (thr == NULL) {
fprintf(stderr, "Out of memory while registering thread for profiling\n");
abort();
}
thr->next = per_threads;
per_threads = thr;
thr->trie_node_root = trie_node_root;
thr->finaliser_trie_node_root = finaliser_trie_node_root;
/* CR-soon mshinwell: record thread ID (and for the main thread too) */
num_per_threads++;
}
static void caml_spacetime_save_event_internal (value v_time_opt,
struct channel* chan,
value v_event_name)
{
value v_time;
double time_override = 0.0;
int use_time_override = 0;
if (Is_block(v_time_opt)) {
time_override = Double_field(Field(v_time_opt, 0), 0);
use_time_override = 1;
}
v_time = caml_spacetime_timestamp(time_override, use_time_override);
Lock(chan);
caml_output_val(chan, Val_long(2), Val_long(0));
caml_output_val(chan, v_event_name, Val_long(0));
caml_extern_allow_out_of_heap = 1;
caml_output_val(chan, v_time, Val_long(0));
caml_extern_allow_out_of_heap = 0;
Unlock(chan);
caml_stat_free(Hp_val(v_time));
}
CAMLprim value caml_spacetime_save_event (value v_time_opt,
value v_channel,
value v_event_name)
{
struct channel* chan = Channel(v_channel);
caml_spacetime_save_event_internal(v_time_opt, chan, v_event_name);
return Val_unit;
}
void save_trie (struct channel *chan, double time_override,
int use_time_override)
{
value v_time, v_frames, v_shapes;
int num_marshalled = 0;
per_thread* thr = per_threads;
Lock(chan);
caml_output_val(chan, Val_long(1), Val_long(0));
v_time = caml_spacetime_timestamp(time_override, use_time_override);
v_frames = caml_spacetime_frame_table();
v_shapes = caml_spacetime_shape_table();
caml_extern_allow_out_of_heap = 1;
caml_output_val(chan, v_time, Val_long(0));
caml_output_val(chan, v_frames, Val_long(0));
caml_output_val(chan, v_shapes, Val_long(0));
caml_extern_allow_out_of_heap = 0;
caml_output_val(chan, Val_long(num_per_threads + 1), Val_long(0));
/* Marshal both the main and finaliser tries, for all threads that have
been created, to an [out_channel]. This can be done by using the
extern.c code as usual, since the trie looks like standard OCaml values;
but we must allow it to traverse outside the heap. */
caml_extern_allow_out_of_heap = 1;
caml_output_val(chan, caml_spacetime_trie_root, Val_long(0));
caml_output_val(chan,
caml_spacetime_finaliser_trie_root_main_thread, Val_long(0));
while (thr != NULL) {
caml_output_val(chan, *(thr->trie_node_root), Val_long(0));
caml_output_val(chan, *(thr->finaliser_trie_node_root),
Val_long(0));
thr = thr->next;
num_marshalled++;
}
caml_extern_allow_out_of_heap = 0;
Assert(num_marshalled == num_per_threads);
Unlock(chan);
}
CAMLprim value caml_spacetime_save_trie (value v_time_opt, value v_channel)
{
struct channel* channel = Channel(v_channel);
double time_override = 0.0;
int use_time_override = 0;
if (Is_block(v_time_opt)) {
time_override = Double_field(Field(v_time_opt, 0), 0);
use_time_override = 1;
}
save_trie(channel, time_override, use_time_override);
return Val_unit;
}
c_node_type caml_spacetime_classify_c_node(c_node* node)
{
return (node->pc & 2) ? CALL : ALLOCATION;
}
c_node* caml_spacetime_c_node_of_stored_pointer(value node_stored)
{
Assert(node_stored == Val_unit || Is_c_node(node_stored));
return (node_stored == Val_unit) ? NULL : (c_node*) Hp_val(node_stored);
}
c_node* caml_spacetime_c_node_of_stored_pointer_not_null(
value node_stored)
{
Assert(Is_c_node(node_stored));
return (c_node*) Hp_val(node_stored);
}
value caml_spacetime_stored_pointer_of_c_node(c_node* c_node)
{
value node;
Assert(c_node != NULL);
node = Val_hp(c_node);
Assert(Is_c_node(node));
return node;
}
static int pc_inside_c_node_matches(c_node* node, void* pc)
{
return Decode_c_node_pc(node->pc) == pc;
}
static value allocate_uninitialized_ocaml_node(int size_including_header)
{
void* node;
uintnat size;
Assert(size_including_header >= 3);
node = caml_stat_alloc(sizeof(uintnat) * size_including_header);
size = size_including_header * sizeof(value);
node = (void*) start_of_free_node_block;
if (end_of_free_node_block - start_of_free_node_block < size) {
reinitialise_free_node_block();
node = (void*) start_of_free_node_block;
Assert(end_of_free_node_block - start_of_free_node_block >= size);
}
start_of_free_node_block += size;
/* We don't currently rely on [uintnat] alignment, but we do need some
alignment, so just be sure. */
Assert (((uintnat) node) % sizeof(uintnat) == 0);
return Val_hp(node);
}
static value find_tail_node(value node, void* callee)
{
/* Search the tail chain within [node] (which corresponds to an invocation
of a caller of [callee]) to determine whether it contains a tail node
corresponding to [callee]. Returns any such node, or [Val_unit] if no
such node exists. */
value starting_node;
value pc;
value found = Val_unit;
starting_node = node;
pc = Encode_node_pc(callee);
do {
Assert(Is_ocaml_node(node));
if (Node_pc(node) == pc) {
found = node;
}
else {
node = Tail_link(node);
}
} while (found == Val_unit && starting_node != node);
return found;
}
CAMLprim value caml_spacetime_allocate_node(
int size_including_header, void* pc, value* node_hole)
{
value node;
value caller_node = Val_unit;
node = *node_hole;
/* The node hole should either contain [Val_unit], indicating that this
function was not tail called and we have not been to this point in the
trie before; or it should contain a value encoded using
[Encoded_tail_caller_node] that points at the node of a caller
that tail called the current function. (Such a value is necessary to
be able to find the start of the caller's node, and hence its tail
chain, so we as a tail-called callee can link ourselves in.) */
Assert(Is_tail_caller_node_encoded(node));
if (node != Val_unit) {
value tail_node;
/* The callee was tail called. Find whether there already exists a node
for it in the tail call chain within the caller's node. The caller's
node must always be an OCaml node. */
caller_node = Decode_tail_caller_node(node);
tail_node = find_tail_node(caller_node, pc);
if (tail_node != Val_unit) {
/* This tail calling sequence has happened before; just fill the hole
with the existing node and return. */
*node_hole = tail_node;
return 0; /* indicates an existing node was returned */
}
}
node = allocate_uninitialized_ocaml_node(size_including_header);
Hd_val(node) =
Make_header(size_including_header - 1, OCaml_node_tag, Caml_black);
Assert((((uintnat) pc) % 1) == 0);
Node_pc(node) = Encode_node_pc(pc);
/* If the callee was tail called, then the tail link field will link this
new node into an existing tail chain. Otherwise, it is initialized with
the empty tail chain, i.e. the one pointing directly at [node]. */
if (caller_node == Val_unit) {
Tail_link(node) = node;
}
else {
Tail_link(node) = Tail_link(caller_node);
Tail_link(caller_node) = node;
}
/* The callee node pointers for direct tail call points are
initialized from code emitted by the OCaml compiler. This is done to
avoid having to pass this function a description of which nodes are
direct tail call points. (We cannot just count them and put them at the
beginning of the node because we need the indexes of elements within the
node during instruction selection before we have found all call points.)
All other fields have already been initialised by
[reinitialise_free_node_block].
*/
*node_hole = node;
return 1; /* indicates a new node was created */
}
static c_node* allocate_c_node(void)
{
c_node* node;
size_t index;
node = (c_node*) start_of_free_node_block;
if (end_of_free_node_block - start_of_free_node_block < sizeof(c_node)) {
reinitialise_free_node_block();
node = (c_node*) start_of_free_node_block;
Assert(end_of_free_node_block - start_of_free_node_block
>= sizeof(c_node));
}
start_of_free_node_block += sizeof(c_node);
Assert((sizeof(c_node) % sizeof(uintnat)) == 0);
/* CR-soon mshinwell: remove this and pad the structure properly */
for (index = 0; index < sizeof(c_node) / sizeof(value); index++) {
((value*) node)[index] = Val_unit;
}
node->gc_header =
Make_header(sizeof(c_node)/sizeof(uintnat) - 1, C_node_tag, Caml_black);
node->data.callee_node = Val_unit;
node->next = Val_unit;
return node;
}
/* Since a given indirect call site either always yields tail calls or
always yields non-tail calls, the output of
[caml_spacetime_indirect_node_hole_ptr] is uniquely determined by its
first two arguments (the callee and the node hole). We cache these
to increase performance of recursive functions containing an indirect
call (e.g. [List.map] when not inlined). */
static void* last_indirect_node_hole_ptr_callee;
static value* last_indirect_node_hole_ptr_node_hole;
static value* last_indirect_node_hole_ptr_result;
CAMLprim value* caml_spacetime_indirect_node_hole_ptr
(void* callee, value* node_hole, value caller_node)
{
/* Find the address of the node hole for an indirect call to [callee].
If [caller_node] is not [Val_unit], it is a pointer to the caller's
node, and indicates that this is a tail call site. */
c_node* c_node;
value encoded_callee;
if (callee == last_indirect_node_hole_ptr_callee
&& node_hole == last_indirect_node_hole_ptr_node_hole) {
return last_indirect_node_hole_ptr_result;
}
last_indirect_node_hole_ptr_callee = callee;
last_indirect_node_hole_ptr_node_hole = node_hole;
encoded_callee = Encode_c_node_pc_for_call(callee);
while (*node_hole != Val_unit) {
Assert(((uintnat) *node_hole) % sizeof(value) == 0);
c_node = caml_spacetime_c_node_of_stored_pointer_not_null(*node_hole);
Assert(c_node != NULL);
Assert(caml_spacetime_classify_c_node(c_node) == CALL);
if (c_node->pc == encoded_callee) {
last_indirect_node_hole_ptr_result = &(c_node->data.callee_node);
return last_indirect_node_hole_ptr_result;
}
else {
node_hole = &c_node->next;
}
}
c_node = allocate_c_node();
c_node->pc = encoded_callee;
if (caller_node != Val_unit) {
/* This is a tail call site.
Perform the initialization equivalent to that emitted by
[Spacetime.code_for_function_prologue] for direct tail call
sites. */
c_node->data.callee_node = Encode_tail_caller_node(caller_node);
}
*node_hole = caml_spacetime_stored_pointer_of_c_node(c_node);
Assert(((uintnat) *node_hole) % sizeof(value) == 0);
Assert(*node_hole != Val_unit);
last_indirect_node_hole_ptr_result = &(c_node->data.callee_node);
return last_indirect_node_hole_ptr_result;
}
/* Some notes on why caml_call_gc doesn't need a distinguished node.
(Remember that thread switches are irrelevant here because each thread
has its own trie.)
caml_call_gc only invokes OCaml functions in the following circumstances:
1. running an OCaml finaliser;
2. executing an OCaml signal handler.
Both of these are done on the finaliser trie. Furthermore, both of
these invocations start via caml_callback; the code in this file for
handling that (caml_spacetime_c_to_ocaml) correctly copes with that by
attaching a single "caml_start_program" node that can cope with any
number of indirect OCaml calls from that point.
caml_call_gc may also invoke C functions that cause allocation. All of
these (assuming libunwind support is present) will cause a chain of
c_node structures to be attached to the trie, starting at the node hole
passed to caml_call_gc from OCaml code. These structures are extensible
and can thus accommodate any number of C backtraces leading from
caml_call_gc.
*/
/* CR-soon mshinwell: it might in fact be the case now that nothing called
from caml_call_gc will do any allocation that ends up on the trie. We
can revisit this after the first release. */
static NOINLINE void* find_trie_node_from_libunwind(int for_allocation,
uintnat wosize, struct ext_table** cached_frames)
{
#ifdef HAS_LIBUNWIND
/* Given that [caml_last_return_address] is the most recent call site in
OCaml code, and that we are now in C (or other) code called from that
site, obtain a backtrace using libunwind and graft the most recent
portion (everything back to but not including [caml_last_return_address])
onto the trie. See the important comment below regarding the fact that
call site, and not callee, addresses are recorded during this process.
If [for_allocation] is non-zero, the final node recorded will be for
an allocation, and the returned pointer is to the allocation node.
Otherwise, no node is recorded for the innermost frame, and the
returned pointer is a pointer to the *node hole* where a node for that
frame should be attached.
If [for_allocation] is non-zero then [wosize] must give the size in
words, excluding the header, of the value being allocated.
If [cached_frames != NULL] then:
1. If [*cached_frames] is NULL then save the captured backtrace in a
newly-allocated table and store the pointer to that table in
[*cached_frames];
2. Otherwise use [*cached_frames] as the unwinding information.
The intention is that when the context is known (e.g. a function such
as [caml_make_vect] known to have been directly invoked from OCaml),
we can avoid expensive calls to libunwind.
*/
unw_cursor_t cur;
unw_context_t ctx;
int ret;
int innermost_frame;
int frame;
static struct ext_table frames_local;
struct ext_table* frames;
static int ext_table_initialised = 0;
int have_frames_already = 0;
value* node_hole;
c_node* node = NULL;
int initial_table_size = 1000;
int must_initialise_node_for_allocation = 0;
if (!cached_frames) {
if (!ext_table_initialised) {
caml_ext_table_init(&frames_local, initial_table_size);
ext_table_initialised = 1;
}
else {
caml_ext_table_clear(&frames_local, 0);
}
frames = &frames_local;
} else {
if (*cached_frames) {
frames = *cached_frames;
have_frames_already = 1;
}
else {
frames = (struct ext_table*) malloc(sizeof(struct ext_table));
if (!frames) {
caml_fatal_error("Not enough memory for ext_table allocation");
}
caml_ext_table_init(frames, initial_table_size);
*cached_frames = frames;
}
}
if (!have_frames_already) {
/* Get the stack backtrace as far as [caml_last_return_address]. */
ret = unw_getcontext(&ctx);
if (ret != UNW_ESUCCESS) {
return NULL;
}
ret = unw_init_local(&cur, &ctx);
if (ret != UNW_ESUCCESS) {
return NULL;
}
while ((ret = unw_step(&cur)) > 0) {
unw_word_t ip;
unw_get_reg(&cur, UNW_REG_IP, &ip);
if (caml_last_return_address == (uintnat) ip) {
break;
}
else {
/* Inlined some of [caml_ext_table_add] for speed. */
if (frames->size < frames->capacity) {
frames->contents[frames->size++] = (void*) ip;
} else {
caml_ext_table_add(frames, (void*) ip);
}
}
}
}
/* We always need to ignore the frames for:
#0 find_trie_node_from_libunwind
#1 caml_spacetime_c_to_ocaml
Further, if this is not an allocation point, we should not create the
node for the current C function that triggered us (i.e. frame #2). */
innermost_frame = for_allocation ? 1 : 2;
if (frames->size - 1 < innermost_frame) {
/* Insufficiently many frames (maybe no frames) returned from
libunwind; just don't do anything. */
return NULL;
}
node_hole = caml_spacetime_trie_node_ptr;
/* Note that if [node_hole] is filled, then it must point to a C node,
since it is not possible for there to be a call point in an OCaml
function that sometimes calls C and sometimes calls OCaml. */
for (frame = frames->size - 1; frame >= innermost_frame; frame--) {
c_node_type expected_type;
void* pc = frames->contents[frame];
Assert (pc != (void*) caml_last_return_address);
if (!for_allocation) {
expected_type = CALL;
}
else {
expected_type = (frame > innermost_frame ? CALL : ALLOCATION);
}
if (*node_hole == Val_unit) {
node = allocate_c_node();
/* Note: for CALL nodes, the PC is the program counter at each call
site. We do not store program counter addresses of the start of
callees, unlike for OCaml nodes. This means that some trie nodes
will become conflated. These can be split during post-processing by
working out which function each call site was in. */
node->pc = (expected_type == CALL ? Encode_c_node_pc_for_call(pc)
: Encode_c_node_pc_for_alloc_point(pc));
*node_hole = caml_spacetime_stored_pointer_of_c_node(node);
if (expected_type == ALLOCATION) {
must_initialise_node_for_allocation = 1;
}
}
else {
c_node* prev;
int found = 0;
node = caml_spacetime_c_node_of_stored_pointer_not_null(*node_hole);
Assert(node != NULL);
Assert(node->next == Val_unit
|| (((uintnat) (node->next)) % sizeof(value) == 0));
prev = NULL;
while (!found && node != NULL) {
if (caml_spacetime_classify_c_node(node) == expected_type
&& pc_inside_c_node_matches(node, pc)) {
found = 1;
}
else {
prev = node;
node = caml_spacetime_c_node_of_stored_pointer(node->next);
}
}
if (!found) {
Assert(prev != NULL);
node = allocate_c_node();
node->pc = (expected_type == CALL ? Encode_c_node_pc_for_call(pc)
: Encode_c_node_pc_for_alloc_point(pc));
if (expected_type == ALLOCATION) {
must_initialise_node_for_allocation = 1;
}
prev->next = caml_spacetime_stored_pointer_of_c_node(node);
}
}
Assert(node != NULL);
Assert(caml_spacetime_classify_c_node(node) == expected_type);
Assert(pc_inside_c_node_matches(node, pc));
node_hole = &node->data.callee_node;
}
if (must_initialise_node_for_allocation) {
caml_spacetime_profinfo++;
if (caml_spacetime_profinfo > PROFINFO_MASK) {
/* Profiling counter overflow. */
caml_spacetime_profinfo = PROFINFO_MASK;
}
node->data.allocation.profinfo =
Make_header_with_profinfo(
/* "-1" because [c_node] has the GC header as its first
element. */
offsetof(c_node, data.allocation.count)/sizeof(value) - 1,
Infix_tag,
Caml_black,
caml_spacetime_profinfo);
node->data.allocation.count = Val_long(0);
/* Add the new allocation point into the linked list of all allocation
points. */
if (caml_all_allocation_points != NULL) {
node->data.allocation.next =
(value) &caml_all_allocation_points->count;
} else {
node->data.allocation.next = Val_unit;
}
caml_all_allocation_points = &node->data.allocation;
}
if (for_allocation) {
Assert(caml_spacetime_classify_c_node(node) == ALLOCATION);
Assert(caml_spacetime_c_node_of_stored_pointer(node->next) != node);
Assert(Profinfo_hd(node->data.allocation.profinfo) > 0);
node->data.allocation.count =
Val_long(Long_val(node->data.allocation.count) + (1 + wosize));
}
Assert(node->next != (value) NULL);
return for_allocation ? (void*) node : (void*) node_hole;
#else
return NULL;
#endif
}
void caml_spacetime_c_to_ocaml(void* ocaml_entry_point,
void* identifying_pc_for_caml_start_program)
{
/* Called in [caml_start_program] and [caml_callback*] when we are about
to cross from C into OCaml. [ocaml_entry_point] is the branch target.
This situation is handled by ensuring the presence of a new OCaml node
for the callback veneer; the node contains a single indirect call point
which accumulates the [ocaml_entry_point]s.
The layout of the node is described in the "system shape table"; see
asmrun/amd64.S.
*/
value node;
/* Update the trie with the current backtrace, as far back as
[caml_last_return_address], and leave the node hole pointer at
the correct place for attachment of a [caml_start_program] node. */
#ifdef HAS_LIBUNWIND
value* node_temp;
node_temp = (value*) find_trie_node_from_libunwind(0, 0, NULL);
if (node_temp != NULL) {
caml_spacetime_trie_node_ptr = node_temp;
}
#endif
if (*caml_spacetime_trie_node_ptr == Val_unit) {
uintnat size_including_header;
size_including_header =
1 /* GC header */ + Node_num_header_words + Indirect_num_fields;
node = allocate_uninitialized_ocaml_node(size_including_header);
Hd_val(node) =
Make_header(size_including_header - 1, OCaml_node_tag, Caml_black);
Assert((((uintnat) identifying_pc_for_caml_start_program) % 1) == 0);
Node_pc(node) = Encode_node_pc(identifying_pc_for_caml_start_program);
Tail_link(node) = node;
Indirect_pc_linked_list(node, Node_num_header_words) = Val_unit;
*caml_spacetime_trie_node_ptr = node;
}
else {
node = *caml_spacetime_trie_node_ptr;
/* If there is a node here already, it should never be an initialized
(but as yet unused) tail call point, since calls from OCaml into C
are never tail calls (and no C -> C call is marked as tail). */
Assert(!Is_tail_caller_node_encoded(node));
}
Assert(Is_ocaml_node(node));
Assert(Decode_node_pc(Node_pc(node))
== identifying_pc_for_caml_start_program);
Assert(Tail_link(node) == node);
Assert(Wosize_val(node) == Node_num_header_words + Indirect_num_fields);
/* Search the node to find the node hole corresponding to the indirect
call to the OCaml function. */
caml_spacetime_trie_node_ptr =
caml_spacetime_indirect_node_hole_ptr(
ocaml_entry_point,
&Indirect_pc_linked_list(node, Node_num_header_words),
Val_unit);
Assert(*caml_spacetime_trie_node_ptr == Val_unit
|| Is_ocaml_node(*caml_spacetime_trie_node_ptr));
}
extern void caml_garbage_collection(void); /* signals_asm.c */
extern void caml_array_bound_error(void); /* fail.c */
CAMLprim uintnat caml_spacetime_generate_profinfo (void* profinfo_words,
uintnat index_within_node)
{
/* Called from code that creates a value's header inside an OCaml
function. */
value node;
uintnat profinfo;
caml_spacetime_profinfo++;
if (caml_spacetime_profinfo > PROFINFO_MASK) {
/* Profiling counter overflow. */
caml_spacetime_profinfo = PROFINFO_MASK;
}
profinfo = caml_spacetime_profinfo;
/* CR-someday mshinwell: we could always use the [struct allocation_point]
overlay instead of the macros now. */
/* [node] isn't really a node; it points into the middle of
one---specifically to the "profinfo" word of an allocation point.
It's done like this to avoid re-calculating the place in the node
(which already has to be done in the OCaml-generated code run before
this function). */
node = (value) profinfo_words;
Assert(Alloc_point_profinfo(node, 0) == Val_unit);
/* The profinfo value is stored shifted to reduce the number of
instructions required on the OCaml side. It also enables us to use
[Infix_tag] to obtain valid value pointers into the middle of nodes,
which is used for the linked list of all allocation points. */
profinfo = Make_header_with_profinfo(
index_within_node, Infix_tag, Caml_black, profinfo);
Assert(!Is_block(profinfo));
Alloc_point_profinfo(node, 0) = profinfo;
/* The count is set to zero by the initialisation when the node was
created (see above). */
Assert(Alloc_point_count(node, 0) == Val_long(0));
/* Add the new allocation point into the linked list of all allocation
points. */
if (caml_all_allocation_points != NULL) {
Alloc_point_next_ptr(node, 0) = (value) &caml_all_allocation_points->count;
}
else {
Assert(Alloc_point_next_ptr(node, 0) == Val_unit);
}
caml_all_allocation_points = (allocation_point*) node;
return profinfo;
}
uintnat caml_spacetime_my_profinfo (struct ext_table** cached_frames,
uintnat wosize)
{
/* Return the profinfo value that should be written into a value's header
during an allocation from C. This may necessitate extending the trie
with information obtained from libunwind. */
c_node* node;
uintnat profinfo = 0;
node = find_trie_node_from_libunwind(1, wosize, cached_frames);
if (node != NULL) {
profinfo = ((uintnat) (node->data.allocation.profinfo)) >> PROFINFO_SHIFT;
}
return profinfo; /* N.B. not shifted by PROFINFO_SHIFT */
}
void caml_spacetime_automatic_snapshot (void)
{
if (automatic_snapshots) {
double start_time, end_time;
start_time = caml_sys_time_unboxed(Val_unit);
if (start_time >= next_snapshot_time) {
maybe_reopen_snapshot_channel();
caml_spacetime_save_snapshot(snapshot_channel, 0.0, 0);
end_time = caml_sys_time_unboxed(Val_unit);
next_snapshot_time = end_time + snapshot_interval;
}
}
}
CAMLprim value caml_spacetime_save_event_for_automatic_snapshots
(value v_event_name)
{
if (automatic_snapshots) {
maybe_reopen_snapshot_channel();
caml_spacetime_save_event_internal (Val_unit, snapshot_channel,
v_event_name);
}
return Val_unit;
}
void caml_spacetime_automatic_save (void)
{
/* Called from [atexit]. */
if (automatic_snapshots) {
automatic_snapshots = 0;
maybe_reopen_snapshot_channel();
save_trie(snapshot_channel, 0.0, 0);
caml_flush(snapshot_channel);
caml_close_channel(snapshot_channel);
}
}
CAMLprim value caml_spacetime_enabled (value v_unit)
{
return Val_true;
}
#else
/* Functions for when the compiler was not configured with "-spacetime". */
CAMLprim value caml_spacetime_write_magic_number(value v_channel)
{
return Val_unit;
}
CAMLprim value caml_spacetime_enabled (value v_unit)
{
return Val_false;
}
CAMLprim value caml_spacetime_save_event (value v_time_opt,
value v_channel,
value v_event_name)
{
return Val_unit;
}
CAMLprim value caml_spacetime_save_event_for_automatic_snapshots
(value v_event_name)
{
return Val_unit;
}
CAMLprim value caml_spacetime_save_trie (value ignored)
{
return Val_unit;
}
#endif