More efficient management of code fragments (#9654)

* Introducing codefrag: a new runtime module to work with code fragments

This module collects all the operations on code fragments performed in
various places of the runtime systems.  Applies both to bytecode and
to native code.

The implementation is based on skiplists, so that "lookup fragment by
PC" and "lookup fragment by number" are efficient (logarithmic in the
number of code fragments).  "Lookup fragment by digest" remains
linear-time.

The new module also improves the handling of digests: now it is
possible to mark a code fragment as "no digest" i.e. not marshal-able.

* Use the new "codefrag" runtime module for marshaling and for the
  debugger interface

Replace the previous handling of code fragments with calls to the
functions provided by the "codefrag" runtime module.
master
Xavier Leroy 2020-06-11 10:39:19 +02:00 committed by GitHub
parent b94a3776a0
commit 08e58c836e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 258 additions and 113 deletions

View File

@ -40,6 +40,11 @@ Working version
- #9634: Allow initial and repeated commas in `OCAMLRUNPARAM`.
(Nicolás Ojeda Bär, review by Gabriel Scherer)
- #9654: More efficient management of code fragments.
(Xavier Leroy, review by Jacques-Henri Jourdan, Damien Doligez, and
Stephen Dolan)
### Code generation and optimizations:
- #9441: Add RISC-V RV64G native-code backend.

View File

@ -26,7 +26,7 @@ BYTECODE_C_SOURCES := $(addsuffix .c, \
floats str array io extern intern hash sys meta parsing gc_ctrl md5 obj \
lexing callback debugger weak compact finalise custom dynlink \
spacetime_byt afl $(UNIX_OR_WIN32) bigarray main memprof domain \
skiplist)
skiplist codefrag)
NATIVE_C_SOURCES := $(addsuffix .c, \
startup_aux startup_nat main fail_nat roots_nat signals \
@ -35,7 +35,7 @@ NATIVE_C_SOURCES := $(addsuffix .c, \
lexing $(UNIX_OR_WIN32) printexc callback weak compact finalise custom \
globroots backtrace_nat backtrace dynlink_nat debugger meta \
dynlink clambda_checks spacetime_nat spacetime_snapshot afl bigarray \
memprof domain skiplist)
memprof domain skiplist codefrag)
GENERATED_HEADERS := caml/opnames.h caml/version.h caml/jumptbl.h
CONFIG_HEADERS := caml/m.h caml/s.h

80
runtime/caml/codefrag.h Normal file
View File

@ -0,0 +1,80 @@
/**************************************************************************/
/* */
/* OCaml */
/* */
/* Xavier Leroy, projet Cambium, INRIA Paris */
/* */
/* Copyright 2020 Institut National de Recherche en Informatique et */
/* en Automatique. */
/* */
/* All rights reserved. This file is distributed under the terms of */
/* the GNU Lesser General Public License version 2.1, with the */
/* special exception on linking described in the file LICENSE. */
/* */
/**************************************************************************/
/* A table of all code fragments (main program and dynlinked modules) */
#ifndef CAML_CODEFRAG_H
#define CAML_CODEFRAG_H
#ifdef CAML_INTERNALS
enum digest_status {
DIGEST_LATER, /* computed on demand */
DIGEST_NOW, /* computed by caml_register_code_fragment */
DIGEST_PROVIDED, /* passed by caller of caml_register_code_fragment */
DIGEST_IGNORE /* this code fragment is private and cannot be
identified by its digest */
};
struct code_fragment {
char *code_start;
char *code_end;
int fragnum;
unsigned char digest[16];
enum digest_status digest_status;
};
/* Register a code fragment for addresses [start] (included)
to [end] (excluded). This range of addresses is assumed
disjoint from all currently-registered code fragments.
[digest_kind] explains what digest is to be associated to the code
fragment. If [digest_kind == DIGEST_PROVIDED], the [opt_digest]
parameter points to the 16-byte digest of the code.
For all other values of [digest_kind], [opt_digest] is ignored
and should be [NULL].
The returned integer is the fragment number (fragnum) associated
with the new code fragment. */
extern int caml_register_code_fragment(char * start, char * end,
enum digest_status digest_kind,
unsigned char * opt_digest);
/* Un-register a code fragment. */
extern void caml_remove_code_fragment(struct code_fragment * cf);
/* Find the code fragment whose range of addresses contains [pc].
Returns NULL if none exists. */
extern struct code_fragment * caml_find_code_fragment_by_pc(char *pc);
/* Find the code fragment whose fragment number is [fragnum].
Returns NULL if none exists. */
extern struct code_fragment * caml_find_code_fragment_by_num(int fragnum);
/* Find the code fragment whose digest is equal to the given digest.
Returns NULL if none exists. */
extern struct code_fragment *
caml_find_code_fragment_by_digest(unsigned char digest[16]);
/* Return the digest of the given code fragment.
If the code fragment was registered in [DIGEST_LATER] mode
and if the digest was not computed yet, it is obtained by hashing
the bytes between [code_start] and [code_end].
Returns NULL if the code fragment was registered with [DIGEST_IGNORE]. */
extern unsigned char * caml_digest_of_code_fragment(struct code_fragment *);
#endif
#endif

View File

@ -430,18 +430,6 @@ extern int caml_snwprintf(wchar_t * buf,
# endif
#endif
/* A table of all code fragments (main program and dynlinked modules) */
struct code_fragment {
char *code_start;
char *code_end;
unsigned char digest[16];
char digest_computed;
};
extern struct ext_table caml_code_fragments_table;
int caml_find_code_fragment(char *pc, int *index, struct code_fragment **cf);
#endif /* CAML_INTERNALS */
/* The [backtrace_slot] type represents values stored in

115
runtime/codefrag.c Normal file
View File

@ -0,0 +1,115 @@
/**************************************************************************/
/* */
/* OCaml */
/* */
/* Xavier Leroy, projet Cambium, INRIA Paris */
/* */
/* Copyright 2020 Institut National de Recherche en Informatique et */
/* en Automatique. */
/* */
/* All rights reserved. This file is distributed under the terms of */
/* the GNU Lesser General Public License version 2.1, with the */
/* special exception on linking described in the file LICENSE. */
/* */
/**************************************************************************/
#define CAML_INTERNALS
/* A table of all code fragments (main program and dynlinked modules) */
#include <string.h>
#include <stddef.h>
#include "caml/codefrag.h"
#include "caml/misc.h"
#include "caml/md5.h"
#include "caml/memory.h"
#include "caml/skiplist.h"
static struct skiplist code_fragments_by_pc = SKIPLIST_STATIC_INITIALIZER;
static struct skiplist code_fragments_by_num = SKIPLIST_STATIC_INITIALIZER;
static int code_fragments_counter = 0;
int caml_register_code_fragment(char * start, char * end,
enum digest_status digest_kind,
unsigned char * opt_digest)
{
struct code_fragment * cf = caml_stat_alloc(sizeof(struct code_fragment));
cf->code_start = start;
cf->code_end = end;
switch (digest_kind) {
case DIGEST_LATER:
break;
case DIGEST_NOW:
caml_md5_block(cf->digest, cf->code_start, cf->code_end - cf->code_start);
digest_kind = DIGEST_PROVIDED;
break;
case DIGEST_PROVIDED:
memcpy(cf->digest, opt_digest, 16);
break;
case DIGEST_IGNORE:
break;
}
cf->digest_status = digest_kind;
cf->fragnum = code_fragments_counter++;
caml_skiplist_insert(&code_fragments_by_pc,
(uintnat) start, (uintnat) cf);
caml_skiplist_insert(&code_fragments_by_num,
(uintnat) cf->fragnum, (uintnat) cf);
return cf->fragnum;
}
void caml_remove_code_fragment(struct code_fragment * cf)
{
caml_skiplist_remove(&code_fragments_by_pc, (uintnat) cf->code_start);
caml_skiplist_remove(&code_fragments_by_num, cf->fragnum);
caml_stat_free(cf);
}
struct code_fragment * caml_find_code_fragment_by_pc(char *pc)
{
struct code_fragment * cf;
uintnat key, data;
if (caml_skiplist_find_below(&code_fragments_by_pc,
(uintnat) pc, &key, &data)) {
cf = (struct code_fragment *) data;
CAMLassert(cf->code_start <= pc);
if (pc < cf->code_end) return cf;
}
return NULL;
}
struct code_fragment * caml_find_code_fragment_by_num(int fragnum)
{
uintnat data;
if (caml_skiplist_find(&code_fragments_by_num, fragnum, &data)) {
return (struct code_fragment *) data;
} else {
return NULL;
}
}
unsigned char * caml_digest_of_code_fragment(struct code_fragment * cf)
{
if (cf->digest_status == DIGEST_IGNORE)
return NULL;
if (cf->digest_status == DIGEST_LATER) {
caml_md5_block(cf->digest, cf->code_start, cf->code_end - cf->code_start);
cf->digest_status = DIGEST_PROVIDED;
}
return cf->digest;
}
struct code_fragment *
caml_find_code_fragment_by_digest(unsigned char digest[16])
{
FOREACH_SKIPLIST_ELEMENT(e, &code_fragments_by_pc, {
struct code_fragment * cf = (struct code_fragment *) e->data;
unsigned char * d = caml_digest_of_code_fragment(cf);
if (d != NULL && memcmp(digest, d, 16) == 0) return cf;
})
return NULL;
}

View File

@ -24,6 +24,7 @@
#include <string.h>
#include "caml/alloc.h"
#include "caml/codefrag.h"
#include "caml/config.h"
#include "caml/debugger.h"
#include "caml/misc.h"
@ -308,13 +309,8 @@ static void restore_instruction(code_t pc)
static code_t pc_from_pos(int frag, intnat pos)
{
struct code_fragment *cf;
CAMLassert (frag >= 0);
CAMLassert (frag < caml_code_fragments_table.size);
CAMLassert (pos >= 0);
CAMLassert (pos < caml_code_size);
cf = (struct code_fragment *) caml_code_fragments_table.contents[frag];
struct code_fragment *cf = caml_find_code_fragment_by_num(frag);
CAMLassert(cf != NULL);
return (code_t) (cf->code_start + pos);
}
@ -337,7 +333,8 @@ void caml_debugger_code_unloaded(int index)
caml_putch(dbg_out, REP_CODE_UNLOADED);
caml_putword(dbg_out, index);
cf = (struct code_fragment *) caml_code_fragments_table.contents[index];
cf = caml_find_code_fragment_by_num(index);
CAMLassert(cf != NULL);
FOREACH_SKIPLIST_ELEMENT(elt, &event_points_table, {
pc = (char *) elt->key;
@ -357,9 +354,8 @@ void caml_debugger(enum event_kind event, value param)
value *frame, *newframe;
intnat i, pos;
value val;
int frag, found = 0;
int frag;
struct code_fragment *cf;
(void) found; /* Silence unused variable warning. */
if (dbg_socket == -1) return; /* Not connected to a debugger. */
@ -407,9 +403,9 @@ void caml_debugger(enum event_kind event, value param)
caml_putword(dbg_out, caml_event_count);
if (event == EVENT_COUNT || event == BREAKPOINT) {
caml_putword(dbg_out, Caml_state->stack_high - frame);
found = caml_find_code_fragment((char*) Pc(frame), &frag, &cf);
CAMLassert(found);
caml_putword(dbg_out, frag);
cf = caml_find_code_fragment_by_pc((char*) Pc(frame));
CAMLassert(cf != NULL);
caml_putword(dbg_out, cf->fragnum);
caml_putword(dbg_out, (char*) Pc(frame) - cf->code_start);
} else {
/* No PC and no stack frame associated with other events */
@ -472,8 +468,8 @@ void caml_debugger(enum event_kind event, value param)
case REQ_GET_FRAME:
caml_putword(dbg_out, Caml_state->stack_high - frame);
if (frame < Caml_state->stack_high &&
caml_find_code_fragment((char*) Pc(frame), &frag, &cf)) {
caml_putword(dbg_out, frag);
(cf = caml_find_code_fragment_by_pc((char*) Pc(frame))) != NULL) {
caml_putword(dbg_out, cf->fragnum);
caml_putword(dbg_out, (char*) Pc(frame) - cf->code_start);
} else {
caml_putword(dbg_out, 0);
@ -489,12 +485,12 @@ void caml_debugger(enum event_kind event, value param)
i = caml_getword(dbg_in);
newframe = frame + Extra_args(frame) + i + 3;
if (newframe >= Caml_state->stack_high ||
!caml_find_code_fragment((char*) Pc(newframe), &frag, &cf)) {
(cf = caml_find_code_fragment_by_pc((char *) Pc(newframe))) == NULL) {
caml_putword(dbg_out, -1);
} else {
frame = newframe;
caml_putword(dbg_out, Caml_state->stack_high - frame);
caml_putword(dbg_out, frag);
caml_putword(dbg_out, cf->fragnum);
caml_putword(dbg_out, (char*) Pc(frame) - cf->code_start);
}
caml_flush(dbg_out);
@ -547,9 +543,9 @@ void caml_debugger(enum event_kind event, value param)
break;
case REQ_GET_CLOSURE_CODE:
val = getval(dbg_in);
found = caml_find_code_fragment((char*) Code_val(val), &frag, &cf);
CAMLassert(found);
caml_putword(dbg_out, frag);
cf = caml_find_code_fragment_by_pc((char*) Code_val(val));
CAMLassert(cf != NULL);
caml_putword(dbg_out, cf->fragnum);
caml_putword(dbg_out, (char*) Code_val(val) - cf->code_start);
caml_flush(dbg_out);
break;

View File

@ -20,6 +20,7 @@
#include "caml/memory.h"
#include "caml/stack.h"
#include "caml/callback.h"
#include "caml/codefrag.h"
#include "caml/alloc.h"
#include "caml/intext.h"
#include "caml/osdeps.h"
@ -100,7 +101,6 @@ CAMLprim value caml_natdynlink_run(value handle_v, value symbol) {
CAMLlocal1 (result);
void *sym,*sym2;
void* handle = Handle_val(handle_v);
struct code_fragment * cf;
#define optsym(n) getsym(handle,unit,n)
const char *unit;
@ -128,11 +128,8 @@ CAMLprim value caml_natdynlink_run(value handle_v, value symbol) {
sym2 = optsym("__code_end");
if (NULL != sym && NULL != sym2) {
caml_page_table_add(In_code_area, sym, sym2);
cf = caml_stat_alloc(sizeof(struct code_fragment));
cf->code_start = (char *) sym;
cf->code_end = (char *) sym2;
cf->digest_computed = 0;
caml_ext_table_add(&caml_code_fragments_table, cf);
caml_register_code_fragment((char *) sym, (char *) sym2,
DIGEST_LATER, NULL);
}
if( caml_natdynlink_hook != NULL ) caml_natdynlink_hook(handle,unit);

View File

@ -21,13 +21,13 @@
#include <string.h>
#include "caml/alloc.h"
#include "caml/codefrag.h"
#include "caml/config.h"
#include "caml/custom.h"
#include "caml/fail.h"
#include "caml/gc.h"
#include "caml/intext.h"
#include "caml/io.h"
#include "caml/md5.h"
#include "caml/memory.h"
#include "caml/misc.h"
#include "caml/mlvalues.h"
@ -713,15 +713,15 @@ static void extern_rec(value v)
}
}
}
else if (caml_find_code_fragment((char*) v, NULL, &cf)) {
else if ((cf = caml_find_code_fragment_by_pc((char*) v)) != NULL) {
const char * digest;
if ((extern_flags & CLOSURES) == 0)
extern_invalid_argument("output_value: functional value");
if (! cf->digest_computed) {
caml_md5_block(cf->digest, cf->code_start, cf->code_end - cf->code_start);
cf->digest_computed = 1;
}
digest = (const char *) caml_digest_of_code_fragment(cf);
if (digest == NULL)
extern_invalid_argument("output_value: private function");
writecode32(CODE_CODEPOINTER, (char *) v - cf->code_start);
writeblock((const char *)cf->digest, 16);
writeblock(digest, 16);
} else {
extern_invalid_argument("output_value: abstract value (outside heap)");
}

View File

@ -25,11 +25,11 @@
#include <io.h>
#endif
#include "caml/codefrag.h"
#include "caml/debugger.h"
#include "caml/fix_code.h"
#include "caml/instruct.h"
#include "caml/intext.h"
#include "caml/md5.h"
#include "caml/memory.h"
#include "caml/misc.h"
#include "caml/mlvalues.h"
@ -37,20 +37,14 @@
code_t caml_start_code;
asize_t caml_code_size;
struct ext_table caml_code_fragments_table;
/* Read the main bytecode block from a file */
void caml_init_code_fragments(void) {
struct code_fragment * cf;
/* Register the code in the table of code fragments */
cf = caml_stat_alloc(sizeof(struct code_fragment));
cf->code_start = (char *) caml_start_code;
cf->code_end = (char *) caml_start_code + caml_code_size;
caml_md5_block(cf->digest, caml_start_code, caml_code_size);
cf->digest_computed = 1;
caml_ext_table_init(&caml_code_fragments_table, 8);
caml_ext_table_add(&caml_code_fragments_table, cf);
/* Register the main bytecode block in the table of code fragments */
caml_register_code_fragment((char *) caml_start_code,
(char *) caml_start_code + caml_code_size,
DIGEST_NOW, NULL);
}
void caml_load_code(int fd, asize_t len)

View File

@ -23,13 +23,13 @@
#include <stdio.h>
#include "caml/alloc.h"
#include "caml/callback.h"
#include "caml/codefrag.h"
#include "caml/config.h"
#include "caml/custom.h"
#include "caml/fail.h"
#include "caml/gc.h"
#include "caml/intext.h"
#include "caml/io.h"
#include "caml/md5.h"
#include "caml/memory.h"
#include "caml/memprof.h"
#include "caml/mlvalues.h"
@ -953,21 +953,11 @@ CAMLprim value caml_marshal_data_size(value buff, value ofs)
static char * intern_resolve_code_pointer(unsigned char digest[16],
asize_t offset)
{
int i;
for (i = caml_code_fragments_table.size - 1; i >= 0; i--) {
struct code_fragment * cf = caml_code_fragments_table.contents[i];
if (! cf->digest_computed) {
caml_md5_block(cf->digest, cf->code_start, cf->code_end - cf->code_start);
cf->digest_computed = 1;
}
if (memcmp(digest, cf->digest, 16) == 0) {
if (cf->code_start + offset < cf->code_end)
return cf->code_start + offset;
else
return NULL;
}
}
return NULL;
struct code_fragment * cf = caml_find_code_fragment_by_digest(digest);
if (cf != NULL && cf->code_start + offset < cf->code_end)
return cf->code_start + offset;
else
return NULL;
}
static void intern_bad_code_pointer(unsigned char digest[16])

View File

@ -20,6 +20,7 @@
#include <string.h>
#include "caml/alloc.h"
#include "caml/backtrace_prim.h"
#include "caml/codefrag.h"
#include "caml/config.h"
#include "caml/debugger.h"
#include "caml/fail.h"
@ -93,25 +94,26 @@ CAMLprim value caml_reify_bytecode(value ls_prog,
{
CAMLparam3(ls_prog, debuginfo, digest_opt);
CAMLlocal3(clos, bytecode, retval);
struct code_fragment * cf = caml_stat_alloc(sizeof(struct code_fragment));
code_t prog;
asize_t len;
enum digest_status digest_kind;
unsigned char * digest;
int fragnum;
prog = (code_t)buffer_of_bytes_array(ls_prog, &len);
caml_add_debug_info(prog, Val_long(len), debuginfo);
cf->code_start = (char *) prog;
cf->code_end = (char *) prog + len;
/* match (digest_opt : string option) with */
if (Is_block(digest_opt)) {
/* | Some digest -> */
memcpy(cf->digest, String_val(Field(digest_opt, 0)), 16);
cf->digest_computed = 1;
digest_kind = DIGEST_PROVIDED;
digest = (unsigned char *) String_val(Field(digest_opt, 0));
} else {
/* | None -> */
cf->digest_computed = 0;
digest_kind = DIGEST_LATER;
digest = NULL;
}
caml_ext_table_add(&caml_code_fragments_table, cf);
fragnum = caml_register_code_fragment((char *) prog, (char *) prog + len,
digest_kind, digest);
#ifdef ARCH_BIG_ENDIAN
caml_fixup_endianness((code_t) prog, len);
#endif
@ -121,7 +123,7 @@ CAMLprim value caml_reify_bytecode(value ls_prog,
caml_prepare_bytecode((code_t) prog, len);
/* Notify debugger after fragment gets added and reified. */
caml_debugger(CODE_LOADED, Val_long(caml_code_fragments_table.size - 1));
caml_debugger(CODE_LOADED, Val_long(fragnum));
clos = caml_alloc_small (1, Closure_tag);
Code_val(clos) = (code_t) prog;
@ -142,21 +144,19 @@ CAMLprim value caml_static_release_bytecode(value bc)
{
code_t prog;
asize_t len;
int found, index;
struct code_fragment *cf;
prog = Bytecode_val(bc)->prog;
len = Bytecode_val(bc)->len;
caml_remove_debug_info(prog);
found = caml_find_code_fragment((char*) prog, &index, &cf);
/* Not matched with a caml_reify_bytecode call; impossible. */
CAMLassert(found); (void) found; /* Silence unused variable warning. */
cf = caml_find_code_fragment_by_pc((char *) prog);
CAMLassert(cf != NULL);
/* Notify debugger before the fragment gets destroyed. */
caml_debugger(CODE_UNLOADED, Val_long(index));
caml_debugger(CODE_UNLOADED, Val_long(cf->fragnum));
caml_ext_table_remove(&caml_code_fragments_table, cf);
caml_remove_code_fragment(cf);
#ifndef NATIVE_CODE
caml_release_bytecode(prog, len);

View File

@ -205,19 +205,3 @@ int caml_runtime_warnings_active(void)
}
return 1;
}
int caml_find_code_fragment(char *pc, int *index, struct code_fragment **cf)
{
struct code_fragment *cfi;
int i;
for (i = 0; i < caml_code_fragments_table.size; i++) {
cfi = (struct code_fragment *) caml_code_fragments_table.contents[i];
if ((char*) pc >= cfi->code_start && (char*) pc < cfi->code_end) {
if (index != NULL) *index = i;
if (cf != NULL) *cf = cfi;
return 1;
}
}
return 0;
}

View File

@ -22,6 +22,7 @@
#include "caml/callback.h"
#include "caml/backtrace.h"
#include "caml/custom.h"
#include "caml/codefrag.h"
#include "caml/debugger.h"
#include "caml/domain.h"
#include "caml/eventlog.h"
@ -47,7 +48,6 @@
extern int caml_parser_trace;
char * caml_code_area_start, * caml_code_area_end;
struct ext_table caml_code_fragments_table;
/* Initialize the atom table and the static data and code area limits. */
@ -57,7 +57,6 @@ static void init_static(void)
{
extern struct segment caml_data_segments[], caml_code_segments[];
int i;
struct code_fragment * cf;
caml_init_atom_table ();
@ -79,12 +78,9 @@ static void init_static(void)
caml_code_area_end = caml_code_segments[i].end;
}
/* Register the code in the table of code fragments */
cf = caml_stat_alloc(sizeof(struct code_fragment));
cf->code_start = caml_code_area_start;
cf->code_end = caml_code_area_end;
cf->digest_computed = 0;
caml_ext_table_init(&caml_code_fragments_table, 8);
caml_ext_table_add(&caml_code_fragments_table, cf);
caml_register_code_fragment(caml_code_area_start,
caml_code_area_end,
DIGEST_LATER, NULL);
}
/* These are termination hooks used by the systhreads library */