diff --git a/Changes b/Changes index a314ad553..753da53fd 100644 --- a/Changes +++ b/Changes @@ -40,6 +40,11 @@ Working version - #9634: Allow initial and repeated commas in `OCAMLRUNPARAM`. (Nicolás Ojeda Bär, review by Gabriel Scherer) +- #9654: More efficient management of code fragments. + (Xavier Leroy, review by Jacques-Henri Jourdan, Damien Doligez, and + Stephen Dolan) + + ### Code generation and optimizations: - #9441: Add RISC-V RV64G native-code backend. diff --git a/runtime/Makefile b/runtime/Makefile index 1d4563119..aa7853430 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -26,7 +26,7 @@ BYTECODE_C_SOURCES := $(addsuffix .c, \ floats str array io extern intern hash sys meta parsing gc_ctrl md5 obj \ lexing callback debugger weak compact finalise custom dynlink \ spacetime_byt afl $(UNIX_OR_WIN32) bigarray main memprof domain \ - skiplist) + skiplist codefrag) NATIVE_C_SOURCES := $(addsuffix .c, \ startup_aux startup_nat main fail_nat roots_nat signals \ @@ -35,7 +35,7 @@ NATIVE_C_SOURCES := $(addsuffix .c, \ lexing $(UNIX_OR_WIN32) printexc callback weak compact finalise custom \ globroots backtrace_nat backtrace dynlink_nat debugger meta \ dynlink clambda_checks spacetime_nat spacetime_snapshot afl bigarray \ - memprof domain skiplist) + memprof domain skiplist codefrag) GENERATED_HEADERS := caml/opnames.h caml/version.h caml/jumptbl.h CONFIG_HEADERS := caml/m.h caml/s.h diff --git a/runtime/caml/codefrag.h b/runtime/caml/codefrag.h new file mode 100644 index 000000000..ff623c01f --- /dev/null +++ b/runtime/caml/codefrag.h @@ -0,0 +1,80 @@ +/**************************************************************************/ +/* */ +/* OCaml */ +/* */ +/* Xavier Leroy, projet Cambium, INRIA Paris */ +/* */ +/* Copyright 2020 Institut National de Recherche en Informatique et */ +/* en Automatique. */ +/* */ +/* All rights reserved. This file is distributed under the terms of */ +/* the GNU Lesser General Public License version 2.1, with the */ +/* special exception on linking described in the file LICENSE. */ +/* */ +/**************************************************************************/ + +/* A table of all code fragments (main program and dynlinked modules) */ + +#ifndef CAML_CODEFRAG_H +#define CAML_CODEFRAG_H + +#ifdef CAML_INTERNALS + +enum digest_status { + DIGEST_LATER, /* computed on demand */ + DIGEST_NOW, /* computed by caml_register_code_fragment */ + DIGEST_PROVIDED, /* passed by caller of caml_register_code_fragment */ + DIGEST_IGNORE /* this code fragment is private and cannot be + identified by its digest */ +}; + +struct code_fragment { + char *code_start; + char *code_end; + int fragnum; + unsigned char digest[16]; + enum digest_status digest_status; +}; + +/* Register a code fragment for addresses [start] (included) + to [end] (excluded). This range of addresses is assumed + disjoint from all currently-registered code fragments. + + [digest_kind] explains what digest is to be associated to the code + fragment. If [digest_kind == DIGEST_PROVIDED], the [opt_digest] + parameter points to the 16-byte digest of the code. + For all other values of [digest_kind], [opt_digest] is ignored + and should be [NULL]. + + The returned integer is the fragment number (fragnum) associated + with the new code fragment. */ +extern int caml_register_code_fragment(char * start, char * end, + enum digest_status digest_kind, + unsigned char * opt_digest); + +/* Un-register a code fragment. */ +extern void caml_remove_code_fragment(struct code_fragment * cf); + +/* Find the code fragment whose range of addresses contains [pc]. + Returns NULL if none exists. */ +extern struct code_fragment * caml_find_code_fragment_by_pc(char *pc); + +/* Find the code fragment whose fragment number is [fragnum]. + Returns NULL if none exists. */ +extern struct code_fragment * caml_find_code_fragment_by_num(int fragnum); + +/* Find the code fragment whose digest is equal to the given digest. + Returns NULL if none exists. */ +extern struct code_fragment * + caml_find_code_fragment_by_digest(unsigned char digest[16]); + +/* Return the digest of the given code fragment. + If the code fragment was registered in [DIGEST_LATER] mode + and if the digest was not computed yet, it is obtained by hashing + the bytes between [code_start] and [code_end]. + Returns NULL if the code fragment was registered with [DIGEST_IGNORE]. */ +extern unsigned char * caml_digest_of_code_fragment(struct code_fragment *); + +#endif + +#endif diff --git a/runtime/caml/misc.h b/runtime/caml/misc.h index 6c3810ded..81798a014 100644 --- a/runtime/caml/misc.h +++ b/runtime/caml/misc.h @@ -430,18 +430,6 @@ extern int caml_snwprintf(wchar_t * buf, # endif #endif -/* A table of all code fragments (main program and dynlinked modules) */ -struct code_fragment { - char *code_start; - char *code_end; - unsigned char digest[16]; - char digest_computed; -}; - -extern struct ext_table caml_code_fragments_table; - -int caml_find_code_fragment(char *pc, int *index, struct code_fragment **cf); - #endif /* CAML_INTERNALS */ /* The [backtrace_slot] type represents values stored in diff --git a/runtime/codefrag.c b/runtime/codefrag.c new file mode 100644 index 000000000..2ab957167 --- /dev/null +++ b/runtime/codefrag.c @@ -0,0 +1,115 @@ +/**************************************************************************/ +/* */ +/* OCaml */ +/* */ +/* Xavier Leroy, projet Cambium, INRIA Paris */ +/* */ +/* Copyright 2020 Institut National de Recherche en Informatique et */ +/* en Automatique. */ +/* */ +/* All rights reserved. This file is distributed under the terms of */ +/* the GNU Lesser General Public License version 2.1, with the */ +/* special exception on linking described in the file LICENSE. */ +/* */ +/**************************************************************************/ + +#define CAML_INTERNALS + +/* A table of all code fragments (main program and dynlinked modules) */ + +#include +#include +#include "caml/codefrag.h" +#include "caml/misc.h" +#include "caml/md5.h" +#include "caml/memory.h" +#include "caml/skiplist.h" + +static struct skiplist code_fragments_by_pc = SKIPLIST_STATIC_INITIALIZER; + +static struct skiplist code_fragments_by_num = SKIPLIST_STATIC_INITIALIZER; + +static int code_fragments_counter = 0; + +int caml_register_code_fragment(char * start, char * end, + enum digest_status digest_kind, + unsigned char * opt_digest) +{ + struct code_fragment * cf = caml_stat_alloc(sizeof(struct code_fragment)); + + cf->code_start = start; + cf->code_end = end; + switch (digest_kind) { + case DIGEST_LATER: + break; + case DIGEST_NOW: + caml_md5_block(cf->digest, cf->code_start, cf->code_end - cf->code_start); + digest_kind = DIGEST_PROVIDED; + break; + case DIGEST_PROVIDED: + memcpy(cf->digest, opt_digest, 16); + break; + case DIGEST_IGNORE: + break; + } + cf->digest_status = digest_kind; + cf->fragnum = code_fragments_counter++; + caml_skiplist_insert(&code_fragments_by_pc, + (uintnat) start, (uintnat) cf); + caml_skiplist_insert(&code_fragments_by_num, + (uintnat) cf->fragnum, (uintnat) cf); + return cf->fragnum; +} + +void caml_remove_code_fragment(struct code_fragment * cf) +{ + caml_skiplist_remove(&code_fragments_by_pc, (uintnat) cf->code_start); + caml_skiplist_remove(&code_fragments_by_num, cf->fragnum); + caml_stat_free(cf); +} + +struct code_fragment * caml_find_code_fragment_by_pc(char *pc) +{ + struct code_fragment * cf; + uintnat key, data; + + if (caml_skiplist_find_below(&code_fragments_by_pc, + (uintnat) pc, &key, &data)) { + cf = (struct code_fragment *) data; + CAMLassert(cf->code_start <= pc); + if (pc < cf->code_end) return cf; + } + return NULL; +} + +struct code_fragment * caml_find_code_fragment_by_num(int fragnum) +{ + uintnat data; + if (caml_skiplist_find(&code_fragments_by_num, fragnum, &data)) { + return (struct code_fragment *) data; + } else { + return NULL; + } +} + +unsigned char * caml_digest_of_code_fragment(struct code_fragment * cf) +{ + if (cf->digest_status == DIGEST_IGNORE) + return NULL; + if (cf->digest_status == DIGEST_LATER) { + caml_md5_block(cf->digest, cf->code_start, cf->code_end - cf->code_start); + cf->digest_status = DIGEST_PROVIDED; + } + return cf->digest; +} + +struct code_fragment * + caml_find_code_fragment_by_digest(unsigned char digest[16]) +{ + FOREACH_SKIPLIST_ELEMENT(e, &code_fragments_by_pc, { + struct code_fragment * cf = (struct code_fragment *) e->data; + unsigned char * d = caml_digest_of_code_fragment(cf); + if (d != NULL && memcmp(digest, d, 16) == 0) return cf; + }) + return NULL; +} diff --git a/runtime/debugger.c b/runtime/debugger.c index 37e425518..050389e21 100644 --- a/runtime/debugger.c +++ b/runtime/debugger.c @@ -24,6 +24,7 @@ #include #include "caml/alloc.h" +#include "caml/codefrag.h" #include "caml/config.h" #include "caml/debugger.h" #include "caml/misc.h" @@ -308,13 +309,8 @@ static void restore_instruction(code_t pc) static code_t pc_from_pos(int frag, intnat pos) { - struct code_fragment *cf; - CAMLassert (frag >= 0); - CAMLassert (frag < caml_code_fragments_table.size); - CAMLassert (pos >= 0); - CAMLassert (pos < caml_code_size); - - cf = (struct code_fragment *) caml_code_fragments_table.contents[frag]; + struct code_fragment *cf = caml_find_code_fragment_by_num(frag); + CAMLassert(cf != NULL); return (code_t) (cf->code_start + pos); } @@ -337,7 +333,8 @@ void caml_debugger_code_unloaded(int index) caml_putch(dbg_out, REP_CODE_UNLOADED); caml_putword(dbg_out, index); - cf = (struct code_fragment *) caml_code_fragments_table.contents[index]; + cf = caml_find_code_fragment_by_num(index); + CAMLassert(cf != NULL); FOREACH_SKIPLIST_ELEMENT(elt, &event_points_table, { pc = (char *) elt->key; @@ -357,9 +354,8 @@ void caml_debugger(enum event_kind event, value param) value *frame, *newframe; intnat i, pos; value val; - int frag, found = 0; + int frag; struct code_fragment *cf; - (void) found; /* Silence unused variable warning. */ if (dbg_socket == -1) return; /* Not connected to a debugger. */ @@ -407,9 +403,9 @@ void caml_debugger(enum event_kind event, value param) caml_putword(dbg_out, caml_event_count); if (event == EVENT_COUNT || event == BREAKPOINT) { caml_putword(dbg_out, Caml_state->stack_high - frame); - found = caml_find_code_fragment((char*) Pc(frame), &frag, &cf); - CAMLassert(found); - caml_putword(dbg_out, frag); + cf = caml_find_code_fragment_by_pc((char*) Pc(frame)); + CAMLassert(cf != NULL); + caml_putword(dbg_out, cf->fragnum); caml_putword(dbg_out, (char*) Pc(frame) - cf->code_start); } else { /* No PC and no stack frame associated with other events */ @@ -472,8 +468,8 @@ void caml_debugger(enum event_kind event, value param) case REQ_GET_FRAME: caml_putword(dbg_out, Caml_state->stack_high - frame); if (frame < Caml_state->stack_high && - caml_find_code_fragment((char*) Pc(frame), &frag, &cf)) { - caml_putword(dbg_out, frag); + (cf = caml_find_code_fragment_by_pc((char*) Pc(frame))) != NULL) { + caml_putword(dbg_out, cf->fragnum); caml_putword(dbg_out, (char*) Pc(frame) - cf->code_start); } else { caml_putword(dbg_out, 0); @@ -489,12 +485,12 @@ void caml_debugger(enum event_kind event, value param) i = caml_getword(dbg_in); newframe = frame + Extra_args(frame) + i + 3; if (newframe >= Caml_state->stack_high || - !caml_find_code_fragment((char*) Pc(newframe), &frag, &cf)) { + (cf = caml_find_code_fragment_by_pc((char *) Pc(newframe))) == NULL) { caml_putword(dbg_out, -1); } else { frame = newframe; caml_putword(dbg_out, Caml_state->stack_high - frame); - caml_putword(dbg_out, frag); + caml_putword(dbg_out, cf->fragnum); caml_putword(dbg_out, (char*) Pc(frame) - cf->code_start); } caml_flush(dbg_out); @@ -547,9 +543,9 @@ void caml_debugger(enum event_kind event, value param) break; case REQ_GET_CLOSURE_CODE: val = getval(dbg_in); - found = caml_find_code_fragment((char*) Code_val(val), &frag, &cf); - CAMLassert(found); - caml_putword(dbg_out, frag); + cf = caml_find_code_fragment_by_pc((char*) Code_val(val)); + CAMLassert(cf != NULL); + caml_putword(dbg_out, cf->fragnum); caml_putword(dbg_out, (char*) Code_val(val) - cf->code_start); caml_flush(dbg_out); break; diff --git a/runtime/dynlink_nat.c b/runtime/dynlink_nat.c index 95626109e..0bd2319b0 100644 --- a/runtime/dynlink_nat.c +++ b/runtime/dynlink_nat.c @@ -20,6 +20,7 @@ #include "caml/memory.h" #include "caml/stack.h" #include "caml/callback.h" +#include "caml/codefrag.h" #include "caml/alloc.h" #include "caml/intext.h" #include "caml/osdeps.h" @@ -100,7 +101,6 @@ CAMLprim value caml_natdynlink_run(value handle_v, value symbol) { CAMLlocal1 (result); void *sym,*sym2; void* handle = Handle_val(handle_v); - struct code_fragment * cf; #define optsym(n) getsym(handle,unit,n) const char *unit; @@ -128,11 +128,8 @@ CAMLprim value caml_natdynlink_run(value handle_v, value symbol) { sym2 = optsym("__code_end"); if (NULL != sym && NULL != sym2) { caml_page_table_add(In_code_area, sym, sym2); - cf = caml_stat_alloc(sizeof(struct code_fragment)); - cf->code_start = (char *) sym; - cf->code_end = (char *) sym2; - cf->digest_computed = 0; - caml_ext_table_add(&caml_code_fragments_table, cf); + caml_register_code_fragment((char *) sym, (char *) sym2, + DIGEST_LATER, NULL); } if( caml_natdynlink_hook != NULL ) caml_natdynlink_hook(handle,unit); diff --git a/runtime/extern.c b/runtime/extern.c index 7613e280f..440753a26 100644 --- a/runtime/extern.c +++ b/runtime/extern.c @@ -21,13 +21,13 @@ #include #include "caml/alloc.h" +#include "caml/codefrag.h" #include "caml/config.h" #include "caml/custom.h" #include "caml/fail.h" #include "caml/gc.h" #include "caml/intext.h" #include "caml/io.h" -#include "caml/md5.h" #include "caml/memory.h" #include "caml/misc.h" #include "caml/mlvalues.h" @@ -713,15 +713,15 @@ static void extern_rec(value v) } } } - else if (caml_find_code_fragment((char*) v, NULL, &cf)) { + else if ((cf = caml_find_code_fragment_by_pc((char*) v)) != NULL) { + const char * digest; if ((extern_flags & CLOSURES) == 0) extern_invalid_argument("output_value: functional value"); - if (! cf->digest_computed) { - caml_md5_block(cf->digest, cf->code_start, cf->code_end - cf->code_start); - cf->digest_computed = 1; - } + digest = (const char *) caml_digest_of_code_fragment(cf); + if (digest == NULL) + extern_invalid_argument("output_value: private function"); writecode32(CODE_CODEPOINTER, (char *) v - cf->code_start); - writeblock((const char *)cf->digest, 16); + writeblock(digest, 16); } else { extern_invalid_argument("output_value: abstract value (outside heap)"); } diff --git a/runtime/fix_code.c b/runtime/fix_code.c index 3cfcac497..aa059be5d 100644 --- a/runtime/fix_code.c +++ b/runtime/fix_code.c @@ -25,11 +25,11 @@ #include #endif +#include "caml/codefrag.h" #include "caml/debugger.h" #include "caml/fix_code.h" #include "caml/instruct.h" #include "caml/intext.h" -#include "caml/md5.h" #include "caml/memory.h" #include "caml/misc.h" #include "caml/mlvalues.h" @@ -37,20 +37,14 @@ code_t caml_start_code; asize_t caml_code_size; -struct ext_table caml_code_fragments_table; /* Read the main bytecode block from a file */ void caml_init_code_fragments(void) { - struct code_fragment * cf; - /* Register the code in the table of code fragments */ - cf = caml_stat_alloc(sizeof(struct code_fragment)); - cf->code_start = (char *) caml_start_code; - cf->code_end = (char *) caml_start_code + caml_code_size; - caml_md5_block(cf->digest, caml_start_code, caml_code_size); - cf->digest_computed = 1; - caml_ext_table_init(&caml_code_fragments_table, 8); - caml_ext_table_add(&caml_code_fragments_table, cf); + /* Register the main bytecode block in the table of code fragments */ + caml_register_code_fragment((char *) caml_start_code, + (char *) caml_start_code + caml_code_size, + DIGEST_NOW, NULL); } void caml_load_code(int fd, asize_t len) diff --git a/runtime/intern.c b/runtime/intern.c index 5d7d3817c..5f189bacf 100644 --- a/runtime/intern.c +++ b/runtime/intern.c @@ -23,13 +23,13 @@ #include #include "caml/alloc.h" #include "caml/callback.h" +#include "caml/codefrag.h" #include "caml/config.h" #include "caml/custom.h" #include "caml/fail.h" #include "caml/gc.h" #include "caml/intext.h" #include "caml/io.h" -#include "caml/md5.h" #include "caml/memory.h" #include "caml/memprof.h" #include "caml/mlvalues.h" @@ -953,21 +953,11 @@ CAMLprim value caml_marshal_data_size(value buff, value ofs) static char * intern_resolve_code_pointer(unsigned char digest[16], asize_t offset) { - int i; - for (i = caml_code_fragments_table.size - 1; i >= 0; i--) { - struct code_fragment * cf = caml_code_fragments_table.contents[i]; - if (! cf->digest_computed) { - caml_md5_block(cf->digest, cf->code_start, cf->code_end - cf->code_start); - cf->digest_computed = 1; - } - if (memcmp(digest, cf->digest, 16) == 0) { - if (cf->code_start + offset < cf->code_end) - return cf->code_start + offset; - else - return NULL; - } - } - return NULL; + struct code_fragment * cf = caml_find_code_fragment_by_digest(digest); + if (cf != NULL && cf->code_start + offset < cf->code_end) + return cf->code_start + offset; + else + return NULL; } static void intern_bad_code_pointer(unsigned char digest[16]) diff --git a/runtime/meta.c b/runtime/meta.c index 282833287..3cf1222bd 100644 --- a/runtime/meta.c +++ b/runtime/meta.c @@ -20,6 +20,7 @@ #include #include "caml/alloc.h" #include "caml/backtrace_prim.h" +#include "caml/codefrag.h" #include "caml/config.h" #include "caml/debugger.h" #include "caml/fail.h" @@ -93,25 +94,26 @@ CAMLprim value caml_reify_bytecode(value ls_prog, { CAMLparam3(ls_prog, debuginfo, digest_opt); CAMLlocal3(clos, bytecode, retval); - struct code_fragment * cf = caml_stat_alloc(sizeof(struct code_fragment)); code_t prog; asize_t len; + enum digest_status digest_kind; + unsigned char * digest; + int fragnum; prog = (code_t)buffer_of_bytes_array(ls_prog, &len); caml_add_debug_info(prog, Val_long(len), debuginfo); - cf->code_start = (char *) prog; - cf->code_end = (char *) prog + len; /* match (digest_opt : string option) with */ if (Is_block(digest_opt)) { /* | Some digest -> */ - memcpy(cf->digest, String_val(Field(digest_opt, 0)), 16); - cf->digest_computed = 1; + digest_kind = DIGEST_PROVIDED; + digest = (unsigned char *) String_val(Field(digest_opt, 0)); } else { /* | None -> */ - cf->digest_computed = 0; + digest_kind = DIGEST_LATER; + digest = NULL; } - caml_ext_table_add(&caml_code_fragments_table, cf); - + fragnum = caml_register_code_fragment((char *) prog, (char *) prog + len, + digest_kind, digest); #ifdef ARCH_BIG_ENDIAN caml_fixup_endianness((code_t) prog, len); #endif @@ -121,7 +123,7 @@ CAMLprim value caml_reify_bytecode(value ls_prog, caml_prepare_bytecode((code_t) prog, len); /* Notify debugger after fragment gets added and reified. */ - caml_debugger(CODE_LOADED, Val_long(caml_code_fragments_table.size - 1)); + caml_debugger(CODE_LOADED, Val_long(fragnum)); clos = caml_alloc_small (1, Closure_tag); Code_val(clos) = (code_t) prog; @@ -142,21 +144,19 @@ CAMLprim value caml_static_release_bytecode(value bc) { code_t prog; asize_t len; - int found, index; struct code_fragment *cf; prog = Bytecode_val(bc)->prog; len = Bytecode_val(bc)->len; caml_remove_debug_info(prog); - found = caml_find_code_fragment((char*) prog, &index, &cf); - /* Not matched with a caml_reify_bytecode call; impossible. */ - CAMLassert(found); (void) found; /* Silence unused variable warning. */ + cf = caml_find_code_fragment_by_pc((char *) prog); + CAMLassert(cf != NULL); /* Notify debugger before the fragment gets destroyed. */ - caml_debugger(CODE_UNLOADED, Val_long(index)); + caml_debugger(CODE_UNLOADED, Val_long(cf->fragnum)); - caml_ext_table_remove(&caml_code_fragments_table, cf); + caml_remove_code_fragment(cf); #ifndef NATIVE_CODE caml_release_bytecode(prog, len); diff --git a/runtime/misc.c b/runtime/misc.c index 8aa0d0903..397bd7cff 100644 --- a/runtime/misc.c +++ b/runtime/misc.c @@ -205,19 +205,3 @@ int caml_runtime_warnings_active(void) } return 1; } - -int caml_find_code_fragment(char *pc, int *index, struct code_fragment **cf) -{ - struct code_fragment *cfi; - int i; - - for (i = 0; i < caml_code_fragments_table.size; i++) { - cfi = (struct code_fragment *) caml_code_fragments_table.contents[i]; - if ((char*) pc >= cfi->code_start && (char*) pc < cfi->code_end) { - if (index != NULL) *index = i; - if (cf != NULL) *cf = cfi; - return 1; - } - } - return 0; -} diff --git a/runtime/startup_nat.c b/runtime/startup_nat.c index c9ae1a931..c0a200630 100644 --- a/runtime/startup_nat.c +++ b/runtime/startup_nat.c @@ -22,6 +22,7 @@ #include "caml/callback.h" #include "caml/backtrace.h" #include "caml/custom.h" +#include "caml/codefrag.h" #include "caml/debugger.h" #include "caml/domain.h" #include "caml/eventlog.h" @@ -47,7 +48,6 @@ extern int caml_parser_trace; char * caml_code_area_start, * caml_code_area_end; -struct ext_table caml_code_fragments_table; /* Initialize the atom table and the static data and code area limits. */ @@ -57,7 +57,6 @@ static void init_static(void) { extern struct segment caml_data_segments[], caml_code_segments[]; int i; - struct code_fragment * cf; caml_init_atom_table (); @@ -79,12 +78,9 @@ static void init_static(void) caml_code_area_end = caml_code_segments[i].end; } /* Register the code in the table of code fragments */ - cf = caml_stat_alloc(sizeof(struct code_fragment)); - cf->code_start = caml_code_area_start; - cf->code_end = caml_code_area_end; - cf->digest_computed = 0; - caml_ext_table_init(&caml_code_fragments_table, 8); - caml_ext_table_add(&caml_code_fragments_table, cf); + caml_register_code_fragment(caml_code_area_start, + caml_code_area_end, + DIGEST_LATER, NULL); } /* These are termination hooks used by the systhreads library */