From 5e3964da8692e4d5facf8c9b043c1933bbe0d502 Mon Sep 17 00:00:00 2001 From: Benoit Vaugon Date: Fri, 4 Dec 2015 00:51:44 +0100 Subject: [PATCH 1/2] Fix "weak hash of serialised closures" from mantis #0005942. --- asmrun/natdynlink.c | 22 ++++--- asmrun/startup.c | 17 +++++- byterun/caml/fix_code.h | 2 + byterun/caml/intext.h | 6 ++ byterun/extern.c | 6 +- byterun/fix_code.c | 8 ++- byterun/intern.c | 6 +- byterun/meta.c | 2 + byterun/misc.c | 13 ++++ byterun/startup.c | 14 +++-- .../tests/lib-marshal-closure/.gitignore | 2 + testsuite/tests/lib-marshal-closure/Makefile | 61 +++++++++++++++++++ testsuite/tests/lib-marshal-closure/read32.ml | 31 ++++++++++ testsuite/tests/lib-marshal-closure/read64.ml | 31 ++++++++++ .../tests/lib-marshal-closure/write32.ml | 31 ++++++++++ .../tests/lib-marshal-closure/write64.ml | 31 ++++++++++ 16 files changed, 256 insertions(+), 27 deletions(-) create mode 100644 testsuite/tests/lib-marshal-closure/.gitignore create mode 100644 testsuite/tests/lib-marshal-closure/Makefile create mode 100644 testsuite/tests/lib-marshal-closure/read32.ml create mode 100644 testsuite/tests/lib-marshal-closure/read64.ml create mode 100644 testsuite/tests/lib-marshal-closure/write32.ml create mode 100644 testsuite/tests/lib-marshal-closure/write64.ml diff --git a/asmrun/natdynlink.c b/asmrun/natdynlink.c index 3e1a47fb3..4284fee92 100644 --- a/asmrun/natdynlink.c +++ b/asmrun/natdynlink.c @@ -79,7 +79,7 @@ CAMLprim value caml_natdynlink_run(void *handle, value symbol) { CAMLparam1 (symbol); CAMLlocal1 (result); void *sym,*sym2; - struct code_fragment * cf; + struct code_fragment * cf = NULL; #define optsym(n) getsym(handle,unit,n) char *unit; @@ -93,11 +93,6 @@ CAMLprim value caml_natdynlink_run(void *handle, value symbol) { sym = optsym("__gc_roots"); if (NULL != sym) caml_register_dyn_global(sym); - sym = optsym("__data_begin"); - sym2 = optsym("__data_end"); - if (NULL != sym && NULL != sym2) - caml_page_table_add(In_static_data, sym, sym2); - sym = optsym("__code_begin"); sym2 = optsym("__code_end"); if (NULL != sym && NULL != sym2) { @@ -105,10 +100,23 @@ CAMLprim value caml_natdynlink_run(void *handle, value symbol) { cf = caml_stat_alloc(sizeof(struct code_fragment)); cf->code_start = (char *) sym; cf->code_end = (char *) sym2; + cf->data_start = NULL; + cf->data_end = NULL; cf->digest_computed = 0; - caml_ext_table_add(&caml_code_fragments_table, cf); } + sym = optsym("__data_begin"); + sym2 = optsym("__data_end"); + if (NULL != sym && NULL != sym2) { + caml_page_table_add(In_static_data, sym, sym2); + if (cf != NULL) { + cf->data_start = (char *) sym; + cf->data_end = (char *) sym2; + } + } + + if (cf != NULL) caml_ext_table_add(&caml_code_fragments_table, cf); + entrypoint = optsym("__entry"); if (NULL != entrypoint) result = caml_callback((value)(&entrypoint), 0); else result = Val_unit; diff --git a/asmrun/startup.c b/asmrun/startup.c index 5cbd42cff..dd46b20de 100644 --- a/asmrun/startup.c +++ b/asmrun/startup.c @@ -52,6 +52,22 @@ static void init_static(void) caml_init_atom_table (); + /* Register the data in the table of code fragments */ + cf = caml_stat_alloc(sizeof(struct code_fragment)); + if (caml_data_segments[0].begin != 0) { + cf->data_start = caml_data_segments[0].begin; + cf->data_end = caml_data_segments[0].end; + for (i = 1; caml_data_segments[i].begin != 0; i ++) { + if (caml_data_segments[i].begin < cf->data_start) + cf->data_start = caml_data_segments[i].begin; + if (caml_data_segments[i].end > cf->data_end) + cf->data_end = caml_data_segments[i].end; + } + } else { + cf->data_start = NULL; + cf->data_end = NULL; + } + for (i = 0; caml_data_segments[i].begin != 0; i++) { /* PR#5509: we must include the zero word at end of data segment, because pointers equal to caml_data_segments[i].end are static data. */ @@ -70,7 +86,6 @@ static void init_static(void) caml_code_area_end = caml_code_segments[i].end; } /* Register the code in the table of code fragments */ - cf = caml_stat_alloc(sizeof(struct code_fragment)); cf->code_start = caml_code_area_start; cf->code_end = caml_code_area_end; cf->digest_computed = 0; diff --git a/byterun/caml/fix_code.h b/byterun/caml/fix_code.h index c63989452..82f4507bd 100644 --- a/byterun/caml/fix_code.h +++ b/byterun/caml/fix_code.h @@ -21,6 +21,8 @@ #include "misc.h" #include "mlvalues.h" +extern char *caml_data; +extern asize_t caml_data_size; extern code_t caml_start_code; extern asize_t caml_code_size; extern unsigned char * caml_saved_code; diff --git a/byterun/caml/intext.h b/byterun/caml/intext.h index b72d802ac..1b6ddcb6e 100644 --- a/byterun/caml/intext.h +++ b/byterun/caml/intext.h @@ -177,12 +177,18 @@ CAMLextern void caml_deserialize_error(char * msg); struct code_fragment { char * code_start; char * code_end; + char * data_start; + char * data_end; unsigned char digest[16]; char digest_computed; }; struct ext_table caml_code_fragments_table; +/* Defined in misc.c */ + +CAMLextern void caml_update_code_fragment_digest(struct code_fragment *cf); + /* */ #ifdef __cplusplus diff --git a/byterun/extern.c b/byterun/extern.c index 8bdaf3090..b77cd5935 100644 --- a/byterun/extern.c +++ b/byterun/extern.c @@ -22,7 +22,6 @@ #include "caml/gc.h" #include "caml/intext.h" #include "caml/io.h" -#include "caml/md5.h" #include "caml/memory.h" #include "caml/misc.h" #include "caml/mlvalues.h" @@ -895,10 +894,7 @@ static struct code_fragment * extern_find_code(char *addr) int i; for (i = caml_code_fragments_table.size - 1; i >= 0; i--) { struct code_fragment * cf = caml_code_fragments_table.contents[i]; - if (! cf->digest_computed) { - caml_md5_block(cf->digest, cf->code_start, cf->code_end - cf->code_start); - cf->digest_computed = 1; - } + if (! cf->digest_computed) caml_update_code_fragment_digest(cf); if (cf->code_start <= addr && addr < cf->code_end) return cf; } return NULL; diff --git a/byterun/fix_code.c b/byterun/fix_code.c index 0d7c8cbaf..b22f82997 100644 --- a/byterun/fix_code.c +++ b/byterun/fix_code.c @@ -23,12 +23,13 @@ #include "caml/fix_code.h" #include "caml/instruct.h" #include "caml/intext.h" -#include "caml/md5.h" #include "caml/memory.h" #include "caml/misc.h" #include "caml/mlvalues.h" #include "caml/reverse.h" +char *caml_data; +asize_t caml_data_size; code_t caml_start_code; asize_t caml_code_size; unsigned char * caml_saved_code; @@ -41,8 +42,9 @@ void caml_init_code_fragments(void) { cf = caml_stat_alloc(sizeof(struct code_fragment)); cf->code_start = (char *) caml_start_code; cf->code_end = (char *) caml_start_code + caml_code_size; - caml_md5_block(cf->digest, caml_start_code, caml_code_size); - cf->digest_computed = 1; + cf->data_start = (char *) caml_data; + cf->data_end = (char *) caml_data + caml_data_size; + caml_update_code_fragment_digest(cf); caml_ext_table_init(&caml_code_fragments_table, 8); caml_ext_table_add(&caml_code_fragments_table, cf); } diff --git a/byterun/intern.c b/byterun/intern.c index 23eaebce5..313ca5ec5 100644 --- a/byterun/intern.c +++ b/byterun/intern.c @@ -24,7 +24,6 @@ #include "caml/gc.h" #include "caml/intext.h" #include "caml/io.h" -#include "caml/md5.h" #include "caml/memory.h" #include "caml/mlvalues.h" #include "caml/misc.h" @@ -855,10 +854,7 @@ static char * intern_resolve_code_pointer(unsigned char digest[16], int i; for (i = caml_code_fragments_table.size - 1; i >= 0; i--) { struct code_fragment * cf = caml_code_fragments_table.contents[i]; - if (! cf->digest_computed) { - caml_md5_block(cf->digest, cf->code_start, cf->code_end - cf->code_start); - cf->digest_computed = 1; - } + if (! cf->digest_computed) caml_update_code_fragment_digest(cf); if (memcmp(digest, cf->digest, 16) == 0) { if (cf->code_start + offset < cf->code_end) return cf->code_start + offset; diff --git a/byterun/meta.c b/byterun/meta.c index 0e0864a3a..4298d7b6e 100644 --- a/byterun/meta.c +++ b/byterun/meta.c @@ -52,6 +52,8 @@ CAMLprim value caml_reify_bytecode(value prog, value len) cf->code_start = (char *) prog; cf->code_end = (char *) prog + Long_val(len); + cf->data_start = NULL; + cf->data_end = NULL; cf->digest_computed = 0; caml_ext_table_add(&caml_code_fragments_table, cf); diff --git a/byterun/misc.c b/byterun/misc.c index 8191b83e7..a30cece3b 100644 --- a/byterun/misc.c +++ b/byterun/misc.c @@ -15,8 +15,10 @@ #include #include #include "caml/config.h" +#include "caml/intext.h" #include "caml/misc.h" #include "caml/memory.h" +#include "caml/md5.h" caml_timing_hook caml_major_slice_begin_hook = NULL; caml_timing_hook caml_major_slice_end_hook = NULL; @@ -144,6 +146,17 @@ void caml_ext_table_free(struct ext_table * tbl, int free_entries) caml_stat_free(tbl->contents); } +void caml_update_code_fragment_digest(struct code_fragment *cf) { + int i; + unsigned char code_digest[16]; + unsigned char data_digest[16]; + caml_md5_block(code_digest, cf->code_start, cf->code_end - cf->code_start); + caml_md5_block(data_digest, cf->data_start, cf->data_end - cf->data_start); + for (i = 0 ; i < 16 ; i ++) + cf->digest[i] = code_digest[i] ^ data_digest[i]; + cf->digest_computed = 1; +} + CAMLexport char * caml_strdup(const char * s) { size_t slen = strlen(s); diff --git a/byterun/startup.c b/byterun/startup.c index faa0e4a1f..b03d24048 100644 --- a/byterun/startup.c +++ b/byterun/startup.c @@ -270,7 +270,6 @@ CAMLexport void caml_main(char **argv) { int fd, pos; struct exec_trailer trail; - struct channel * chan; value res; char * shared_lib_path, * shared_libs, * req_prims; char * exe_name; @@ -334,6 +333,10 @@ CAMLexport void caml_main(char **argv) caml_interprete(NULL, 0); /* Initialize the debugger, if needed */ caml_debugger_init(); + /* Load the globals */ + caml_data_size = caml_seek_section(fd, &trail, "DATA"); + caml_data = read_section(fd, &trail, "DATA"); + caml_global_data = caml_input_value_from_block(caml_data, caml_data_size); /* Load the code */ caml_code_size = caml_seek_section(fd, &trail, "CODE"); caml_load_code(fd, caml_code_size); @@ -347,11 +350,8 @@ CAMLexport void caml_main(char **argv) caml_stat_free(shared_lib_path); caml_stat_free(shared_libs); caml_stat_free(req_prims); - /* Load the globals */ - caml_seek_section(fd, &trail, "DATA"); - chan = caml_open_descriptor_in(fd); - caml_global_data = caml_input_val(chan); - caml_close_channel(chan); /* this also closes fd */ + /* Close */ + close(fd); caml_stat_free(trail.section); /* Ensure that the globals are in the major heap. */ caml_oldify_one (caml_global_data, &caml_global_data); @@ -421,6 +421,8 @@ CAMLexport void caml_startup_code( /* Load the code */ caml_start_code = code; caml_code_size = code_size; + caml_data = data; + caml_data_size = data_size; caml_init_code_fragments(); caml_init_debug_info(); if (caml_debugger_in_use) { diff --git a/testsuite/tests/lib-marshal-closure/.gitignore b/testsuite/tests/lib-marshal-closure/.gitignore new file mode 100644 index 000000000..9f2824deb --- /dev/null +++ b/testsuite/tests/lib-marshal-closure/.gitignore @@ -0,0 +1,2 @@ +*.err +data diff --git a/testsuite/tests/lib-marshal-closure/Makefile b/testsuite/tests/lib-marshal-closure/Makefile new file mode 100644 index 000000000..5b5d92b20 --- /dev/null +++ b/testsuite/tests/lib-marshal-closure/Makefile @@ -0,0 +1,61 @@ +######################################################################### +# # +# OCaml # +# # +# Benoit Vaugon, ENSTA # +# # +# Copyright 2014 Institut National de Recherche en Informatique et # +# en Automatique. All rights reserved. This file is distributed # +# under the terms of the Q Public License version 1.0. # +# # +######################################################################### + +BINARIES := write32.byte write32.native read32.byte read32.native \ + write64.byte write64.native read64.byte read64.native + +BASEDIR=../.. + +.PHONY: default +default: + @$(MAKE) compile + @$(MAKE) run + +.PHONY: compile +compile: $(BINARIES) + +.PHONY: run +run: + @for arch in 32 64; do \ + for kind in byte native; do \ + echo -n " ... testing 'arch-$$arch/$$kind'"; \ + rm -f data; \ + ./write$$arch.$$kind \ + > write$$arch$$kind.out 2> write$$arch$$kind.err \ + || exit 1; \ + ./read$$arch.$$kind \ + > read$$arch$$kind.out 2> read$$arch$$kind.err \ + && ( echo " => passed" ) \ + || ( echo " => failed" ) \ + done; \ + done + +### + +.PHONY: promote +promote: defaultpromote + +.PHONY: clean +clean: defaultclean + @rm -f data *.byte *.native *.out *.err + +### + +%.byte: %.ml + @$(OCAMLC) $< -o $@ + +%.native: %.ml + @$(OCAMLOPT) $< -o $@ + +### + +include $(BASEDIR)/makefiles/Makefile.common diff --git a/testsuite/tests/lib-marshal-closure/read32.ml b/testsuite/tests/lib-marshal-closure/read32.ml new file mode 100644 index 000000000..fbcdded50 --- /dev/null +++ b/testsuite/tests/lib-marshal-closure/read32.ml @@ -0,0 +1,31 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Benoit Vaugon, ENSTA *) +(* *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +let fname = "data";; + +let f () = [ 1; 2; 3; 4 ];; + +try + let ic = open_in fname in + let g : unit -> float list = Marshal.from_channel ic in + close_in ic; + Printf.printf "%f\n" (List.nth (g ()) 0); +with +| Sys_error _ -> + Printf.printf "Do not import %S, ok\n" fname; +| Failure msg when String.sub msg 0 32 = "input_value: unknown code module" -> + Printf.printf "Incompatible closure, ok\n"; +;; + +let oc = open_out fname in +Marshal.to_channel oc f [ Marshal.Closures ]; +close_out oc;; diff --git a/testsuite/tests/lib-marshal-closure/read64.ml b/testsuite/tests/lib-marshal-closure/read64.ml new file mode 100644 index 000000000..5501a01ee --- /dev/null +++ b/testsuite/tests/lib-marshal-closure/read64.ml @@ -0,0 +1,31 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Benoit Vaugon, ENSTA *) +(* *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +let fname = "data";; + +let f () = [ 1; 2; 3; 4; 5 ];; + +try + let ic = open_in fname in + let g : unit -> float list = Marshal.from_channel ic in + close_in ic; + Printf.printf "%f\n" (List.nth (g ()) 0); +with +| Sys_error _ -> + Printf.printf "Do not import %S, ok\n" fname; +| Failure msg when String.sub msg 0 32 = "input_value: unknown code module" -> + Printf.printf "Incompatible closure, ok\n"; +;; + +let oc = open_out fname in +Marshal.to_channel oc f [ Marshal.Closures ]; +close_out oc;; diff --git a/testsuite/tests/lib-marshal-closure/write32.ml b/testsuite/tests/lib-marshal-closure/write32.ml new file mode 100644 index 000000000..8140bc1d2 --- /dev/null +++ b/testsuite/tests/lib-marshal-closure/write32.ml @@ -0,0 +1,31 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Benoit Vaugon, ENSTA *) +(* *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +let fname = "data";; + +let f () = [ 3.14; 6.28 ];; + +try + let ic = open_in fname in + let g : unit -> float list = Marshal.from_channel ic in + close_in ic; + Printf.printf "%f\n" (List.nth (g ()) 0); +with +| Sys_error _ -> + Printf.printf "Do not import %S, ok\n" fname; +| Failure msg when String.sub msg 0 32 = "input_value: unknown code module" -> + Printf.printf "Incompatible closure, ok\n"; +;; + +let oc = open_out fname in +Marshal.to_channel oc f [ Marshal.Closures ]; +close_out oc;; diff --git a/testsuite/tests/lib-marshal-closure/write64.ml b/testsuite/tests/lib-marshal-closure/write64.ml new file mode 100644 index 000000000..fab080a2b --- /dev/null +++ b/testsuite/tests/lib-marshal-closure/write64.ml @@ -0,0 +1,31 @@ +(***********************************************************************) +(* *) +(* OCaml *) +(* *) +(* Benoit Vaugon, ENSTA *) +(* *) +(* Copyright 2014 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the Q Public License version 1.0. *) +(* *) +(***********************************************************************) + +let fname = "data";; + +let f () = [ 3.14; 6.28; 9.42 ];; + +try + let ic = open_in fname in + let g : unit -> float list = Marshal.from_channel ic in + close_in ic; + Printf.printf "%f\n" (List.nth (g ()) 0); +with +| Sys_error _ -> + Printf.printf "Do not import %S, ok\n" fname; +| Failure msg when String.sub msg 0 32 = "input_value: unknown code module" -> + Printf.printf "Incompatible closure, ok\n"; +;; + +let oc = open_out fname in +Marshal.to_channel oc f [ Marshal.Closures ]; +close_out oc;; From e9bb7e117bace4eecb38b07f96033d0958d09a64 Mon Sep 17 00:00:00 2001 From: Benoit Vaugon Date: Fri, 4 Dec 2015 11:30:46 +0100 Subject: [PATCH 2/2] Update Changes. --- Changes | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Changes b/Changes index e6e6287c0..b55d793a2 100644 --- a/Changes +++ b/Changes @@ -363,6 +363,8 @@ Bug fixes: (Marc Lasson, review by Alain Frisch) - GPR#313: Prevent quadratic cases in CSE (Pierre Chambart, review by Xavier Leroy) +- GPR#330: Fix weak hash of serialised closures + (BenoƮt Vaugon, review by Alain Frish) Features wishes: - PR#4518, GPR#29: change location format for reporting errors in ocamldoc