Merge pull request #9763 from xavierleroy/remove-old-hash-function
Remove pre-4.00 generic hash functionmaster
commit
db9b707e0c
6
Changes
6
Changes
|
@ -140,6 +140,12 @@ Working version
|
|||
- #9663: Extend Printexc API for raw backtrace entries.
|
||||
(Stephen Dolan, review by Nicolás Ojeda Bär and Gabriel Scherer)
|
||||
|
||||
- #9763: Add function Hashtbl.rebuild to convert from old hash table
|
||||
formats (that may have been saved to persistent storage) to the
|
||||
current hash table format. Remove leftover support for the hash
|
||||
table format and generic hash function that were in use before OCaml 4.00.
|
||||
(Xavier Leroy, review by Nicolás Ojeda Bär)
|
||||
|
||||
### Other libraries:
|
||||
|
||||
* #9206, #9419: update documentation of the threads library;
|
||||
|
|
BIN
boot/ocamlc
BIN
boot/ocamlc
Binary file not shown.
BIN
boot/ocamllex
BIN
boot/ocamllex
Binary file not shown.
144
runtime/hash.c
144
runtime/hash.c
|
@ -25,8 +25,8 @@
|
|||
#include "caml/memory.h"
|
||||
#include "caml/hash.h"
|
||||
|
||||
/* The new implementation, based on MurmurHash 3,
|
||||
http://code.google.com/p/smhasher/ */
|
||||
/* The implementation based on MurmurHash 3,
|
||||
https://github.com/aappleby/smhasher/ */
|
||||
|
||||
#define ROTL32(x,n) ((x) << n | (x) >> (32-n))
|
||||
|
||||
|
@ -301,146 +301,6 @@ CAMLprim value caml_hash(value count, value limit, value seed, value obj)
|
|||
return Val_int(h & 0x3FFFFFFFU);
|
||||
}
|
||||
|
||||
/* The old implementation */
|
||||
|
||||
struct hash_state {
|
||||
uintnat accu;
|
||||
intnat univ_limit, univ_count;
|
||||
};
|
||||
|
||||
static void hash_aux(struct hash_state*, value obj);
|
||||
|
||||
CAMLprim value caml_hash_univ_param(value count, value limit, value obj)
|
||||
{
|
||||
struct hash_state h;
|
||||
h.univ_limit = Long_val(limit);
|
||||
h.univ_count = Long_val(count);
|
||||
h.accu = 0;
|
||||
hash_aux(&h, obj);
|
||||
return Val_long(h.accu & 0x3FFFFFFF);
|
||||
/* The & has two purposes: ensure that the return value is positive
|
||||
and give the same result on 32 bit and 64 bit architectures. */
|
||||
}
|
||||
|
||||
#define Alpha 65599
|
||||
#define Beta 19
|
||||
#define Combine(new) (h->accu = h->accu * Alpha + (new))
|
||||
#define Combine_small(new) (h->accu = h->accu * Beta + (new))
|
||||
|
||||
static void hash_aux(struct hash_state* h, value obj)
|
||||
{
|
||||
unsigned char * p;
|
||||
mlsize_t i, j;
|
||||
tag_t tag;
|
||||
|
||||
h->univ_limit--;
|
||||
if (h->univ_count < 0 || h->univ_limit < 0) return;
|
||||
|
||||
again:
|
||||
if (Is_long(obj)) {
|
||||
h->univ_count--;
|
||||
Combine(Long_val(obj));
|
||||
return;
|
||||
}
|
||||
if (! Is_in_value_area(obj)) {
|
||||
/* obj is a pointer outside the heap, to an object with
|
||||
a priori unknown structure. Use its physical address as hash key. */
|
||||
Combine((intnat) obj);
|
||||
return;
|
||||
}
|
||||
/* Pointers into the heap are well-structured blocks. So are atoms.
|
||||
We can inspect the block contents. */
|
||||
/* The code needs reindenting later. Leaving as is to facilitate review. */
|
||||
tag = Tag_val(obj);
|
||||
switch (tag) {
|
||||
case String_tag:
|
||||
h->univ_count--;
|
||||
i = caml_string_length(obj);
|
||||
for (p = &Byte_u(obj, 0); i > 0; i--, p++)
|
||||
Combine_small(*p);
|
||||
break;
|
||||
case Double_tag:
|
||||
/* For doubles, we inspect their binary representation, LSB first.
|
||||
The results are consistent among all platforms with IEEE floats. */
|
||||
h->univ_count--;
|
||||
#ifdef ARCH_BIG_ENDIAN
|
||||
for (p = &Byte_u(obj, sizeof(double) - 1), i = sizeof(double);
|
||||
i > 0;
|
||||
p--, i--)
|
||||
#else
|
||||
for (p = &Byte_u(obj, 0), i = sizeof(double);
|
||||
i > 0;
|
||||
p++, i--)
|
||||
#endif
|
||||
Combine_small(*p);
|
||||
break;
|
||||
case Double_array_tag:
|
||||
h->univ_count--;
|
||||
for (j = 0; j < Bosize_val(obj); j += sizeof(double)) {
|
||||
#ifdef ARCH_BIG_ENDIAN
|
||||
for (p = &Byte_u(obj, j + sizeof(double) - 1), i = sizeof(double);
|
||||
i > 0;
|
||||
p--, i--)
|
||||
#else
|
||||
for (p = &Byte_u(obj, j), i = sizeof(double);
|
||||
i > 0;
|
||||
p++, i--)
|
||||
#endif
|
||||
Combine_small(*p);
|
||||
}
|
||||
break;
|
||||
case Abstract_tag:
|
||||
/* We don't know anything about the contents of the block.
|
||||
Better do nothing. */
|
||||
break;
|
||||
case Infix_tag:
|
||||
hash_aux(h, obj - Infix_offset_val(obj));
|
||||
break;
|
||||
case Forward_tag:
|
||||
obj = Forward_val (obj);
|
||||
goto again;
|
||||
case Object_tag:
|
||||
h->univ_count--;
|
||||
Combine(Oid_val(obj));
|
||||
break;
|
||||
case Custom_tag:
|
||||
/* If no hashing function provided, do nothing */
|
||||
if (Custom_ops_val(obj)->hash != NULL) {
|
||||
h->univ_count--;
|
||||
Combine(Custom_ops_val(obj)->hash(obj));
|
||||
}
|
||||
break;
|
||||
#ifdef NO_NAKED_POINTERS
|
||||
case Closure_tag:
|
||||
h->univ_count--;
|
||||
Combine_small(tag);
|
||||
/* Recursively hash the environment fields */
|
||||
i = Wosize_val(obj);
|
||||
j = Start_env_closinfo(Closinfo_val(obj));
|
||||
while (i > j) {
|
||||
i--;
|
||||
hash_aux(h, Field(obj, i));
|
||||
}
|
||||
/* Combine the code pointers, closure info fields, and infix headers */
|
||||
while (i > 0) {
|
||||
i--;
|
||||
Combine(Field(obj, i));
|
||||
h->univ_count--;
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
h->univ_count--;
|
||||
Combine_small(tag);
|
||||
i = Wosize_val(obj);
|
||||
while (i != 0) {
|
||||
i--;
|
||||
hash_aux(h, Field(obj, i));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Hashing variant tags */
|
||||
|
||||
CAMLexport value caml_hash_variant(char const * tag)
|
||||
|
|
|
@ -112,39 +112,43 @@ let copy h = { h with data = Array.map copy_bucketlist h.data }
|
|||
|
||||
let length h = h.size
|
||||
|
||||
let insert_all_buckets indexfun inplace odata ndata =
|
||||
let nsize = Array.length ndata in
|
||||
let ndata_tail = Array.make nsize Empty in
|
||||
let rec insert_bucket = function
|
||||
| Empty -> ()
|
||||
| Cons {key; data; next} as cell ->
|
||||
let cell =
|
||||
if inplace then cell
|
||||
else Cons {key; data; next = Empty}
|
||||
in
|
||||
let nidx = indexfun key in
|
||||
begin match ndata_tail.(nidx) with
|
||||
| Empty -> ndata.(nidx) <- cell;
|
||||
| Cons tail -> tail.next <- cell;
|
||||
end;
|
||||
ndata_tail.(nidx) <- cell;
|
||||
insert_bucket next
|
||||
in
|
||||
for i = 0 to Array.length odata - 1 do
|
||||
insert_bucket odata.(i)
|
||||
done;
|
||||
if inplace then
|
||||
for i = 0 to nsize - 1 do
|
||||
match ndata_tail.(i) with
|
||||
| Empty -> ()
|
||||
| Cons tail -> tail.next <- Empty
|
||||
done
|
||||
|
||||
let resize indexfun h =
|
||||
let odata = h.data in
|
||||
let osize = Array.length odata in
|
||||
let nsize = osize * 2 in
|
||||
if nsize < Sys.max_array_length then begin
|
||||
let ndata = Array.make nsize Empty in
|
||||
let ndata_tail = Array.make nsize Empty in
|
||||
let inplace = not (ongoing_traversal h) in
|
||||
h.data <- ndata; (* so that indexfun sees the new bucket count *)
|
||||
let rec insert_bucket = function
|
||||
| Empty -> ()
|
||||
| Cons {key; data; next} as cell ->
|
||||
let cell =
|
||||
if inplace then cell
|
||||
else Cons {key; data; next = Empty}
|
||||
in
|
||||
let nidx = indexfun h key in
|
||||
begin match ndata_tail.(nidx) with
|
||||
| Empty -> ndata.(nidx) <- cell;
|
||||
| Cons tail -> tail.next <- cell;
|
||||
end;
|
||||
ndata_tail.(nidx) <- cell;
|
||||
insert_bucket next
|
||||
in
|
||||
for i = 0 to osize - 1 do
|
||||
insert_bucket odata.(i)
|
||||
done;
|
||||
if inplace then
|
||||
for i = 0 to nsize - 1 do
|
||||
match ndata_tail.(i) with
|
||||
| Empty -> ()
|
||||
| Cons tail -> tail.next <- Empty
|
||||
done;
|
||||
insert_all_buckets (indexfun h) inplace odata ndata
|
||||
end
|
||||
|
||||
let iter f h =
|
||||
|
@ -489,18 +493,15 @@ module Make(H: HashedType): (S with type key = H.t) =
|
|||
|
||||
external seeded_hash_param :
|
||||
int -> int -> int -> 'a -> int = "caml_hash" [@@noalloc]
|
||||
external old_hash_param :
|
||||
int -> int -> 'a -> int = "caml_hash_univ_param" [@@noalloc]
|
||||
|
||||
let hash x = seeded_hash_param 10 100 0 x
|
||||
let hash_param n1 n2 x = seeded_hash_param n1 n2 0 x
|
||||
let seeded_hash seed x = seeded_hash_param 10 100 seed x
|
||||
|
||||
let key_index h key =
|
||||
(* compatibility with old hash tables *)
|
||||
if Obj.size (Obj.repr h) >= 3
|
||||
if Obj.size (Obj.repr h) >= 4
|
||||
then (seeded_hash_param 10 100 h.seed key) land (Array.length h.data - 1)
|
||||
else (old_hash_param 10 100 key) mod (Array.length h.data)
|
||||
else invalid_arg "Hashtbl: unsupported hash table format"
|
||||
|
||||
let add h key data =
|
||||
let i = key_index h key in
|
||||
|
@ -611,3 +612,18 @@ let of_seq i =
|
|||
let tbl = create 16 in
|
||||
replace_seq tbl i;
|
||||
tbl
|
||||
|
||||
let rebuild ?(random = !randomized) h =
|
||||
let s = power_2_above 16 (Array.length h.data) in
|
||||
let seed =
|
||||
if random then Random.State.bits (Lazy.force prng)
|
||||
else if Obj.size (Obj.repr h) >= 4 then h.seed
|
||||
else 0 in
|
||||
let h' = {
|
||||
size = h.size;
|
||||
data = Array.make s Empty;
|
||||
seed = seed;
|
||||
initial_size = if Obj.size (Obj.repr h) >= 4 then h.initial_size else s
|
||||
} in
|
||||
insert_all_buckets (key_index h') false h.data h'.data;
|
||||
h'
|
||||
|
|
|
@ -191,10 +191,27 @@ val randomize : unit -> unit
|
|||
@since 4.00.0 *)
|
||||
|
||||
val is_randomized : unit -> bool
|
||||
(** return if the tables are currently created in randomized mode by default
|
||||
(** Return [true] if the tables are currently created in randomized mode
|
||||
by default, [false] otherwise.
|
||||
|
||||
@since 4.03.0 *)
|
||||
|
||||
val rebuild : ?random:bool -> ('a, 'b) t -> ('a, 'b) t
|
||||
(** Return a copy of the given hashtable. Unlike {!Hashtbl.copy},
|
||||
[{!Hashtbl.rebuild} h] re-hashes all the (key, value) entries of
|
||||
the original table [h]. The returned hash table is randomized if
|
||||
[h] was randomized, or the optional [random] parameter is true, or
|
||||
if the default is to create randomized hash tables; see
|
||||
{!Hashtbl.create} for more information.
|
||||
|
||||
{!Hashtbl.rebuild} can safely be used to import a hash table built
|
||||
by an old version of the {!Hashtbl} module, then marshaled to
|
||||
persistent storage. After unmarshaling, apply {!Hashtbl.rebuild}
|
||||
to produce a hash table for the current version of the {!Hashtbl}
|
||||
module.
|
||||
|
||||
@since 4.12.0 *)
|
||||
|
||||
(** @since 4.00.0 *)
|
||||
type statistics = {
|
||||
num_bindings: int;
|
||||
|
|
|
@ -45,6 +45,7 @@ module Hashtbl : sig
|
|||
val length : ('a, 'b) t -> int
|
||||
val randomize : unit -> unit
|
||||
val is_randomized : unit -> bool
|
||||
val rebuild : ?random:bool -> ('a, 'b) t -> ('a, 'b) t
|
||||
type statistics = Hashtbl.statistics
|
||||
val stats : ('a, 'b) t -> statistics
|
||||
val to_seq : ('a,'b) t -> ('a * 'b) Seq.t
|
||||
|
|
|
@ -35,7 +35,7 @@ Uncaught exception Invalid_argument("index out of bounds")
|
|||
Raised by primitive operation at Backtrace2.run in file "backtrace2.ml", line 62, characters 14-22
|
||||
test_Not_found
|
||||
Uncaught exception Not_found
|
||||
Raised at Stdlib__hashtbl.find in file "hashtbl.ml", line 537, characters 13-28
|
||||
Raised at Stdlib__hashtbl.find in file "hashtbl.ml", line 538, characters 13-28
|
||||
Called from Backtrace2.test_Not_found in file "backtrace2.ml", line 43, characters 9-42
|
||||
Re-raised at Backtrace2.test_Not_found in file "backtrace2.ml", line 43, characters 61-70
|
||||
Called from Backtrace2.run in file "backtrace2.ml", line 62, characters 11-23
|
||||
|
@ -50,7 +50,7 @@ Called from CamlinternalLazy.force_lazy_block in file "camlinternalLazy.ml", lin
|
|||
Re-raised at CamlinternalLazy.force_lazy_block in file "camlinternalLazy.ml", line 36, characters 4-11
|
||||
Called from Backtrace2.run in file "backtrace2.ml", line 62, characters 11-23
|
||||
Uncaught exception Not_found
|
||||
Raised at Stdlib__hashtbl.find in file "hashtbl.ml", line 537, characters 13-28
|
||||
Raised at Stdlib__hashtbl.find in file "hashtbl.ml", line 538, characters 13-28
|
||||
Called from Backtrace2.test_lazy.exception_raised_internally in file "backtrace2.ml", line 50, characters 8-41
|
||||
Re-raised at CamlinternalLazy.force_lazy_block.(fun) in file "camlinternalLazy.ml", line 35, characters 56-63
|
||||
Called from CamlinternalLazy.force_lazy_block in file "camlinternalLazy.ml", line 31, characters 17-27
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
(* TEST
|
||||
*)
|
||||
|
||||
let check_contents (h: (string, int) Hashtbl.t)
|
||||
(expected: (string * int) list) =
|
||||
List.iter
|
||||
(fun (k, v) -> assert (Hashtbl.find_opt h k = Some v))
|
||||
expected;
|
||||
List.iter
|
||||
(fun k -> assert (Hashtbl.find_opt h k = None))
|
||||
[""; "n"; "no"; "non"; "none"];
|
||||
Hashtbl.iter
|
||||
(fun k v -> assert (List.assoc_opt k expected = Some v))
|
||||
h
|
||||
|
||||
let check_failure (h: (string, int) Hashtbl.t) =
|
||||
try
|
||||
ignore (Hashtbl.find_opt h ""); assert false
|
||||
with Invalid_argument _ ->
|
||||
()
|
||||
|
||||
let check_table supported h expected =
|
||||
if supported
|
||||
then check_contents h expected
|
||||
else check_failure h;
|
||||
check_contents (Hashtbl.rebuild h) expected
|
||||
|
||||
(* Hash table version 1, produced with OCaml 3.12.1 *)
|
||||
let h1 : (string, int) Hashtbl.t =
|
||||
Marshal.from_string
|
||||
"\132\149\166\190\000\000\000/\000\000\000\n\000\000\000+\000\000\000)\
|
||||
\160D\b\000\0004\000@@@@@\176%threeC@@@@\176#twoB@@@\176$fourD\176#oneA@"
|
||||
0
|
||||
|
||||
(* Hash table version 2, produced with OCaml 4.09.0 *)
|
||||
let h2 : (string, int) Hashtbl.t =
|
||||
Marshal.from_string
|
||||
"\132\149\166\190\000\000\000;\000\000\000\012\000\000\0008\000\000\0004\
|
||||
\192E\b\000\000@\000@@@@@@@@@\176$septG\176#sixF@\176$cinqE@\176$neufI\
|
||||
\176$huitH@@@@@@P"
|
||||
0
|
||||
|
||||
let _ =
|
||||
check_table false h1 ["one", 1; "two", 2; "three", 3; "four", 4];
|
||||
check_table true h2 ["cinq", 5; "six", 6; "sept", 7; "huit", 8; "neuf", 9]
|
Loading…
Reference in New Issue