Merge pull request #9763 from xavierleroy/remove-old-hash-function

Remove pre-4.00 generic hash function
master
Nicolás Ojeda Bär 2020-07-20 08:10:18 +02:00 committed by GitHub
commit db9b707e0c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 120 additions and 175 deletions

View File

@ -140,6 +140,12 @@ Working version
- #9663: Extend Printexc API for raw backtrace entries.
(Stephen Dolan, review by Nicolás Ojeda Bär and Gabriel Scherer)
- #9763: Add function Hashtbl.rebuild to convert from old hash table
formats (that may have been saved to persistent storage) to the
current hash table format. Remove leftover support for the hash
table format and generic hash function that were in use before OCaml 4.00.
(Xavier Leroy, review by Nicolás Ojeda Bär)
### Other libraries:
* #9206, #9419: update documentation of the threads library;

Binary file not shown.

Binary file not shown.

View File

@ -25,8 +25,8 @@
#include "caml/memory.h"
#include "caml/hash.h"
/* The new implementation, based on MurmurHash 3,
http://code.google.com/p/smhasher/ */
/* The implementation based on MurmurHash 3,
https://github.com/aappleby/smhasher/ */
#define ROTL32(x,n) ((x) << n | (x) >> (32-n))
@ -301,146 +301,6 @@ CAMLprim value caml_hash(value count, value limit, value seed, value obj)
return Val_int(h & 0x3FFFFFFFU);
}
/* The old implementation */
struct hash_state {
uintnat accu;
intnat univ_limit, univ_count;
};
static void hash_aux(struct hash_state*, value obj);
CAMLprim value caml_hash_univ_param(value count, value limit, value obj)
{
struct hash_state h;
h.univ_limit = Long_val(limit);
h.univ_count = Long_val(count);
h.accu = 0;
hash_aux(&h, obj);
return Val_long(h.accu & 0x3FFFFFFF);
/* The & has two purposes: ensure that the return value is positive
and give the same result on 32 bit and 64 bit architectures. */
}
#define Alpha 65599
#define Beta 19
#define Combine(new) (h->accu = h->accu * Alpha + (new))
#define Combine_small(new) (h->accu = h->accu * Beta + (new))
static void hash_aux(struct hash_state* h, value obj)
{
unsigned char * p;
mlsize_t i, j;
tag_t tag;
h->univ_limit--;
if (h->univ_count < 0 || h->univ_limit < 0) return;
again:
if (Is_long(obj)) {
h->univ_count--;
Combine(Long_val(obj));
return;
}
if (! Is_in_value_area(obj)) {
/* obj is a pointer outside the heap, to an object with
a priori unknown structure. Use its physical address as hash key. */
Combine((intnat) obj);
return;
}
/* Pointers into the heap are well-structured blocks. So are atoms.
We can inspect the block contents. */
/* The code needs reindenting later. Leaving as is to facilitate review. */
tag = Tag_val(obj);
switch (tag) {
case String_tag:
h->univ_count--;
i = caml_string_length(obj);
for (p = &Byte_u(obj, 0); i > 0; i--, p++)
Combine_small(*p);
break;
case Double_tag:
/* For doubles, we inspect their binary representation, LSB first.
The results are consistent among all platforms with IEEE floats. */
h->univ_count--;
#ifdef ARCH_BIG_ENDIAN
for (p = &Byte_u(obj, sizeof(double) - 1), i = sizeof(double);
i > 0;
p--, i--)
#else
for (p = &Byte_u(obj, 0), i = sizeof(double);
i > 0;
p++, i--)
#endif
Combine_small(*p);
break;
case Double_array_tag:
h->univ_count--;
for (j = 0; j < Bosize_val(obj); j += sizeof(double)) {
#ifdef ARCH_BIG_ENDIAN
for (p = &Byte_u(obj, j + sizeof(double) - 1), i = sizeof(double);
i > 0;
p--, i--)
#else
for (p = &Byte_u(obj, j), i = sizeof(double);
i > 0;
p++, i--)
#endif
Combine_small(*p);
}
break;
case Abstract_tag:
/* We don't know anything about the contents of the block.
Better do nothing. */
break;
case Infix_tag:
hash_aux(h, obj - Infix_offset_val(obj));
break;
case Forward_tag:
obj = Forward_val (obj);
goto again;
case Object_tag:
h->univ_count--;
Combine(Oid_val(obj));
break;
case Custom_tag:
/* If no hashing function provided, do nothing */
if (Custom_ops_val(obj)->hash != NULL) {
h->univ_count--;
Combine(Custom_ops_val(obj)->hash(obj));
}
break;
#ifdef NO_NAKED_POINTERS
case Closure_tag:
h->univ_count--;
Combine_small(tag);
/* Recursively hash the environment fields */
i = Wosize_val(obj);
j = Start_env_closinfo(Closinfo_val(obj));
while (i > j) {
i--;
hash_aux(h, Field(obj, i));
}
/* Combine the code pointers, closure info fields, and infix headers */
while (i > 0) {
i--;
Combine(Field(obj, i));
h->univ_count--;
}
break;
#endif
default:
h->univ_count--;
Combine_small(tag);
i = Wosize_val(obj);
while (i != 0) {
i--;
hash_aux(h, Field(obj, i));
}
break;
}
}
/* Hashing variant tags */
CAMLexport value caml_hash_variant(char const * tag)

View File

@ -112,39 +112,43 @@ let copy h = { h with data = Array.map copy_bucketlist h.data }
let length h = h.size
let insert_all_buckets indexfun inplace odata ndata =
let nsize = Array.length ndata in
let ndata_tail = Array.make nsize Empty in
let rec insert_bucket = function
| Empty -> ()
| Cons {key; data; next} as cell ->
let cell =
if inplace then cell
else Cons {key; data; next = Empty}
in
let nidx = indexfun key in
begin match ndata_tail.(nidx) with
| Empty -> ndata.(nidx) <- cell;
| Cons tail -> tail.next <- cell;
end;
ndata_tail.(nidx) <- cell;
insert_bucket next
in
for i = 0 to Array.length odata - 1 do
insert_bucket odata.(i)
done;
if inplace then
for i = 0 to nsize - 1 do
match ndata_tail.(i) with
| Empty -> ()
| Cons tail -> tail.next <- Empty
done
let resize indexfun h =
let odata = h.data in
let osize = Array.length odata in
let nsize = osize * 2 in
if nsize < Sys.max_array_length then begin
let ndata = Array.make nsize Empty in
let ndata_tail = Array.make nsize Empty in
let inplace = not (ongoing_traversal h) in
h.data <- ndata; (* so that indexfun sees the new bucket count *)
let rec insert_bucket = function
| Empty -> ()
| Cons {key; data; next} as cell ->
let cell =
if inplace then cell
else Cons {key; data; next = Empty}
in
let nidx = indexfun h key in
begin match ndata_tail.(nidx) with
| Empty -> ndata.(nidx) <- cell;
| Cons tail -> tail.next <- cell;
end;
ndata_tail.(nidx) <- cell;
insert_bucket next
in
for i = 0 to osize - 1 do
insert_bucket odata.(i)
done;
if inplace then
for i = 0 to nsize - 1 do
match ndata_tail.(i) with
| Empty -> ()
| Cons tail -> tail.next <- Empty
done;
insert_all_buckets (indexfun h) inplace odata ndata
end
let iter f h =
@ -489,18 +493,15 @@ module Make(H: HashedType): (S with type key = H.t) =
external seeded_hash_param :
int -> int -> int -> 'a -> int = "caml_hash" [@@noalloc]
external old_hash_param :
int -> int -> 'a -> int = "caml_hash_univ_param" [@@noalloc]
let hash x = seeded_hash_param 10 100 0 x
let hash_param n1 n2 x = seeded_hash_param n1 n2 0 x
let seeded_hash seed x = seeded_hash_param 10 100 seed x
let key_index h key =
(* compatibility with old hash tables *)
if Obj.size (Obj.repr h) >= 3
if Obj.size (Obj.repr h) >= 4
then (seeded_hash_param 10 100 h.seed key) land (Array.length h.data - 1)
else (old_hash_param 10 100 key) mod (Array.length h.data)
else invalid_arg "Hashtbl: unsupported hash table format"
let add h key data =
let i = key_index h key in
@ -611,3 +612,18 @@ let of_seq i =
let tbl = create 16 in
replace_seq tbl i;
tbl
let rebuild ?(random = !randomized) h =
let s = power_2_above 16 (Array.length h.data) in
let seed =
if random then Random.State.bits (Lazy.force prng)
else if Obj.size (Obj.repr h) >= 4 then h.seed
else 0 in
let h' = {
size = h.size;
data = Array.make s Empty;
seed = seed;
initial_size = if Obj.size (Obj.repr h) >= 4 then h.initial_size else s
} in
insert_all_buckets (key_index h') false h.data h'.data;
h'

View File

@ -191,10 +191,27 @@ val randomize : unit -> unit
@since 4.00.0 *)
val is_randomized : unit -> bool
(** return if the tables are currently created in randomized mode by default
(** Return [true] if the tables are currently created in randomized mode
by default, [false] otherwise.
@since 4.03.0 *)
val rebuild : ?random:bool -> ('a, 'b) t -> ('a, 'b) t
(** Return a copy of the given hashtable. Unlike {!Hashtbl.copy},
[{!Hashtbl.rebuild} h] re-hashes all the (key, value) entries of
the original table [h]. The returned hash table is randomized if
[h] was randomized, or the optional [random] parameter is true, or
if the default is to create randomized hash tables; see
{!Hashtbl.create} for more information.
{!Hashtbl.rebuild} can safely be used to import a hash table built
by an old version of the {!Hashtbl} module, then marshaled to
persistent storage. After unmarshaling, apply {!Hashtbl.rebuild}
to produce a hash table for the current version of the {!Hashtbl}
module.
@since 4.12.0 *)
(** @since 4.00.0 *)
type statistics = {
num_bindings: int;

View File

@ -45,6 +45,7 @@ module Hashtbl : sig
val length : ('a, 'b) t -> int
val randomize : unit -> unit
val is_randomized : unit -> bool
val rebuild : ?random:bool -> ('a, 'b) t -> ('a, 'b) t
type statistics = Hashtbl.statistics
val stats : ('a, 'b) t -> statistics
val to_seq : ('a,'b) t -> ('a * 'b) Seq.t

View File

@ -35,7 +35,7 @@ Uncaught exception Invalid_argument("index out of bounds")
Raised by primitive operation at Backtrace2.run in file "backtrace2.ml", line 62, characters 14-22
test_Not_found
Uncaught exception Not_found
Raised at Stdlib__hashtbl.find in file "hashtbl.ml", line 537, characters 13-28
Raised at Stdlib__hashtbl.find in file "hashtbl.ml", line 538, characters 13-28
Called from Backtrace2.test_Not_found in file "backtrace2.ml", line 43, characters 9-42
Re-raised at Backtrace2.test_Not_found in file "backtrace2.ml", line 43, characters 61-70
Called from Backtrace2.run in file "backtrace2.ml", line 62, characters 11-23
@ -50,7 +50,7 @@ Called from CamlinternalLazy.force_lazy_block in file "camlinternalLazy.ml", lin
Re-raised at CamlinternalLazy.force_lazy_block in file "camlinternalLazy.ml", line 36, characters 4-11
Called from Backtrace2.run in file "backtrace2.ml", line 62, characters 11-23
Uncaught exception Not_found
Raised at Stdlib__hashtbl.find in file "hashtbl.ml", line 537, characters 13-28
Raised at Stdlib__hashtbl.find in file "hashtbl.ml", line 538, characters 13-28
Called from Backtrace2.test_lazy.exception_raised_internally in file "backtrace2.ml", line 50, characters 8-41
Re-raised at CamlinternalLazy.force_lazy_block.(fun) in file "camlinternalLazy.ml", line 35, characters 56-63
Called from CamlinternalLazy.force_lazy_block in file "camlinternalLazy.ml", line 31, characters 17-27

View File

@ -0,0 +1,45 @@
(* TEST
*)
let check_contents (h: (string, int) Hashtbl.t)
(expected: (string * int) list) =
List.iter
(fun (k, v) -> assert (Hashtbl.find_opt h k = Some v))
expected;
List.iter
(fun k -> assert (Hashtbl.find_opt h k = None))
[""; "n"; "no"; "non"; "none"];
Hashtbl.iter
(fun k v -> assert (List.assoc_opt k expected = Some v))
h
let check_failure (h: (string, int) Hashtbl.t) =
try
ignore (Hashtbl.find_opt h ""); assert false
with Invalid_argument _ ->
()
let check_table supported h expected =
if supported
then check_contents h expected
else check_failure h;
check_contents (Hashtbl.rebuild h) expected
(* Hash table version 1, produced with OCaml 3.12.1 *)
let h1 : (string, int) Hashtbl.t =
Marshal.from_string
"\132\149\166\190\000\000\000/\000\000\000\n\000\000\000+\000\000\000)\
\160D\b\000\0004\000@@@@@\176%threeC@@@@\176#twoB@@@\176$fourD\176#oneA@"
0
(* Hash table version 2, produced with OCaml 4.09.0 *)
let h2 : (string, int) Hashtbl.t =
Marshal.from_string
"\132\149\166\190\000\000\000;\000\000\000\012\000\000\0008\000\000\0004\
\192E\b\000\000@\000@@@@@@@@@\176$septG\176#sixF@\176$cinqE@\176$neufI\
\176$huitH@@@@@@P"
0
let _ =
check_table false h1 ["one", 1; "two", 2; "three", 3; "four", 4];
check_table true h2 ["cinq", 5; "six", 6; "sept", 7; "huit", 8; "neuf", 9]