631 lines
18 KiB
OCaml
631 lines
18 KiB
OCaml
(**************************************************************************)
|
|
(* *)
|
|
(* OCaml *)
|
|
(* *)
|
|
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
|
(* *)
|
|
(* Copyright 1996 Institut National de Recherche en Informatique et *)
|
|
(* en Automatique. *)
|
|
(* *)
|
|
(* All rights reserved. This file is distributed under the terms of *)
|
|
(* the GNU Lesser General Public License version 2.1, with the *)
|
|
(* special exception on linking described in the file LICENSE. *)
|
|
(* *)
|
|
(**************************************************************************)
|
|
|
|
(* Hash tables *)
|
|
|
|
(* We do dynamic hashing, and resize the table and rehash the elements
|
|
when buckets become too long. *)
|
|
|
|
type ('a, 'b) t =
|
|
{ mutable size: int; (* number of entries *)
|
|
mutable data: ('a, 'b) bucketlist array; (* the buckets *)
|
|
mutable seed: int; (* for randomization *)
|
|
mutable initial_size: int; (* initial array size *)
|
|
}
|
|
|
|
and ('a, 'b) bucketlist =
|
|
Empty
|
|
| Cons of { mutable key: 'a;
|
|
mutable data: 'b;
|
|
mutable next: ('a, 'b) bucketlist }
|
|
|
|
(* The sign of initial_size encodes the fact that a traversal is
|
|
ongoing or not.
|
|
|
|
This disables the efficient in place implementation of resizing.
|
|
*)
|
|
|
|
let ongoing_traversal h =
|
|
Obj.size (Obj.repr h) < 4 (* compatibility with old hash tables *)
|
|
|| h.initial_size < 0
|
|
|
|
let flip_ongoing_traversal h =
|
|
h.initial_size <- - h.initial_size
|
|
|
|
(* To pick random seeds if requested *)
|
|
|
|
let randomized_default =
|
|
let params =
|
|
try Sys.getenv "OCAMLRUNPARAM" with Not_found ->
|
|
try Sys.getenv "CAMLRUNPARAM" with Not_found -> "" in
|
|
String.contains params 'R'
|
|
|
|
let randomized = ref randomized_default
|
|
|
|
let randomize () = randomized := true
|
|
let is_randomized () = !randomized
|
|
|
|
let prng = lazy (Random.State.make_self_init())
|
|
|
|
(* Functions which appear before the functorial interface must either be
|
|
independent of the hash function or take it as a parameter (see #2202 and
|
|
code below the functor definitions. *)
|
|
|
|
(* Creating a fresh, empty table *)
|
|
|
|
let rec power_2_above x n =
|
|
if x >= n then x
|
|
else if x * 2 > Sys.max_array_length then x
|
|
else power_2_above (x * 2) n
|
|
|
|
let create ?(random = !randomized) initial_size =
|
|
let s = power_2_above 16 initial_size in
|
|
let seed = if random then Random.State.bits (Lazy.force prng) else 0 in
|
|
{ initial_size = s; size = 0; seed = seed; data = Array.make s Empty }
|
|
|
|
let clear h =
|
|
if h.size > 0 then begin
|
|
h.size <- 0;
|
|
Array.fill h.data 0 (Array.length h.data) Empty
|
|
end
|
|
|
|
let reset h =
|
|
let len = Array.length h.data in
|
|
if Obj.size (Obj.repr h) < 4 (* compatibility with old hash tables *)
|
|
|| len = abs h.initial_size then
|
|
clear h
|
|
else begin
|
|
h.size <- 0;
|
|
h.data <- Array.make (abs h.initial_size) Empty
|
|
end
|
|
|
|
let copy_bucketlist = function
|
|
| Empty -> Empty
|
|
| Cons {key; data; next} ->
|
|
let rec loop prec = function
|
|
| Empty -> ()
|
|
| Cons {key; data; next} ->
|
|
let r = Cons {key; data; next} in
|
|
begin match prec with
|
|
| Empty -> assert false
|
|
| Cons prec -> prec.next <- r
|
|
end;
|
|
loop r next
|
|
in
|
|
let r = Cons {key; data; next} in
|
|
loop r next;
|
|
r
|
|
|
|
let copy h = { h with data = Array.map copy_bucketlist h.data }
|
|
|
|
let length h = h.size
|
|
|
|
let insert_all_buckets indexfun inplace odata ndata =
|
|
let nsize = Array.length ndata in
|
|
let ndata_tail = Array.make nsize Empty in
|
|
let rec insert_bucket = function
|
|
| Empty -> ()
|
|
| Cons {key; data; next} as cell ->
|
|
let cell =
|
|
if inplace then cell
|
|
else Cons {key; data; next = Empty}
|
|
in
|
|
let nidx = indexfun key in
|
|
begin match ndata_tail.(nidx) with
|
|
| Empty -> ndata.(nidx) <- cell;
|
|
| Cons tail -> tail.next <- cell;
|
|
end;
|
|
ndata_tail.(nidx) <- cell;
|
|
insert_bucket next
|
|
in
|
|
for i = 0 to Array.length odata - 1 do
|
|
insert_bucket odata.(i)
|
|
done;
|
|
if inplace then
|
|
for i = 0 to nsize - 1 do
|
|
match ndata_tail.(i) with
|
|
| Empty -> ()
|
|
| Cons tail -> tail.next <- Empty
|
|
done
|
|
|
|
let resize indexfun h =
|
|
let odata = h.data in
|
|
let osize = Array.length odata in
|
|
let nsize = osize * 2 in
|
|
if nsize < Sys.max_array_length then begin
|
|
let ndata = Array.make nsize Empty in
|
|
let inplace = not (ongoing_traversal h) in
|
|
h.data <- ndata; (* so that indexfun sees the new bucket count *)
|
|
insert_all_buckets (indexfun h) inplace odata ndata
|
|
end
|
|
|
|
let iter f h =
|
|
let rec do_bucket = function
|
|
| Empty ->
|
|
()
|
|
| Cons{key; data; next} ->
|
|
f key data; do_bucket next in
|
|
let old_trav = ongoing_traversal h in
|
|
if not old_trav then flip_ongoing_traversal h;
|
|
try
|
|
let d = h.data in
|
|
for i = 0 to Array.length d - 1 do
|
|
do_bucket d.(i)
|
|
done;
|
|
if not old_trav then flip_ongoing_traversal h;
|
|
with exn when not old_trav ->
|
|
flip_ongoing_traversal h;
|
|
raise exn
|
|
|
|
let rec filter_map_inplace_bucket f h i prec = function
|
|
| Empty ->
|
|
begin match prec with
|
|
| Empty -> h.data.(i) <- Empty
|
|
| Cons c -> c.next <- Empty
|
|
end
|
|
| (Cons ({key; data; next} as c)) as slot ->
|
|
begin match f key data with
|
|
| None ->
|
|
h.size <- h.size - 1;
|
|
filter_map_inplace_bucket f h i prec next
|
|
| Some data ->
|
|
begin match prec with
|
|
| Empty -> h.data.(i) <- slot
|
|
| Cons c -> c.next <- slot
|
|
end;
|
|
c.data <- data;
|
|
filter_map_inplace_bucket f h i slot next
|
|
end
|
|
|
|
let filter_map_inplace f h =
|
|
let d = h.data in
|
|
let old_trav = ongoing_traversal h in
|
|
if not old_trav then flip_ongoing_traversal h;
|
|
try
|
|
for i = 0 to Array.length d - 1 do
|
|
filter_map_inplace_bucket f h i Empty h.data.(i)
|
|
done;
|
|
if not old_trav then flip_ongoing_traversal h
|
|
with exn when not old_trav ->
|
|
flip_ongoing_traversal h;
|
|
raise exn
|
|
|
|
let fold f h init =
|
|
let rec do_bucket b accu =
|
|
match b with
|
|
Empty ->
|
|
accu
|
|
| Cons{key; data; next} ->
|
|
do_bucket next (f key data accu) in
|
|
let old_trav = ongoing_traversal h in
|
|
if not old_trav then flip_ongoing_traversal h;
|
|
try
|
|
let d = h.data in
|
|
let accu = ref init in
|
|
for i = 0 to Array.length d - 1 do
|
|
accu := do_bucket d.(i) !accu
|
|
done;
|
|
if not old_trav then flip_ongoing_traversal h;
|
|
!accu
|
|
with exn when not old_trav ->
|
|
flip_ongoing_traversal h;
|
|
raise exn
|
|
|
|
type statistics = {
|
|
num_bindings: int;
|
|
num_buckets: int;
|
|
max_bucket_length: int;
|
|
bucket_histogram: int array
|
|
}
|
|
|
|
let rec bucket_length accu = function
|
|
| Empty -> accu
|
|
| Cons{next} -> bucket_length (accu + 1) next
|
|
|
|
let stats h =
|
|
let mbl =
|
|
Array.fold_left (fun m b -> max m (bucket_length 0 b)) 0 h.data in
|
|
let histo = Array.make (mbl + 1) 0 in
|
|
Array.iter
|
|
(fun b ->
|
|
let l = bucket_length 0 b in
|
|
histo.(l) <- histo.(l) + 1)
|
|
h.data;
|
|
{ num_bindings = h.size;
|
|
num_buckets = Array.length h.data;
|
|
max_bucket_length = mbl;
|
|
bucket_histogram = histo }
|
|
|
|
(** {1 Iterators} *)
|
|
|
|
let to_seq tbl =
|
|
(* capture current array, so that even if the table is resized we
|
|
keep iterating on the same array *)
|
|
let tbl_data = tbl.data in
|
|
(* state: index * next bucket to traverse *)
|
|
let rec aux i buck () = match buck with
|
|
| Empty ->
|
|
if i = Array.length tbl_data
|
|
then Seq.Nil
|
|
else aux(i+1) tbl_data.(i) ()
|
|
| Cons {key; data; next} ->
|
|
Seq.Cons ((key, data), aux i next)
|
|
in
|
|
aux 0 Empty
|
|
|
|
let to_seq_keys m = Seq.map fst (to_seq m)
|
|
|
|
let to_seq_values m = Seq.map snd (to_seq m)
|
|
|
|
(* Functorial interface *)
|
|
|
|
module type HashedType =
|
|
sig
|
|
type t
|
|
val equal: t -> t -> bool
|
|
val hash: t -> int
|
|
end
|
|
|
|
module type SeededHashedType =
|
|
sig
|
|
type t
|
|
val equal: t -> t -> bool
|
|
val hash: int -> t -> int
|
|
end
|
|
|
|
module type S =
|
|
sig
|
|
type key
|
|
type !'a t
|
|
val create: int -> 'a t
|
|
val clear : 'a t -> unit
|
|
val reset : 'a t -> unit
|
|
val copy: 'a t -> 'a t
|
|
val add: 'a t -> key -> 'a -> unit
|
|
val remove: 'a t -> key -> unit
|
|
val find: 'a t -> key -> 'a
|
|
val find_opt: 'a t -> key -> 'a option
|
|
val find_all: 'a t -> key -> 'a list
|
|
val replace : 'a t -> key -> 'a -> unit
|
|
val mem : 'a t -> key -> bool
|
|
val iter: (key -> 'a -> unit) -> 'a t -> unit
|
|
val filter_map_inplace: (key -> 'a -> 'a option) -> 'a t -> unit
|
|
val fold: (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b
|
|
val length: 'a t -> int
|
|
val stats: 'a t -> statistics
|
|
val to_seq : 'a t -> (key * 'a) Seq.t
|
|
val to_seq_keys : _ t -> key Seq.t
|
|
val to_seq_values : 'a t -> 'a Seq.t
|
|
val add_seq : 'a t -> (key * 'a) Seq.t -> unit
|
|
val replace_seq : 'a t -> (key * 'a) Seq.t -> unit
|
|
val of_seq : (key * 'a) Seq.t -> 'a t
|
|
end
|
|
|
|
module type SeededS =
|
|
sig
|
|
type key
|
|
type !'a t
|
|
val create : ?random:bool -> int -> 'a t
|
|
val clear : 'a t -> unit
|
|
val reset : 'a t -> unit
|
|
val copy : 'a t -> 'a t
|
|
val add : 'a t -> key -> 'a -> unit
|
|
val remove : 'a t -> key -> unit
|
|
val find : 'a t -> key -> 'a
|
|
val find_opt: 'a t -> key -> 'a option
|
|
val find_all : 'a t -> key -> 'a list
|
|
val replace : 'a t -> key -> 'a -> unit
|
|
val mem : 'a t -> key -> bool
|
|
val iter : (key -> 'a -> unit) -> 'a t -> unit
|
|
val filter_map_inplace: (key -> 'a -> 'a option) -> 'a t -> unit
|
|
val fold : (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b
|
|
val length : 'a t -> int
|
|
val stats: 'a t -> statistics
|
|
val to_seq : 'a t -> (key * 'a) Seq.t
|
|
val to_seq_keys : _ t -> key Seq.t
|
|
val to_seq_values : 'a t -> 'a Seq.t
|
|
val add_seq : 'a t -> (key * 'a) Seq.t -> unit
|
|
val replace_seq : 'a t -> (key * 'a) Seq.t -> unit
|
|
val of_seq : (key * 'a) Seq.t -> 'a t
|
|
end
|
|
|
|
module MakeSeeded(H: SeededHashedType): (SeededS with type key = H.t) =
|
|
struct
|
|
type key = H.t
|
|
type 'a hashtbl = (key, 'a) t
|
|
type 'a t = 'a hashtbl
|
|
let create = create
|
|
let clear = clear
|
|
let reset = reset
|
|
let copy = copy
|
|
|
|
let key_index h key =
|
|
(H.hash h.seed key) land (Array.length h.data - 1)
|
|
|
|
let add h key data =
|
|
let i = key_index h key in
|
|
let bucket = Cons{key; data; next=h.data.(i)} in
|
|
h.data.(i) <- bucket;
|
|
h.size <- h.size + 1;
|
|
if h.size > Array.length h.data lsl 1 then resize key_index h
|
|
|
|
let rec remove_bucket h i key prec = function
|
|
| Empty ->
|
|
()
|
|
| (Cons {key=k; next}) as c ->
|
|
if H.equal k key
|
|
then begin
|
|
h.size <- h.size - 1;
|
|
match prec with
|
|
| Empty -> h.data.(i) <- next
|
|
| Cons c -> c.next <- next
|
|
end
|
|
else remove_bucket h i key c next
|
|
|
|
let remove h key =
|
|
let i = key_index h key in
|
|
remove_bucket h i key Empty h.data.(i)
|
|
|
|
let rec find_rec key = function
|
|
| Empty ->
|
|
raise Not_found
|
|
| Cons{key=k; data; next} ->
|
|
if H.equal key k then data else find_rec key next
|
|
|
|
let find h key =
|
|
match h.data.(key_index h key) with
|
|
| Empty -> raise Not_found
|
|
| Cons{key=k1; data=d1; next=next1} ->
|
|
if H.equal key k1 then d1 else
|
|
match next1 with
|
|
| Empty -> raise Not_found
|
|
| Cons{key=k2; data=d2; next=next2} ->
|
|
if H.equal key k2 then d2 else
|
|
match next2 with
|
|
| Empty -> raise Not_found
|
|
| Cons{key=k3; data=d3; next=next3} ->
|
|
if H.equal key k3 then d3 else find_rec key next3
|
|
|
|
let rec find_rec_opt key = function
|
|
| Empty ->
|
|
None
|
|
| Cons{key=k; data; next} ->
|
|
if H.equal key k then Some data else find_rec_opt key next
|
|
|
|
let find_opt h key =
|
|
match h.data.(key_index h key) with
|
|
| Empty -> None
|
|
| Cons{key=k1; data=d1; next=next1} ->
|
|
if H.equal key k1 then Some d1 else
|
|
match next1 with
|
|
| Empty -> None
|
|
| Cons{key=k2; data=d2; next=next2} ->
|
|
if H.equal key k2 then Some d2 else
|
|
match next2 with
|
|
| Empty -> None
|
|
| Cons{key=k3; data=d3; next=next3} ->
|
|
if H.equal key k3 then Some d3 else find_rec_opt key next3
|
|
|
|
let find_all h key =
|
|
let rec find_in_bucket = function
|
|
| Empty ->
|
|
[]
|
|
| Cons{key=k; data=d; next} ->
|
|
if H.equal k key
|
|
then d :: find_in_bucket next
|
|
else find_in_bucket next in
|
|
find_in_bucket h.data.(key_index h key)
|
|
|
|
let rec replace_bucket key data = function
|
|
| Empty ->
|
|
true
|
|
| Cons ({key=k; next} as slot) ->
|
|
if H.equal k key
|
|
then (slot.key <- key; slot.data <- data; false)
|
|
else replace_bucket key data next
|
|
|
|
let replace h key data =
|
|
let i = key_index h key in
|
|
let l = h.data.(i) in
|
|
if replace_bucket key data l then begin
|
|
h.data.(i) <- Cons{key; data; next=l};
|
|
h.size <- h.size + 1;
|
|
if h.size > Array.length h.data lsl 1 then resize key_index h
|
|
end
|
|
|
|
let mem h key =
|
|
let rec mem_in_bucket = function
|
|
| Empty ->
|
|
false
|
|
| Cons{key=k; next} ->
|
|
H.equal k key || mem_in_bucket next in
|
|
mem_in_bucket h.data.(key_index h key)
|
|
|
|
let add_seq tbl i =
|
|
Seq.iter (fun (k,v) -> add tbl k v) i
|
|
|
|
let replace_seq tbl i =
|
|
Seq.iter (fun (k,v) -> replace tbl k v) i
|
|
|
|
let of_seq i =
|
|
let tbl = create 16 in
|
|
replace_seq tbl i;
|
|
tbl
|
|
|
|
let iter = iter
|
|
let filter_map_inplace = filter_map_inplace
|
|
let fold = fold
|
|
let length = length
|
|
let stats = stats
|
|
let to_seq = to_seq
|
|
let to_seq_keys = to_seq_keys
|
|
let to_seq_values = to_seq_values
|
|
end
|
|
|
|
module Make(H: HashedType): (S with type key = H.t) =
|
|
struct
|
|
include MakeSeeded(struct
|
|
type t = H.t
|
|
let equal = H.equal
|
|
let hash (_seed: int) x = H.hash x
|
|
end)
|
|
let create sz = create ~random:false sz
|
|
let of_seq i =
|
|
let tbl = create 16 in
|
|
replace_seq tbl i;
|
|
tbl
|
|
end
|
|
|
|
(* Polymorphic hash function-based tables *)
|
|
(* Code included below the functorial interface to guard against accidental
|
|
use - see #2202 *)
|
|
|
|
external seeded_hash_param :
|
|
int -> int -> int -> 'a -> int = "caml_hash" [@@noalloc]
|
|
|
|
let hash x = seeded_hash_param 10 100 0 x
|
|
let hash_param n1 n2 x = seeded_hash_param n1 n2 0 x
|
|
let seeded_hash seed x = seeded_hash_param 10 100 seed x
|
|
|
|
let key_index h key =
|
|
if Obj.size (Obj.repr h) >= 4
|
|
then (seeded_hash_param 10 100 h.seed key) land (Array.length h.data - 1)
|
|
else invalid_arg "Hashtbl: unsupported hash table format"
|
|
|
|
let add h key data =
|
|
let i = key_index h key in
|
|
let bucket = Cons{key; data; next=h.data.(i)} in
|
|
h.data.(i) <- bucket;
|
|
h.size <- h.size + 1;
|
|
if h.size > Array.length h.data lsl 1 then resize key_index h
|
|
|
|
let rec remove_bucket h i key prec = function
|
|
| Empty ->
|
|
()
|
|
| (Cons {key=k; next}) as c ->
|
|
if compare k key = 0
|
|
then begin
|
|
h.size <- h.size - 1;
|
|
match prec with
|
|
| Empty -> h.data.(i) <- next
|
|
| Cons c -> c.next <- next
|
|
end
|
|
else remove_bucket h i key c next
|
|
|
|
let remove h key =
|
|
let i = key_index h key in
|
|
remove_bucket h i key Empty h.data.(i)
|
|
|
|
let rec find_rec key = function
|
|
| Empty ->
|
|
raise Not_found
|
|
| Cons{key=k; data; next} ->
|
|
if compare key k = 0 then data else find_rec key next
|
|
|
|
let find h key =
|
|
match h.data.(key_index h key) with
|
|
| Empty -> raise Not_found
|
|
| Cons{key=k1; data=d1; next=next1} ->
|
|
if compare key k1 = 0 then d1 else
|
|
match next1 with
|
|
| Empty -> raise Not_found
|
|
| Cons{key=k2; data=d2; next=next2} ->
|
|
if compare key k2 = 0 then d2 else
|
|
match next2 with
|
|
| Empty -> raise Not_found
|
|
| Cons{key=k3; data=d3; next=next3} ->
|
|
if compare key k3 = 0 then d3 else find_rec key next3
|
|
|
|
let rec find_rec_opt key = function
|
|
| Empty ->
|
|
None
|
|
| Cons{key=k; data; next} ->
|
|
if compare key k = 0 then Some data else find_rec_opt key next
|
|
|
|
let find_opt h key =
|
|
match h.data.(key_index h key) with
|
|
| Empty -> None
|
|
| Cons{key=k1; data=d1; next=next1} ->
|
|
if compare key k1 = 0 then Some d1 else
|
|
match next1 with
|
|
| Empty -> None
|
|
| Cons{key=k2; data=d2; next=next2} ->
|
|
if compare key k2 = 0 then Some d2 else
|
|
match next2 with
|
|
| Empty -> None
|
|
| Cons{key=k3; data=d3; next=next3} ->
|
|
if compare key k3 = 0 then Some d3 else find_rec_opt key next3
|
|
|
|
let find_all h key =
|
|
let rec find_in_bucket = function
|
|
| Empty ->
|
|
[]
|
|
| Cons{key=k; data; next} ->
|
|
if compare k key = 0
|
|
then data :: find_in_bucket next
|
|
else find_in_bucket next in
|
|
find_in_bucket h.data.(key_index h key)
|
|
|
|
let rec replace_bucket key data = function
|
|
| Empty ->
|
|
true
|
|
| Cons ({key=k; next} as slot) ->
|
|
if compare k key = 0
|
|
then (slot.key <- key; slot.data <- data; false)
|
|
else replace_bucket key data next
|
|
|
|
let replace h key data =
|
|
let i = key_index h key in
|
|
let l = h.data.(i) in
|
|
if replace_bucket key data l then begin
|
|
h.data.(i) <- Cons{key; data; next=l};
|
|
h.size <- h.size + 1;
|
|
if h.size > Array.length h.data lsl 1 then resize key_index h
|
|
end
|
|
|
|
let mem h key =
|
|
let rec mem_in_bucket = function
|
|
| Empty ->
|
|
false
|
|
| Cons{key=k; next} ->
|
|
compare k key = 0 || mem_in_bucket next in
|
|
mem_in_bucket h.data.(key_index h key)
|
|
|
|
let add_seq tbl i =
|
|
Seq.iter (fun (k,v) -> add tbl k v) i
|
|
|
|
let replace_seq tbl i =
|
|
Seq.iter (fun (k,v) -> replace tbl k v) i
|
|
|
|
let of_seq i =
|
|
let tbl = create 16 in
|
|
replace_seq tbl i;
|
|
tbl
|
|
|
|
let rebuild ?(random = !randomized) h =
|
|
let s = power_2_above 16 (Array.length h.data) in
|
|
let seed =
|
|
if random then Random.State.bits (Lazy.force prng)
|
|
else if Obj.size (Obj.repr h) >= 4 then h.seed
|
|
else 0 in
|
|
let h' = {
|
|
size = h.size;
|
|
data = Array.make s Empty;
|
|
seed = seed;
|
|
initial_size = if Obj.size (Obj.repr h) >= 4 then h.initial_size else s
|
|
} in
|
|
insert_all_buckets (key_index h') false h.data h'.data;
|
|
h'
|