ocaml/stdlib/hashtbl.ml

631 lines
18 KiB
OCaml

(**************************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1996 Institut National de Recherche en Informatique et *)
(* en Automatique. *)
(* *)
(* All rights reserved. This file is distributed under the terms of *)
(* the GNU Lesser General Public License version 2.1, with the *)
(* special exception on linking described in the file LICENSE. *)
(* *)
(**************************************************************************)
(* Hash tables *)
(* We do dynamic hashing, and resize the table and rehash the elements
when buckets become too long. *)
type ('a, 'b) t =
{ mutable size: int; (* number of entries *)
mutable data: ('a, 'b) bucketlist array; (* the buckets *)
mutable seed: int; (* for randomization *)
mutable initial_size: int; (* initial array size *)
}
and ('a, 'b) bucketlist =
Empty
| Cons of { mutable key: 'a;
mutable data: 'b;
mutable next: ('a, 'b) bucketlist }
(* The sign of initial_size encodes the fact that a traversal is
ongoing or not.
This disables the efficient in place implementation of resizing.
*)
let ongoing_traversal h =
Obj.size (Obj.repr h) < 4 (* compatibility with old hash tables *)
|| h.initial_size < 0
let flip_ongoing_traversal h =
h.initial_size <- - h.initial_size
(* To pick random seeds if requested *)
let randomized_default =
let params =
try Sys.getenv "OCAMLRUNPARAM" with Not_found ->
try Sys.getenv "CAMLRUNPARAM" with Not_found -> "" in
String.contains params 'R'
let randomized = ref randomized_default
let randomize () = randomized := true
let is_randomized () = !randomized
let prng = lazy (Random.State.make_self_init())
(* Functions which appear before the functorial interface must either be
independent of the hash function or take it as a parameter (see #2202 and
code below the functor definitions. *)
(* Creating a fresh, empty table *)
let rec power_2_above x n =
if x >= n then x
else if x * 2 > Sys.max_array_length then x
else power_2_above (x * 2) n
let create ?(random = !randomized) initial_size =
let s = power_2_above 16 initial_size in
let seed = if random then Random.State.bits (Lazy.force prng) else 0 in
{ initial_size = s; size = 0; seed = seed; data = Array.make s Empty }
let clear h =
if h.size > 0 then begin
h.size <- 0;
Array.fill h.data 0 (Array.length h.data) Empty
end
let reset h =
let len = Array.length h.data in
if Obj.size (Obj.repr h) < 4 (* compatibility with old hash tables *)
|| len = abs h.initial_size then
clear h
else begin
h.size <- 0;
h.data <- Array.make (abs h.initial_size) Empty
end
let copy_bucketlist = function
| Empty -> Empty
| Cons {key; data; next} ->
let rec loop prec = function
| Empty -> ()
| Cons {key; data; next} ->
let r = Cons {key; data; next} in
begin match prec with
| Empty -> assert false
| Cons prec -> prec.next <- r
end;
loop r next
in
let r = Cons {key; data; next} in
loop r next;
r
let copy h = { h with data = Array.map copy_bucketlist h.data }
let length h = h.size
let insert_all_buckets indexfun inplace odata ndata =
let nsize = Array.length ndata in
let ndata_tail = Array.make nsize Empty in
let rec insert_bucket = function
| Empty -> ()
| Cons {key; data; next} as cell ->
let cell =
if inplace then cell
else Cons {key; data; next = Empty}
in
let nidx = indexfun key in
begin match ndata_tail.(nidx) with
| Empty -> ndata.(nidx) <- cell;
| Cons tail -> tail.next <- cell;
end;
ndata_tail.(nidx) <- cell;
insert_bucket next
in
for i = 0 to Array.length odata - 1 do
insert_bucket odata.(i)
done;
if inplace then
for i = 0 to nsize - 1 do
match ndata_tail.(i) with
| Empty -> ()
| Cons tail -> tail.next <- Empty
done
let resize indexfun h =
let odata = h.data in
let osize = Array.length odata in
let nsize = osize * 2 in
if nsize < Sys.max_array_length then begin
let ndata = Array.make nsize Empty in
let inplace = not (ongoing_traversal h) in
h.data <- ndata; (* so that indexfun sees the new bucket count *)
insert_all_buckets (indexfun h) inplace odata ndata
end
let iter f h =
let rec do_bucket = function
| Empty ->
()
| Cons{key; data; next} ->
f key data; do_bucket next in
let old_trav = ongoing_traversal h in
if not old_trav then flip_ongoing_traversal h;
try
let d = h.data in
for i = 0 to Array.length d - 1 do
do_bucket d.(i)
done;
if not old_trav then flip_ongoing_traversal h;
with exn when not old_trav ->
flip_ongoing_traversal h;
raise exn
let rec filter_map_inplace_bucket f h i prec = function
| Empty ->
begin match prec with
| Empty -> h.data.(i) <- Empty
| Cons c -> c.next <- Empty
end
| (Cons ({key; data; next} as c)) as slot ->
begin match f key data with
| None ->
h.size <- h.size - 1;
filter_map_inplace_bucket f h i prec next
| Some data ->
begin match prec with
| Empty -> h.data.(i) <- slot
| Cons c -> c.next <- slot
end;
c.data <- data;
filter_map_inplace_bucket f h i slot next
end
let filter_map_inplace f h =
let d = h.data in
let old_trav = ongoing_traversal h in
if not old_trav then flip_ongoing_traversal h;
try
for i = 0 to Array.length d - 1 do
filter_map_inplace_bucket f h i Empty h.data.(i)
done;
if not old_trav then flip_ongoing_traversal h
with exn when not old_trav ->
flip_ongoing_traversal h;
raise exn
let fold f h init =
let rec do_bucket b accu =
match b with
Empty ->
accu
| Cons{key; data; next} ->
do_bucket next (f key data accu) in
let old_trav = ongoing_traversal h in
if not old_trav then flip_ongoing_traversal h;
try
let d = h.data in
let accu = ref init in
for i = 0 to Array.length d - 1 do
accu := do_bucket d.(i) !accu
done;
if not old_trav then flip_ongoing_traversal h;
!accu
with exn when not old_trav ->
flip_ongoing_traversal h;
raise exn
type statistics = {
num_bindings: int;
num_buckets: int;
max_bucket_length: int;
bucket_histogram: int array
}
let rec bucket_length accu = function
| Empty -> accu
| Cons{next} -> bucket_length (accu + 1) next
let stats h =
let mbl =
Array.fold_left (fun m b -> max m (bucket_length 0 b)) 0 h.data in
let histo = Array.make (mbl + 1) 0 in
Array.iter
(fun b ->
let l = bucket_length 0 b in
histo.(l) <- histo.(l) + 1)
h.data;
{ num_bindings = h.size;
num_buckets = Array.length h.data;
max_bucket_length = mbl;
bucket_histogram = histo }
(** {1 Iterators} *)
let to_seq tbl =
(* capture current array, so that even if the table is resized we
keep iterating on the same array *)
let tbl_data = tbl.data in
(* state: index * next bucket to traverse *)
let rec aux i buck () = match buck with
| Empty ->
if i = Array.length tbl_data
then Seq.Nil
else aux(i+1) tbl_data.(i) ()
| Cons {key; data; next} ->
Seq.Cons ((key, data), aux i next)
in
aux 0 Empty
let to_seq_keys m = Seq.map fst (to_seq m)
let to_seq_values m = Seq.map snd (to_seq m)
(* Functorial interface *)
module type HashedType =
sig
type t
val equal: t -> t -> bool
val hash: t -> int
end
module type SeededHashedType =
sig
type t
val equal: t -> t -> bool
val hash: int -> t -> int
end
module type S =
sig
type key
type !'a t
val create: int -> 'a t
val clear : 'a t -> unit
val reset : 'a t -> unit
val copy: 'a t -> 'a t
val add: 'a t -> key -> 'a -> unit
val remove: 'a t -> key -> unit
val find: 'a t -> key -> 'a
val find_opt: 'a t -> key -> 'a option
val find_all: 'a t -> key -> 'a list
val replace : 'a t -> key -> 'a -> unit
val mem : 'a t -> key -> bool
val iter: (key -> 'a -> unit) -> 'a t -> unit
val filter_map_inplace: (key -> 'a -> 'a option) -> 'a t -> unit
val fold: (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b
val length: 'a t -> int
val stats: 'a t -> statistics
val to_seq : 'a t -> (key * 'a) Seq.t
val to_seq_keys : _ t -> key Seq.t
val to_seq_values : 'a t -> 'a Seq.t
val add_seq : 'a t -> (key * 'a) Seq.t -> unit
val replace_seq : 'a t -> (key * 'a) Seq.t -> unit
val of_seq : (key * 'a) Seq.t -> 'a t
end
module type SeededS =
sig
type key
type !'a t
val create : ?random:bool -> int -> 'a t
val clear : 'a t -> unit
val reset : 'a t -> unit
val copy : 'a t -> 'a t
val add : 'a t -> key -> 'a -> unit
val remove : 'a t -> key -> unit
val find : 'a t -> key -> 'a
val find_opt: 'a t -> key -> 'a option
val find_all : 'a t -> key -> 'a list
val replace : 'a t -> key -> 'a -> unit
val mem : 'a t -> key -> bool
val iter : (key -> 'a -> unit) -> 'a t -> unit
val filter_map_inplace: (key -> 'a -> 'a option) -> 'a t -> unit
val fold : (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b
val length : 'a t -> int
val stats: 'a t -> statistics
val to_seq : 'a t -> (key * 'a) Seq.t
val to_seq_keys : _ t -> key Seq.t
val to_seq_values : 'a t -> 'a Seq.t
val add_seq : 'a t -> (key * 'a) Seq.t -> unit
val replace_seq : 'a t -> (key * 'a) Seq.t -> unit
val of_seq : (key * 'a) Seq.t -> 'a t
end
module MakeSeeded(H: SeededHashedType): (SeededS with type key = H.t) =
struct
type key = H.t
type 'a hashtbl = (key, 'a) t
type 'a t = 'a hashtbl
let create = create
let clear = clear
let reset = reset
let copy = copy
let key_index h key =
(H.hash h.seed key) land (Array.length h.data - 1)
let add h key data =
let i = key_index h key in
let bucket = Cons{key; data; next=h.data.(i)} in
h.data.(i) <- bucket;
h.size <- h.size + 1;
if h.size > Array.length h.data lsl 1 then resize key_index h
let rec remove_bucket h i key prec = function
| Empty ->
()
| (Cons {key=k; next}) as c ->
if H.equal k key
then begin
h.size <- h.size - 1;
match prec with
| Empty -> h.data.(i) <- next
| Cons c -> c.next <- next
end
else remove_bucket h i key c next
let remove h key =
let i = key_index h key in
remove_bucket h i key Empty h.data.(i)
let rec find_rec key = function
| Empty ->
raise Not_found
| Cons{key=k; data; next} ->
if H.equal key k then data else find_rec key next
let find h key =
match h.data.(key_index h key) with
| Empty -> raise Not_found
| Cons{key=k1; data=d1; next=next1} ->
if H.equal key k1 then d1 else
match next1 with
| Empty -> raise Not_found
| Cons{key=k2; data=d2; next=next2} ->
if H.equal key k2 then d2 else
match next2 with
| Empty -> raise Not_found
| Cons{key=k3; data=d3; next=next3} ->
if H.equal key k3 then d3 else find_rec key next3
let rec find_rec_opt key = function
| Empty ->
None
| Cons{key=k; data; next} ->
if H.equal key k then Some data else find_rec_opt key next
let find_opt h key =
match h.data.(key_index h key) with
| Empty -> None
| Cons{key=k1; data=d1; next=next1} ->
if H.equal key k1 then Some d1 else
match next1 with
| Empty -> None
| Cons{key=k2; data=d2; next=next2} ->
if H.equal key k2 then Some d2 else
match next2 with
| Empty -> None
| Cons{key=k3; data=d3; next=next3} ->
if H.equal key k3 then Some d3 else find_rec_opt key next3
let find_all h key =
let rec find_in_bucket = function
| Empty ->
[]
| Cons{key=k; data=d; next} ->
if H.equal k key
then d :: find_in_bucket next
else find_in_bucket next in
find_in_bucket h.data.(key_index h key)
let rec replace_bucket key data = function
| Empty ->
true
| Cons ({key=k; next} as slot) ->
if H.equal k key
then (slot.key <- key; slot.data <- data; false)
else replace_bucket key data next
let replace h key data =
let i = key_index h key in
let l = h.data.(i) in
if replace_bucket key data l then begin
h.data.(i) <- Cons{key; data; next=l};
h.size <- h.size + 1;
if h.size > Array.length h.data lsl 1 then resize key_index h
end
let mem h key =
let rec mem_in_bucket = function
| Empty ->
false
| Cons{key=k; next} ->
H.equal k key || mem_in_bucket next in
mem_in_bucket h.data.(key_index h key)
let add_seq tbl i =
Seq.iter (fun (k,v) -> add tbl k v) i
let replace_seq tbl i =
Seq.iter (fun (k,v) -> replace tbl k v) i
let of_seq i =
let tbl = create 16 in
replace_seq tbl i;
tbl
let iter = iter
let filter_map_inplace = filter_map_inplace
let fold = fold
let length = length
let stats = stats
let to_seq = to_seq
let to_seq_keys = to_seq_keys
let to_seq_values = to_seq_values
end
module Make(H: HashedType): (S with type key = H.t) =
struct
include MakeSeeded(struct
type t = H.t
let equal = H.equal
let hash (_seed: int) x = H.hash x
end)
let create sz = create ~random:false sz
let of_seq i =
let tbl = create 16 in
replace_seq tbl i;
tbl
end
(* Polymorphic hash function-based tables *)
(* Code included below the functorial interface to guard against accidental
use - see #2202 *)
external seeded_hash_param :
int -> int -> int -> 'a -> int = "caml_hash" [@@noalloc]
let hash x = seeded_hash_param 10 100 0 x
let hash_param n1 n2 x = seeded_hash_param n1 n2 0 x
let seeded_hash seed x = seeded_hash_param 10 100 seed x
let key_index h key =
if Obj.size (Obj.repr h) >= 4
then (seeded_hash_param 10 100 h.seed key) land (Array.length h.data - 1)
else invalid_arg "Hashtbl: unsupported hash table format"
let add h key data =
let i = key_index h key in
let bucket = Cons{key; data; next=h.data.(i)} in
h.data.(i) <- bucket;
h.size <- h.size + 1;
if h.size > Array.length h.data lsl 1 then resize key_index h
let rec remove_bucket h i key prec = function
| Empty ->
()
| (Cons {key=k; next}) as c ->
if compare k key = 0
then begin
h.size <- h.size - 1;
match prec with
| Empty -> h.data.(i) <- next
| Cons c -> c.next <- next
end
else remove_bucket h i key c next
let remove h key =
let i = key_index h key in
remove_bucket h i key Empty h.data.(i)
let rec find_rec key = function
| Empty ->
raise Not_found
| Cons{key=k; data; next} ->
if compare key k = 0 then data else find_rec key next
let find h key =
match h.data.(key_index h key) with
| Empty -> raise Not_found
| Cons{key=k1; data=d1; next=next1} ->
if compare key k1 = 0 then d1 else
match next1 with
| Empty -> raise Not_found
| Cons{key=k2; data=d2; next=next2} ->
if compare key k2 = 0 then d2 else
match next2 with
| Empty -> raise Not_found
| Cons{key=k3; data=d3; next=next3} ->
if compare key k3 = 0 then d3 else find_rec key next3
let rec find_rec_opt key = function
| Empty ->
None
| Cons{key=k; data; next} ->
if compare key k = 0 then Some data else find_rec_opt key next
let find_opt h key =
match h.data.(key_index h key) with
| Empty -> None
| Cons{key=k1; data=d1; next=next1} ->
if compare key k1 = 0 then Some d1 else
match next1 with
| Empty -> None
| Cons{key=k2; data=d2; next=next2} ->
if compare key k2 = 0 then Some d2 else
match next2 with
| Empty -> None
| Cons{key=k3; data=d3; next=next3} ->
if compare key k3 = 0 then Some d3 else find_rec_opt key next3
let find_all h key =
let rec find_in_bucket = function
| Empty ->
[]
| Cons{key=k; data; next} ->
if compare k key = 0
then data :: find_in_bucket next
else find_in_bucket next in
find_in_bucket h.data.(key_index h key)
let rec replace_bucket key data = function
| Empty ->
true
| Cons ({key=k; next} as slot) ->
if compare k key = 0
then (slot.key <- key; slot.data <- data; false)
else replace_bucket key data next
let replace h key data =
let i = key_index h key in
let l = h.data.(i) in
if replace_bucket key data l then begin
h.data.(i) <- Cons{key; data; next=l};
h.size <- h.size + 1;
if h.size > Array.length h.data lsl 1 then resize key_index h
end
let mem h key =
let rec mem_in_bucket = function
| Empty ->
false
| Cons{key=k; next} ->
compare k key = 0 || mem_in_bucket next in
mem_in_bucket h.data.(key_index h key)
let add_seq tbl i =
Seq.iter (fun (k,v) -> add tbl k v) i
let replace_seq tbl i =
Seq.iter (fun (k,v) -> replace tbl k v) i
let of_seq i =
let tbl = create 16 in
replace_seq tbl i;
tbl
let rebuild ?(random = !randomized) h =
let s = power_2_above 16 (Array.length h.data) in
let seed =
if random then Random.State.bits (Lazy.force prng)
else if Obj.size (Obj.repr h) >= 4 then h.seed
else 0 in
let h' = {
size = h.size;
data = Array.make s Empty;
seed = seed;
initial_size = if Obj.size (Obj.repr h) >= 4 then h.initial_size else s
} in
insert_all_buckets (key_index h') false h.data h'.data;
h'