357 lines
10 KiB
OCaml
357 lines
10 KiB
OCaml
(***********************************************************************)
|
|
(* *)
|
|
(* OCaml *)
|
|
(* *)
|
|
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
|
(* *)
|
|
(* Copyright 1996 Institut National de Recherche en Informatique et *)
|
|
(* en Automatique. All rights reserved. This file is distributed *)
|
|
(* under the terms of the GNU Library General Public License, with *)
|
|
(* the special exception on linking described in file ../LICENSE. *)
|
|
(* *)
|
|
(***********************************************************************)
|
|
|
|
(* $Id$ *)
|
|
|
|
(* Hash tables *)
|
|
|
|
external seeded_hash_param : int -> int -> int -> 'a -> int = "caml_hash" "noalloc"
|
|
external old_hash_param : int -> int -> 'a -> int = "caml_hash_univ_param" "noalloc"
|
|
|
|
let hash x = seeded_hash_param 10 100 0 x
|
|
let hash_param n1 n2 x = seeded_hash_param n1 n2 0 x
|
|
let seeded_hash seed x = seeded_hash_param 10 100 seed x
|
|
|
|
(* We do dynamic hashing, and resize the table and rehash the elements
|
|
when buckets become too long. *)
|
|
|
|
type ('a, 'b) t =
|
|
{ mutable size: int; (* number of entries *)
|
|
mutable data: ('a, 'b) bucketlist array; (* the buckets *)
|
|
mutable seed: int } (* for randomization *)
|
|
|
|
and ('a, 'b) bucketlist =
|
|
Empty
|
|
| Cons of 'a * 'b * ('a, 'b) bucketlist
|
|
|
|
let rec power_2_above x n =
|
|
if x >= n then x
|
|
else if x * 2 > Sys.max_array_length then x
|
|
else power_2_above (x * 2) n
|
|
|
|
let create ?(seed = 0) initial_size =
|
|
let s = power_2_above 16 initial_size in
|
|
{ size = 0; seed = seed; data = Array.make s Empty }
|
|
|
|
let clear h =
|
|
for i = 0 to Array.length h.data - 1 do
|
|
h.data.(i) <- Empty
|
|
done;
|
|
h.size <- 0
|
|
|
|
let copy h = { h with data = Array.copy h.data }
|
|
|
|
let length h = h.size
|
|
|
|
let resize indexfun h =
|
|
let odata = h.data in
|
|
let osize = Array.length odata in
|
|
let nsize = osize * 2 in
|
|
if nsize < Sys.max_array_length then begin
|
|
let ndata = Array.create nsize Empty in
|
|
h.data <- ndata; (* so that indexfun sees the new bucket count *)
|
|
let rec insert_bucket = function
|
|
Empty -> ()
|
|
| Cons(key, data, rest) ->
|
|
insert_bucket rest; (* preserve original order of elements *)
|
|
let nidx = indexfun h key in
|
|
ndata.(nidx) <- Cons(key, data, ndata.(nidx)) in
|
|
for i = 0 to osize - 1 do
|
|
insert_bucket odata.(i)
|
|
done
|
|
end
|
|
|
|
let key_index h key =
|
|
(* compatibility with old hash tables *)
|
|
if Obj.size (Obj.repr h) = 3
|
|
then (seeded_hash_param 10 100 h.seed key) land (Array.length h.data - 1)
|
|
else (old_hash_param 10 100 key) mod (Array.length h.data)
|
|
|
|
let add h key info =
|
|
let i = key_index h key in
|
|
let bucket = Cons(key, info, h.data.(i)) in
|
|
h.data.(i) <- bucket;
|
|
h.size <- h.size + 1;
|
|
if h.size > Array.length h.data lsl 1 then resize key_index h
|
|
|
|
let remove h key =
|
|
let rec remove_bucket = function
|
|
| Empty ->
|
|
Empty
|
|
| Cons(k, i, next) ->
|
|
if compare k key = 0
|
|
then begin h.size <- h.size - 1; next end
|
|
else Cons(k, i, remove_bucket next) in
|
|
let i = key_index h key in
|
|
h.data.(i) <- remove_bucket h.data.(i)
|
|
|
|
let rec find_rec key = function
|
|
| Empty ->
|
|
raise Not_found
|
|
| Cons(k, d, rest) ->
|
|
if compare key k = 0 then d else find_rec key rest
|
|
|
|
let find h key =
|
|
match h.data.(key_index h key) with
|
|
| Empty -> raise Not_found
|
|
| Cons(k1, d1, rest1) ->
|
|
if compare key k1 = 0 then d1 else
|
|
match rest1 with
|
|
| Empty -> raise Not_found
|
|
| Cons(k2, d2, rest2) ->
|
|
if compare key k2 = 0 then d2 else
|
|
match rest2 with
|
|
| Empty -> raise Not_found
|
|
| Cons(k3, d3, rest3) ->
|
|
if compare key k3 = 0 then d3 else find_rec key rest3
|
|
|
|
let find_all h key =
|
|
let rec find_in_bucket = function
|
|
| Empty ->
|
|
[]
|
|
| Cons(k, d, rest) ->
|
|
if compare k key = 0
|
|
then d :: find_in_bucket rest
|
|
else find_in_bucket rest in
|
|
find_in_bucket h.data.(key_index h key)
|
|
|
|
let replace h key info =
|
|
let rec replace_bucket = function
|
|
| Empty ->
|
|
raise Not_found
|
|
| Cons(k, i, next) ->
|
|
if compare k key = 0
|
|
then Cons(key, info, next)
|
|
else Cons(k, i, replace_bucket next) in
|
|
let i = key_index h key in
|
|
let l = h.data.(i) in
|
|
try
|
|
h.data.(i) <- replace_bucket l
|
|
with Not_found ->
|
|
h.data.(i) <- Cons(key, info, l);
|
|
h.size <- h.size + 1;
|
|
if h.size > Array.length h.data lsl 1 then resize key_index h
|
|
|
|
let mem h key =
|
|
let rec mem_in_bucket = function
|
|
| Empty ->
|
|
false
|
|
| Cons(k, d, rest) ->
|
|
compare k key = 0 || mem_in_bucket rest in
|
|
mem_in_bucket h.data.(key_index h key)
|
|
|
|
let iter f h =
|
|
let rec do_bucket = function
|
|
| Empty ->
|
|
()
|
|
| Cons(k, d, rest) ->
|
|
f k d; do_bucket rest in
|
|
let d = h.data in
|
|
for i = 0 to Array.length d - 1 do
|
|
do_bucket d.(i)
|
|
done
|
|
|
|
let fold f h init =
|
|
let rec do_bucket b accu =
|
|
match b with
|
|
Empty ->
|
|
accu
|
|
| Cons(k, d, rest) ->
|
|
do_bucket rest (f k d accu) in
|
|
let d = h.data in
|
|
let accu = ref init in
|
|
for i = 0 to Array.length d - 1 do
|
|
accu := do_bucket d.(i) !accu
|
|
done;
|
|
!accu
|
|
|
|
type statistics = {
|
|
num_bindings: int;
|
|
num_buckets: int;
|
|
max_bucket_length: int;
|
|
bucket_histogram: int array
|
|
}
|
|
|
|
let rec bucket_length accu = function
|
|
| Empty -> accu
|
|
| Cons(_, _, rest) -> bucket_length (accu + 1) rest
|
|
|
|
let stats h =
|
|
let mbl =
|
|
Array.fold_left (fun m b -> max m (bucket_length 0 b)) 0 h.data in
|
|
let histo = Array.make (mbl + 1) 0 in
|
|
Array.iter
|
|
(fun b ->
|
|
let l = bucket_length 0 b in
|
|
histo.(l) <- histo.(l) + 1)
|
|
h.data;
|
|
{ num_bindings = h.size;
|
|
num_buckets = Array.length h.data;
|
|
max_bucket_length = mbl;
|
|
bucket_histogram = histo }
|
|
|
|
(* Functorial interface *)
|
|
|
|
module type HashedType =
|
|
sig
|
|
type t
|
|
val equal: t -> t -> bool
|
|
val hash: t -> int
|
|
end
|
|
|
|
module type SeededHashedType =
|
|
sig
|
|
type t
|
|
val equal: t -> t -> bool
|
|
val hash: int -> t -> int
|
|
end
|
|
|
|
module type S =
|
|
sig
|
|
type key
|
|
type 'a t
|
|
val create: int -> 'a t
|
|
val clear: 'a t -> unit
|
|
val copy: 'a t -> 'a t
|
|
val add: 'a t -> key -> 'a -> unit
|
|
val remove: 'a t -> key -> unit
|
|
val find: 'a t -> key -> 'a
|
|
val find_all: 'a t -> key -> 'a list
|
|
val replace : 'a t -> key -> 'a -> unit
|
|
val mem : 'a t -> key -> bool
|
|
val iter: (key -> 'a -> unit) -> 'a t -> unit
|
|
val fold: (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b
|
|
val length: 'a t -> int
|
|
val stats: 'a t -> statistics
|
|
end
|
|
|
|
module type SeededS =
|
|
sig
|
|
type key
|
|
type 'a t
|
|
val create : ?seed:int -> int -> 'a t
|
|
val clear : 'a t -> unit
|
|
val copy : 'a t -> 'a t
|
|
val add : 'a t -> key -> 'a -> unit
|
|
val remove : 'a t -> key -> unit
|
|
val find : 'a t -> key -> 'a
|
|
val find_all : 'a t -> key -> 'a list
|
|
val replace : 'a t -> key -> 'a -> unit
|
|
val mem : 'a t -> key -> bool
|
|
val iter : (key -> 'a -> unit) -> 'a t -> unit
|
|
val fold : (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b
|
|
val length : 'a t -> int
|
|
val stats: 'a t -> statistics
|
|
end
|
|
|
|
module MakeSeeded(H: SeededHashedType): (SeededS with type key = H.t) =
|
|
struct
|
|
type key = H.t
|
|
type 'a hashtbl = (key, 'a) t
|
|
type 'a t = 'a hashtbl
|
|
let create = create
|
|
let clear = clear
|
|
let copy = copy
|
|
|
|
let key_index h key =
|
|
(H.hash h.seed key) land (Array.length h.data - 1)
|
|
|
|
let add h key info =
|
|
let i = key_index h key in
|
|
let bucket = Cons(key, info, h.data.(i)) in
|
|
h.data.(i) <- bucket;
|
|
h.size <- h.size + 1;
|
|
if h.size > Array.length h.data lsl 1 then resize key_index h
|
|
|
|
let remove h key =
|
|
let rec remove_bucket = function
|
|
| Empty ->
|
|
Empty
|
|
| Cons(k, i, next) ->
|
|
if H.equal k key
|
|
then begin h.size <- h.size - 1; next end
|
|
else Cons(k, i, remove_bucket next) in
|
|
let i = key_index h key in
|
|
h.data.(i) <- remove_bucket h.data.(i)
|
|
|
|
let rec find_rec key = function
|
|
| Empty ->
|
|
raise Not_found
|
|
| Cons(k, d, rest) ->
|
|
if H.equal key k then d else find_rec key rest
|
|
|
|
let find h key =
|
|
match h.data.(key_index h key) with
|
|
| Empty -> raise Not_found
|
|
| Cons(k1, d1, rest1) ->
|
|
if H.equal key k1 then d1 else
|
|
match rest1 with
|
|
| Empty -> raise Not_found
|
|
| Cons(k2, d2, rest2) ->
|
|
if H.equal key k2 then d2 else
|
|
match rest2 with
|
|
| Empty -> raise Not_found
|
|
| Cons(k3, d3, rest3) ->
|
|
if H.equal key k3 then d3 else find_rec key rest3
|
|
|
|
let find_all h key =
|
|
let rec find_in_bucket = function
|
|
| Empty ->
|
|
[]
|
|
| Cons(k, d, rest) ->
|
|
if H.equal k key
|
|
then d :: find_in_bucket rest
|
|
else find_in_bucket rest in
|
|
find_in_bucket h.data.(key_index h key)
|
|
|
|
let replace h key info =
|
|
let rec replace_bucket = function
|
|
| Empty ->
|
|
raise Not_found
|
|
| Cons(k, i, next) ->
|
|
if H.equal k key
|
|
then Cons(key, info, next)
|
|
else Cons(k, i, replace_bucket next) in
|
|
let i = key_index h key in
|
|
let l = h.data.(i) in
|
|
try
|
|
h.data.(i) <- replace_bucket l
|
|
with Not_found ->
|
|
h.data.(i) <- Cons(key, info, l);
|
|
h.size <- h.size + 1;
|
|
if h.size > Array.length h.data lsl 1 then resize key_index h
|
|
|
|
let mem h key =
|
|
let rec mem_in_bucket = function
|
|
| Empty ->
|
|
false
|
|
| Cons(k, d, rest) ->
|
|
H.equal k key || mem_in_bucket rest in
|
|
mem_in_bucket h.data.(key_index h key)
|
|
|
|
let iter = iter
|
|
let fold = fold
|
|
let length = length
|
|
let stats = stats
|
|
end
|
|
|
|
module Make(H: HashedType): (S with type key = H.t) =
|
|
struct
|
|
include MakeSeeded(struct
|
|
type t = H.t
|
|
let equal = H.equal
|
|
let hash (seed: int) x = H.hash x
|
|
end)
|
|
let create sz = create ~seed:0 sz
|
|
end
|