315 lines
8.7 KiB
OCaml
315 lines
8.7 KiB
OCaml
(***********************************************************************)
|
|
(* *)
|
|
(* OCaml *)
|
|
(* *)
|
|
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
|
(* *)
|
|
(* Copyright 1996 Institut National de Recherche en Informatique et *)
|
|
(* en Automatique. All rights reserved. This file is distributed *)
|
|
(* under the terms of the Q Public License version 1.0. *)
|
|
(* *)
|
|
(***********************************************************************)
|
|
|
|
(* Errors *)
|
|
|
|
exception Fatal_error
|
|
|
|
let fatal_error msg =
|
|
prerr_string ">> Fatal error: "; prerr_endline msg; raise Fatal_error
|
|
|
|
(* Exceptions *)
|
|
|
|
let try_finally work cleanup =
|
|
let result = (try work () with e -> cleanup (); raise e) in
|
|
cleanup ();
|
|
result
|
|
;;
|
|
|
|
(* List functions *)
|
|
|
|
let rec map_end f l1 l2 =
|
|
match l1 with
|
|
[] -> l2
|
|
| hd::tl -> f hd :: map_end f tl l2
|
|
|
|
let rec map_left_right f = function
|
|
[] -> []
|
|
| hd::tl -> let res = f hd in res :: map_left_right f tl
|
|
|
|
let rec for_all2 pred l1 l2 =
|
|
match (l1, l2) with
|
|
([], []) -> true
|
|
| (hd1::tl1, hd2::tl2) -> pred hd1 hd2 && for_all2 pred tl1 tl2
|
|
| (_, _) -> false
|
|
|
|
let rec replicate_list elem n =
|
|
if n <= 0 then [] else elem :: replicate_list elem (n-1)
|
|
|
|
let rec list_remove x = function
|
|
[] -> []
|
|
| hd :: tl ->
|
|
if hd = x then tl else hd :: list_remove x tl
|
|
|
|
let rec split_last = function
|
|
[] -> assert false
|
|
| [x] -> ([], x)
|
|
| hd :: tl ->
|
|
let (lst, last) = split_last tl in
|
|
(hd :: lst, last)
|
|
|
|
let rec samelist pred l1 l2 =
|
|
match (l1, l2) with
|
|
| ([], []) -> true
|
|
| (hd1 :: tl1, hd2 :: tl2) -> pred hd1 hd2 && samelist pred tl1 tl2
|
|
| (_, _) -> false
|
|
|
|
(* Options *)
|
|
|
|
let may f = function
|
|
Some x -> f x
|
|
| None -> ()
|
|
|
|
let may_map f = function
|
|
Some x -> Some (f x)
|
|
| None -> None
|
|
|
|
(* File functions *)
|
|
|
|
let find_in_path path name =
|
|
if not (Filename.is_implicit name) then
|
|
if Sys.file_exists name then name else raise Not_found
|
|
else begin
|
|
let rec try_dir = function
|
|
[] -> raise Not_found
|
|
| dir::rem ->
|
|
let fullname = Filename.concat dir name in
|
|
if Sys.file_exists fullname then fullname else try_dir rem
|
|
in try_dir path
|
|
end
|
|
|
|
let find_in_path_uncap path name =
|
|
let uname = String.uncapitalize name in
|
|
let rec try_dir = function
|
|
[] -> raise Not_found
|
|
| dir::rem ->
|
|
let fullname = Filename.concat dir name
|
|
and ufullname = Filename.concat dir uname in
|
|
if Sys.file_exists ufullname then ufullname
|
|
else if Sys.file_exists fullname then fullname
|
|
else try_dir rem
|
|
in try_dir path
|
|
|
|
let remove_file filename =
|
|
try
|
|
Sys.remove filename
|
|
with Sys_error msg ->
|
|
()
|
|
|
|
(* Expand a -I option: if it starts with +, make it relative to the standard
|
|
library directory *)
|
|
|
|
let expand_directory alt s =
|
|
if String.length s > 0 && s.[0] = '+'
|
|
then Filename.concat alt
|
|
(String.sub s 1 (String.length s - 1))
|
|
else s
|
|
|
|
(* Hashtable functions *)
|
|
|
|
let create_hashtable size init =
|
|
let tbl = Hashtbl.create size in
|
|
List.iter (fun (key, data) -> Hashtbl.add tbl key data) init;
|
|
tbl
|
|
|
|
(* File copy *)
|
|
|
|
let copy_file ic oc =
|
|
let buff = String.create 0x1000 in
|
|
let rec copy () =
|
|
let n = input ic buff 0 0x1000 in
|
|
if n = 0 then () else (output oc buff 0 n; copy())
|
|
in copy()
|
|
|
|
let copy_file_chunk ic oc len =
|
|
let buff = String.create 0x1000 in
|
|
let rec copy n =
|
|
if n <= 0 then () else begin
|
|
let r = input ic buff 0 (min n 0x1000) in
|
|
if r = 0 then raise End_of_file else (output oc buff 0 r; copy(n-r))
|
|
end
|
|
in copy len
|
|
|
|
let string_of_file ic =
|
|
let b = Buffer.create 0x10000 in
|
|
let buff = String.create 0x1000 in
|
|
let rec copy () =
|
|
let n = input ic buff 0 0x1000 in
|
|
if n = 0 then Buffer.contents b else
|
|
(Buffer.add_substring b buff 0 n; copy())
|
|
in copy()
|
|
|
|
|
|
|
|
(* Reading from a channel *)
|
|
|
|
let input_bytes ic n =
|
|
let result = String.create n in
|
|
really_input ic result 0 n;
|
|
result
|
|
;;
|
|
|
|
(* Integer operations *)
|
|
|
|
let rec log2 n =
|
|
if n <= 1 then 0 else 1 + log2(n asr 1)
|
|
|
|
let align n a =
|
|
if n >= 0 then (n + a - 1) land (-a) else n land (-a)
|
|
|
|
let no_overflow_add a b = (a lxor b) lor (a lxor (lnot (a+b))) < 0
|
|
|
|
let no_overflow_sub a b = (a lxor (lnot b)) lor (b lxor (a-b)) < 0
|
|
|
|
let no_overflow_lsl a = min_int asr 1 <= a && a <= max_int asr 1
|
|
|
|
(* String operations *)
|
|
|
|
let chop_extension_if_any fname =
|
|
try Filename.chop_extension fname with Invalid_argument _ -> fname
|
|
|
|
let chop_extensions file =
|
|
let dirname = Filename.dirname file and basename = Filename.basename file in
|
|
try
|
|
let pos = String.index basename '.' in
|
|
let basename = String.sub basename 0 pos in
|
|
if Filename.is_implicit file && dirname = Filename.current_dir_name then
|
|
basename
|
|
else
|
|
Filename.concat dirname basename
|
|
with Not_found -> file
|
|
|
|
let search_substring pat str start =
|
|
let rec search i j =
|
|
if j >= String.length pat then i
|
|
else if i + j >= String.length str then raise Not_found
|
|
else if str.[i + j] = pat.[j] then search i (j+1)
|
|
else search (i+1) 0
|
|
in search start 0
|
|
|
|
let rev_split_words s =
|
|
let rec split1 res i =
|
|
if i >= String.length s then res else begin
|
|
match s.[i] with
|
|
' ' | '\t' | '\r' | '\n' -> split1 res (i+1)
|
|
| _ -> split2 res i (i+1)
|
|
end
|
|
and split2 res i j =
|
|
if j >= String.length s then String.sub s i (j-i) :: res else begin
|
|
match s.[j] with
|
|
' ' | '\t' | '\r' | '\n' -> split1 (String.sub s i (j-i) :: res) (j+1)
|
|
| _ -> split2 res i (j+1)
|
|
end
|
|
in split1 [] 0
|
|
|
|
let get_ref r =
|
|
let v = !r in
|
|
r := []; v
|
|
|
|
let fst3 (x, _, _) = x
|
|
let snd3 (_,x,_) = x
|
|
let thd3 (_,_,x) = x
|
|
|
|
let fst4 (x, _, _, _) = x
|
|
let snd4 (_,x,_, _) = x
|
|
let thd4 (_,_,x,_) = x
|
|
|
|
|
|
module LongString = struct
|
|
type t = string array
|
|
|
|
let create str_size =
|
|
let tbl_size = str_size / Sys.max_string_length + 1 in
|
|
let tbl = Array.make tbl_size "" in
|
|
for i = 0 to tbl_size - 2 do
|
|
tbl.(i) <- String.create Sys.max_string_length;
|
|
done;
|
|
tbl.(tbl_size - 1) <- String.create (str_size mod Sys.max_string_length);
|
|
tbl
|
|
|
|
let length tbl =
|
|
let tbl_size = Array.length tbl in
|
|
Sys.max_string_length * (tbl_size - 1) + String.length tbl.(tbl_size - 1)
|
|
|
|
let get tbl ind =
|
|
tbl.(ind / Sys.max_string_length).[ind mod Sys.max_string_length]
|
|
|
|
let set tbl ind c =
|
|
tbl.(ind / Sys.max_string_length).[ind mod Sys.max_string_length] <- c
|
|
|
|
let blit src srcoff dst dstoff len =
|
|
for i = 0 to len - 1 do
|
|
set dst (dstoff + i) (get src (srcoff + i))
|
|
done
|
|
|
|
let output oc tbl pos len =
|
|
for i = pos to pos + len - 1 do
|
|
output_char oc (get tbl i)
|
|
done
|
|
|
|
let unsafe_blit_to_string src srcoff dst dstoff len =
|
|
for i = 0 to len - 1 do
|
|
String.unsafe_set dst (dstoff + i) (get src (srcoff + i))
|
|
done
|
|
|
|
let input_bytes ic len =
|
|
let tbl = create len in
|
|
Array.iter (fun str -> really_input ic str 0 (String.length str)) tbl;
|
|
tbl
|
|
end
|
|
|
|
|
|
let edit_distance a b cutoff =
|
|
let la, lb = String.length a, String.length b in
|
|
let cutoff =
|
|
(* using max_int for cutoff would cause overflows in (i + cutoff + 1);
|
|
we bring it back to the (max la lb) worstcase *)
|
|
min (max la lb) cutoff in
|
|
if abs (la - lb) > cutoff then None
|
|
else begin
|
|
(* initialize with 'cutoff + 1' so that not-yet-written-to cases have
|
|
the worst possible cost; this is useful when computing the cost of
|
|
a case just at the boundary of the cutoff diagonal. *)
|
|
let m = Array.make_matrix (la + 1) (lb + 1) (cutoff + 1) in
|
|
m.(0).(0) <- 0;
|
|
for i = 1 to la do
|
|
m.(i).(0) <- i;
|
|
done;
|
|
for j = 1 to lb do
|
|
m.(0).(j) <- j;
|
|
done;
|
|
for i = 1 to la do
|
|
for j = max 1 (i - cutoff - 1) to min lb (i + cutoff + 1) do
|
|
let cost = if a.[i-1] = b.[j-1] then 0 else 1 in
|
|
let best =
|
|
(* insert, delete or substitute *)
|
|
min (1 + min m.(i-1).(j) m.(i).(j-1)) (m.(i-1).(j-1) + cost)
|
|
in
|
|
let best =
|
|
(* swap two adjacent letters; we use "cost" again in case of
|
|
a swap between two identical letters; this is slightly
|
|
redundant as this is a double-substitution case, but it
|
|
was done this way in most online implementations and
|
|
imitation has its virtues *)
|
|
if not (i > 1 && j > 1 && a.[i-1] = b.[j-2] && a.[i-2] = b.[j-1])
|
|
then best
|
|
else min best (m.(i-2).(j-2) + cost)
|
|
in
|
|
m.(i).(j) <- best
|
|
done;
|
|
done;
|
|
let result = m.(la).(lb) in
|
|
if result > cutoff
|
|
then None
|
|
else Some result
|
|
end
|