ocaml/asmcomp/strmatch.ml

398 lines
11 KiB
OCaml

(**************************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1996 Institut National de Recherche en Informatique et *)
(* en Automatique. *)
(* *)
(* All rights reserved. This file is distributed under the terms of *)
(* the GNU Lesser General Public License version 2.1, with the *)
(* special exception on linking described in the file LICENSE. *)
(* *)
(**************************************************************************)
(* Translation of string matching from closed lambda to C-- *)
open Lambda
open Cmm
module V = Backend_var
module VP = Backend_var.With_provenance
module type I = sig
val string_block_length : Cmm.expression -> Cmm.expression
val transl_switch :
Debuginfo.t -> Cmm.expression -> int -> int ->
(int * Cmm.expression) list -> Cmm.expression ->
Cmm.expression
end
module Make(I:I) = struct
(* Debug *)
let dbg = false
let mask =
let open Nativeint in
sub (shift_left one 8) one
let pat_as_string p =
let rec digits k n p =
if n <= 0 then k
else
let d = Nativeint.to_int (Nativeint.logand mask p) in
let d = Char.escaped (Char.chr d) in
digits (d::k) (n-1) (Nativeint.shift_right_logical p 8) in
let ds = digits [] Arch.size_addr p in
let ds =
if Arch.big_endian then ds else List.rev ds in
String.concat "" ds
let do_pp_cases chan cases =
List.iter
(fun (ps,_) ->
Printf.fprintf chan " [%s]\n"
(String.concat "; " (List.map pat_as_string ps)))
cases
let pp_cases chan tag cases =
Printf.eprintf "%s:\n" tag ;
do_pp_cases chan cases
let pp_match chan tag idxs cases =
Printf.eprintf
"%s: idx=[%s]\n" tag
(String.concat "; " (List.map Int.to_string idxs)) ;
do_pp_cases chan cases
(* Utilities *)
let gen_cell_id () = V.create_local "cell"
let gen_size_id () = V.create_local "size"
let mk_let_cell id str ind body =
let dbg = Debuginfo.none in
let cell =
Cop(Cload (Word_int, Asttypes.Mutable),
[Cop(Cadda,[str;Cconst_int(Arch.size_int*ind, dbg)], dbg)],
dbg) in
Clet(id, cell, body)
let mk_let_size id str body =
let size = I.string_block_length str in
Clet(id, size, body)
let mk_cmp_gen cmp_op id nat ifso ifnot =
let dbg = Debuginfo.none in
let test =
Cop (Ccmpi cmp_op, [ Cvar id; Cconst_natint (nat, dbg) ], dbg)
in
Cifthenelse (test, dbg, ifso, dbg, ifnot, dbg)
let mk_lt = mk_cmp_gen Clt
let mk_eq = mk_cmp_gen Ceq
module IntArg =
struct
type t = int
let compare (x:int) (y:int) =
if x < y then -1
else if x > y then 1
else 0
end
let interval m0 n =
let rec do_rec m =
if m >= n then []
else m::do_rec (m+1) in
do_rec m0
(*****************************************************)
(* Compile strings to a lists of words [native ints] *)
(*****************************************************)
let pat_of_string str =
let len = String.length str in
let n = len / Arch.size_addr + 1 in
let get_byte i =
if i < len then int_of_char str.[i]
else if i < n * Arch.size_addr - 1 then 0
else n * Arch.size_addr - 1 - len in
let mk_word ind =
let w = ref 0n in
let imin = ind * Arch.size_addr
and imax = (ind + 1) * Arch.size_addr - 1 in
if Arch.big_endian then
for i = imin to imax do
w := Nativeint.logor (Nativeint.shift_left !w 8)
(Nativeint.of_int (get_byte i));
done
else
for i = imax downto imin do
w := Nativeint.logor (Nativeint.shift_left !w 8)
(Nativeint.of_int (get_byte i));
done;
!w in
let rec mk_words ind =
if ind >= n then []
else mk_word ind::mk_words (ind+1) in
mk_words 0
(*****************************)
(* Discriminating heuristics *)
(*****************************)
module IntSet = Set.Make(IntArg)
module NativeSet = Set.Make(Nativeint)
let rec add_one sets ps = match sets,ps with
| [],[] -> []
| set::sets,p::ps ->
let sets = add_one sets ps in
NativeSet.add p set::sets
| _,_ -> assert false
let count_arities cases = match cases with
| [] -> assert false
| (ps,_)::_ ->
let sets =
List.fold_left
(fun sets (ps,_) -> add_one sets ps)
(List.map (fun _ -> NativeSet.empty) ps) cases in
List.map NativeSet.cardinal sets
let count_arities_first cases =
let set =
List.fold_left
(fun set case -> match case with
| (p::_,_) -> NativeSet.add p set
| _ -> assert false)
NativeSet.empty cases in
NativeSet.cardinal set
let count_arities_length cases =
let set =
List.fold_left
(fun set (ps,_) -> IntSet.add (List.length ps) set)
IntSet.empty cases in
IntSet.cardinal set
let best_col =
let rec do_rec kbest best k = function
| [] -> kbest
| x::xs ->
if x < best then
do_rec k x (k+1) xs
else
do_rec kbest best (k+1) xs in
let smallest = do_rec (-1) max_int 0 in
fun cases ->
let ars = count_arities cases in
smallest ars
let swap_list =
let rec do_rec k xs = match xs with
| [] -> assert false
| x::xs ->
if k <= 0 then [],x,xs
else
let xs,mid,ys = do_rec (k-1) xs in
x::xs,mid,ys in
fun k xs ->
let xs,x,ys = do_rec k xs in
x::xs @ ys
let swap k idxs cases =
if k = 0 then idxs,cases
else
let idxs = swap_list k idxs
and cases =
List.map
(fun (ps,act) -> swap_list k ps,act)
cases in
if dbg then begin
pp_match stderr "SWAP" idxs cases
end ;
idxs,cases
let best_first idxs cases = match idxs with
| []|[_] -> idxs,cases (* optimisation: one column only *)
| _ ->
let k = best_col cases in
swap k idxs cases
(************************************)
(* Divide according to first column *)
(************************************)
module Divide(O:Set.OrderedType) = struct
module OMap = Map.Make(O)
let divide cases =
let env =
List.fold_left
(fun env (p,psact) ->
let old =
try OMap.find p env
with Not_found -> [] in
OMap.add p ((psact)::old) env)
OMap.empty cases in
let r = OMap.fold (fun key v k -> (key,v)::k) env [] in
List.rev r (* Now sorted *)
end
(***************)
(* Compilation *)
(***************)
(* Group by cell *)
module DivideNative = Divide(Nativeint)
let by_cell cases =
DivideNative.divide
(List.map
(fun case -> match case with
| (p::ps),act -> p,(ps,act)
| [],_ -> assert false)
cases)
(* Split into two halves *)
let rec do_split idx env = match env with
| [] -> assert false
| (midkey,_ as x)::rem ->
if idx <= 0 then [],midkey,env
else
let lt,midkey,ge = do_split (idx-1) rem in
x::lt,midkey,ge
let split_env len env = do_split (len/2) env
(* Switch according to one cell *)
(*
Emit the switch, here as a comparison tree.
Argument compile_rec is to be called to compile the rest of patterns,
as match_on_cell can be called in two different contexts :
from do_compile_pats and top_compile below.
*)
let match_oncell compile_rec str default idx env =
let id = gen_cell_id () in
let rec comp_rec env =
let len = List.length env in
if len <= 3 then
List.fold_right
(fun (key,cases) ifnot ->
mk_eq id key
(compile_rec str default cases)
ifnot)
env default
else
let lt,midkey,ge = split_env len env in
mk_lt id midkey (comp_rec lt) (comp_rec ge) in
mk_let_cell (VP.create id) str idx (comp_rec env)
(*
Recursive 'list of cells' compile function:
- choose the matched cell and switch on it
- notice: patterns (and idx) all have the same length
*)
let rec do_compile_pats idxs str default cases =
if dbg then begin
pp_match stderr "COMPILE" idxs cases
end ;
match idxs with
| [] ->
begin match cases with
| [] -> default
| (_,e)::_ -> e
end
| _::_ ->
let idxs,cases = best_first idxs cases in
begin match idxs with
| [] -> assert false
| idx::idxs ->
match_oncell
(do_compile_pats idxs) str default idx (by_cell cases)
end
(* Group by size *)
module DivideInt = Divide(IntArg)
let by_size cases =
DivideInt.divide
(List.map
(fun (ps,_ as case) -> List.length ps,case)
cases)
(*
Switch according to pattern size
Argument from_ind is the starting index, it can be zero
or one (when the switch on the cell 0 has already been performed.
In that latter case pattern len is string length-1 and is corrected.
*)
let compile_by_size dbg from_ind str default cases =
let size_cases =
List.map
(fun (len,cases) ->
let len = len+from_ind in
let act =
do_compile_pats
(interval from_ind len)
str default cases in
(len,act))
(by_size cases) in
let id = gen_size_id () in
let switch = I.transl_switch dbg (Cvar id) 1 max_int size_cases default in
mk_let_size (VP.create id) str switch
(*
Compilation entry point: we choose to switch
either on size or on first cell, using the
'least discriminant' heuristics.
*)
let top_compile debuginfo str default cases =
let a_len = count_arities_length cases
and a_fst = count_arities_first cases in
if a_len <= a_fst then begin
if dbg then pp_cases stderr "SIZE" cases ;
compile_by_size debuginfo 0 str default cases
end else begin
if dbg then pp_cases stderr "FIRST COL" cases ;
let compile_size_rest str default cases =
compile_by_size debuginfo 1 str default cases in
match_oncell compile_size_rest str default 0 (by_cell cases)
end
(* Module entry point *)
let catch dbg arg k = match arg with
| Cexit (_e,[]) -> k arg
| _ ->
let e = next_raise_count () in
ccatch (e,[],k (Cexit (e,[])),arg,dbg)
let compile dbg str default cases =
(* We do not attempt to really optimise default=None *)
let cases,default = match cases,default with
| (_,e)::cases,None
| cases,Some e -> cases,e
| [],None -> assert false in
let cases =
List.rev_map
(fun (s,act) -> pat_of_string s,act)
cases in
catch dbg default (fun default -> top_compile dbg str default cases)
end