ocaml/lex/compact.ml

233 lines
7.3 KiB
OCaml

(**************************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1996 Institut National de Recherche en Informatique et *)
(* en Automatique. *)
(* *)
(* All rights reserved. This file is distributed under the terms of *)
(* the GNU Lesser General Public License version 2.1, with the *)
(* special exception on linking described in the file LICENSE. *)
(* *)
(**************************************************************************)
(* Compaction of an automata *)
open Lexgen
(* Code for memory actions *)
let code = Table.create 0
(* instructions are 2 8-bits integers, a 0xff byte means return *)
let emit_int i = Table.emit code i
let ins_mem i c = match i with
| Copy (dst, src) -> dst::src::c
| Set dst -> dst::0xff::c
let ins_tag i c = match i with
| SetTag (dst, src) -> dst::src::c
| EraseTag dst -> dst::0xff::c
let do_emit_code c =
let r = Table.size code in
List.iter emit_int c ;
emit_int 0xff ;
r
let memory = Hashtbl.create 101
let mem_emit_code c =
try Hashtbl.find memory c with
| Not_found ->
let r = do_emit_code c in
Hashtbl.add memory c r ;
r
(* Code address 0 is the empty code (ie do nothing) *)
let _ = mem_emit_code []
let emit_tag_code c = mem_emit_code (List.fold_right ins_tag c [])
and emit_mem_code c =mem_emit_code (List.fold_right ins_mem c [])
(*******************************************)
(* Compact the transition and check arrays *)
(*******************************************)
(* Determine the integer occurring most frequently in an array *)
let most_frequent_elt v =
let frequencies = Hashtbl.create 17 in
let max_freq = ref 0 in
let most_freq = ref (v.(0)) in
for i = 0 to Array.length v - 1 do
let e = v.(i) in
let r =
try
Hashtbl.find frequencies e
with Not_found ->
let r = ref 1 in Hashtbl.add frequencies e r; r in
incr r;
if !r > !max_freq then begin max_freq := !r; most_freq := e end
done;
!most_freq
(* Transform an array into a list of (position, non-default element) *)
let non_default_elements def v =
let rec nondef i =
if i >= Array.length v then [] else begin
let e = v.(i) in
if e = def then nondef(i+1) else (i, e) :: nondef(i+1)
end in
nondef 0
type t_compact =
{mutable c_trans : int array ;
mutable c_check : int array ;
mutable c_last_used : int ; }
let create_compact () =
{ c_trans = Array.make 1024 0 ;
c_check = Array.make 1024 (-1) ;
c_last_used = 0 ; }
let reset_compact c =
c.c_trans <- Array.make 1024 0 ;
c.c_check <- Array.make 1024 (-1) ;
c.c_last_used <- 0
(* One compacted table for transitions, one other for memory actions *)
let trans = create_compact ()
and moves = create_compact ()
let grow_compact c =
let old_trans = c.c_trans
and old_check = c.c_check in
let n = Array.length old_trans in
c.c_trans <- Array.make (2*n) 0;
Array.blit old_trans 0 c.c_trans 0 c.c_last_used;
c.c_check <- Array.make (2*n) (-1);
Array.blit old_check 0 c.c_check 0 c.c_last_used
let do_pack state_num orig compact =
let default = most_frequent_elt orig in
let nondef = non_default_elements default orig in
let rec pack_from b =
while
b + 257 > Array.length compact.c_trans
do
grow_compact compact
done;
let rec try_pack = function
[] -> b
| (pos, v) :: rem ->
if compact.c_check.(b + pos) = -1 then
try_pack rem
else pack_from (b+1) in
try_pack nondef in
let base = pack_from 0 in
List.iter
(fun (pos, v) ->
compact.c_trans.(base + pos) <- v;
compact.c_check.(base + pos) <- state_num)
nondef;
if base + 257 > compact.c_last_used then
compact.c_last_used <- base + 257;
(base, default)
let pack_moves state_num move_t =
let move_v = Array.make 257 0
and move_m = Array.make 257 0 in
for i = 0 to 256 do
let act,c = move_t.(i) in
move_v.(i) <- (match act with Backtrack -> -1 | Goto n -> n) ;
move_m.(i) <- emit_mem_code c
done ;
let pk_trans = do_pack state_num move_v trans
and pk_moves = do_pack state_num move_m moves in
pk_trans, pk_moves
(* Build the tables *)
type lex_tables =
{ tbl_base: int array; (* Perform / Shift *)
tbl_backtrk: int array; (* No_remember / Remember *)
tbl_default: int array; (* Default transition *)
tbl_trans: int array; (* Transitions (compacted) *)
tbl_check: int array; (* Check (compacted) *)
(* code addresses are managed in a similar fashion as transitions *)
tbl_base_code : int array; (* code ptr / base for Shift *)
tbl_backtrk_code : int array; (* nothing / code when Remember *)
(* moves to execute before transitions (compacted) *)
tbl_default_code : int array;
tbl_trans_code : int array;
tbl_check_code : int array;
(* byte code itself *)
tbl_code: int array;}
let compact_tables state_v =
let n = Array.length state_v in
let base = Array.make n 0
and backtrk = Array.make n (-1)
and default = Array.make n 0
and base_code = Array.make n 0
and backtrk_code = Array.make n 0
and default_code = Array.make n 0 in
for i = 0 to n - 1 do
match state_v.(i) with
| Perform (n,c) ->
base.(i) <- -(n+1) ;
base_code.(i) <- emit_tag_code c
| Shift(trans, move) ->
begin match trans with
| No_remember -> ()
| Remember (n,c) ->
backtrk.(i) <- n ;
backtrk_code.(i) <- emit_tag_code c
end;
let (b_trans, d_trans),(b_moves,d_moves) = pack_moves i move in
base.(i) <- b_trans; default.(i) <- d_trans ;
base_code.(i) <- b_moves; default_code.(i) <- d_moves ;
done;
let code = Table.trim code in
let tables =
if Array.length code > 1 then
{ tbl_base = base;
tbl_backtrk = backtrk;
tbl_default = default;
tbl_trans = Array.sub trans.c_trans 0 trans.c_last_used;
tbl_check = Array.sub trans.c_check 0 trans.c_last_used;
tbl_base_code = base_code ;
tbl_backtrk_code = backtrk_code;
tbl_default_code = default_code;
tbl_trans_code = Array.sub moves.c_trans 0 moves.c_last_used;
tbl_check_code = Array.sub moves.c_check 0 moves.c_last_used;
tbl_code = code}
else (* when no memory moves, do not emit related tables *)
{ tbl_base = base;
tbl_backtrk = backtrk;
tbl_default = default;
tbl_trans = Array.sub trans.c_trans 0 trans.c_last_used;
tbl_check = Array.sub trans.c_check 0 trans.c_last_used;
tbl_base_code = [||] ;
tbl_backtrk_code = [||];
tbl_default_code = [||];
tbl_trans_code = [||];
tbl_check_code = [||];
tbl_code = [||]}
in
reset_compact trans ;
reset_compact moves ;
tables