csllex utilise un automate a pile
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@667 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02master
parent
ede06e157d
commit
22bc127a92
2
Makefile
2
Makefile
|
@ -406,7 +406,7 @@ realclean::
|
|||
|
||||
csltools:
|
||||
cd tools; $(MAKE) all
|
||||
realclean::
|
||||
clean::
|
||||
cd tools; $(MAKE) clean
|
||||
alldepend::
|
||||
cd tools; $(MAKE) depend
|
||||
|
|
|
@ -7,12 +7,13 @@ DFLAGS=-g -DDEBUG $(BYTECCCOMPOPTS)
|
|||
OBJS=interp.o misc.o stacks.o fix_code.o main.o fail.o signals.o \
|
||||
freelist.o major_gc.o minor_gc.o memory.o alloc.o roots.o \
|
||||
compare.o ints.o floats.o str.o array.o io.o extern.o intern.o \
|
||||
hash.o sys.o meta.o parsing.o gc_ctrl.o terminfo.o md5.o obj.o
|
||||
hash.o sys.o meta.o parsing.o gc_ctrl.o terminfo.o md5.o obj.o \
|
||||
lexing.o
|
||||
|
||||
DOBJS=$(OBJS:.o=.d.o) instrtrace.d.o
|
||||
|
||||
PRIMS=array.c compare.c extern.c floats.c gc_ctrl.c hash.c \
|
||||
intern.c interp.c ints.c io.c md5.c meta.c obj.c parsing.c \
|
||||
intern.c interp.c ints.c io.c lexing.c md5.c meta.c obj.c parsing.c \
|
||||
signals.c str.c sys.c terminfo.c
|
||||
|
||||
all: cslrun
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
# #
|
||||
#*********************************************************************#
|
||||
|
||||
# $Id
|
||||
# $Id$
|
||||
|
||||
bindir=/usr/local/bin
|
||||
libdir=/usr/local/lib/camlsl
|
||||
|
@ -58,14 +58,17 @@ echo " manual pages.............. $mandir (with extension .$manext)"
|
|||
|
||||
# Determine the system type
|
||||
|
||||
case "$host_type" in
|
||||
unknown)
|
||||
if host_type=`./config.guess`; then :; else
|
||||
echo "Cannot guess host type"
|
||||
echo "You must specify one with the -host option"
|
||||
fi;;
|
||||
esac
|
||||
host=`./config.sub $host_type`
|
||||
if test "$host_type" = "unknown"; then
|
||||
if host_type=`./config.guess`; then :; else
|
||||
echo "Cannot guess host type"
|
||||
echo "You must specify one with the -host option"
|
||||
exit 2
|
||||
fi
|
||||
fi
|
||||
if host=`./config.sub $host_type`; then :; else
|
||||
echo "Please specify the correct host type with the -host option"
|
||||
exit 2
|
||||
fi
|
||||
echo "Configuring for a $host ..."
|
||||
|
||||
# Do we have gcc?
|
||||
|
|
27
lex/.depend
27
lex/.depend
|
@ -1,12 +1,17 @@
|
|||
compact.cmi: lexgen.cmi
|
||||
lexer.cmi: parser.cmi
|
||||
parser.cmi: syntax.cmo
|
||||
lexer.cmo: parser.cmi syntax.cmo lexer.cmi
|
||||
lexer.cmx: parser.cmx syntax.cmx lexer.cmi
|
||||
lexgen.cmo: syntax.cmo
|
||||
lexgen.cmx: syntax.cmx
|
||||
main.cmo: lexer.cmi lexgen.cmo output.cmo parser.cmi syntax.cmo
|
||||
main.cmx: lexer.cmx lexgen.cmx output.cmx parser.cmx syntax.cmx
|
||||
output.cmo: syntax.cmo
|
||||
output.cmx: syntax.cmx
|
||||
parser.cmo: syntax.cmo parser.cmi
|
||||
parser.cmx: syntax.cmx parser.cmi
|
||||
lexgen.cmi: syntax.cmi
|
||||
output.cmi: compact.cmi lexgen.cmi syntax.cmi
|
||||
parser.cmi: syntax.cmi
|
||||
compact.cmo: lexgen.cmi compact.cmi
|
||||
compact.cmx: lexgen.cmx compact.cmi
|
||||
lexer.cmo: parser.cmi syntax.cmi lexer.cmi
|
||||
lexer.cmx: parser.cmx syntax.cmi lexer.cmi
|
||||
lexgen.cmo: syntax.cmi lexgen.cmi
|
||||
lexgen.cmx: syntax.cmi lexgen.cmi
|
||||
main.cmo: compact.cmi lexer.cmi lexgen.cmi output.cmi parser.cmi syntax.cmi
|
||||
main.cmx: compact.cmx lexer.cmx lexgen.cmx output.cmx parser.cmx syntax.cmi
|
||||
output.cmo: compact.cmi lexgen.cmi syntax.cmi output.cmi
|
||||
output.cmx: compact.cmx lexgen.cmx syntax.cmi output.cmi
|
||||
parser.cmo: syntax.cmi parser.cmi
|
||||
parser.cmx: syntax.cmi parser.cmi
|
||||
|
|
|
@ -9,7 +9,7 @@ CAMLLEX=../boot/cslrun ../boot/csllex
|
|||
CAMLDEP=../boot/cslrun ../tools/csldep
|
||||
DEPFLAGS=
|
||||
|
||||
OBJS=syntax.cmo parser.cmo lexer.cmo lexgen.cmo output.cmo main.cmo
|
||||
OBJS=parser.cmo lexer.cmo lexgen.cmo compact.cmo output.cmo main.cmo
|
||||
|
||||
all: csllex
|
||||
|
||||
|
|
|
@ -0,0 +1,120 @@
|
|||
(***********************************************************************)
|
||||
(* *)
|
||||
(* Caml Special Light *)
|
||||
(* *)
|
||||
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
||||
(* *)
|
||||
(* Copyright 1995 Institut National de Recherche en Informatique et *)
|
||||
(* Automatique. Distributed only by permission. *)
|
||||
(* *)
|
||||
(***********************************************************************)
|
||||
|
||||
(* $Id$ *)
|
||||
|
||||
(* Compaction of an automata *)
|
||||
|
||||
open Lexgen
|
||||
|
||||
(* Determine the integer occurring most frequently in an array *)
|
||||
|
||||
let most_frequent_elt v =
|
||||
let frequencies = Hashtbl.new 17 in
|
||||
let max_freq = ref 0 in
|
||||
let most_freq = ref (v.(0)) in
|
||||
for i = 0 to Array.length v - 1 do
|
||||
let e = v.(i) in
|
||||
let r =
|
||||
try
|
||||
Hashtbl.find frequencies e
|
||||
with Not_found ->
|
||||
let r = ref 1 in Hashtbl.add frequencies e r; r in
|
||||
incr r;
|
||||
if !r > !max_freq then begin max_freq := !r; most_freq := e end
|
||||
done;
|
||||
!most_freq
|
||||
|
||||
(* Transform an array into a list of (position, non-default element) *)
|
||||
|
||||
let non_default_elements def v =
|
||||
let rec nondef i =
|
||||
if i >= Array.length v then [] else begin
|
||||
let e = v.(i) in
|
||||
if e = def then nondef(i+1) else (i, e) :: nondef(i+1)
|
||||
end in
|
||||
nondef 0
|
||||
|
||||
(* Compact the transition and check arrays *)
|
||||
|
||||
let trans = ref(Array.new 1024 0)
|
||||
and check = ref(Array.new 1024 (-1))
|
||||
and last_used = ref 0
|
||||
|
||||
let grow_transitions () =
|
||||
let old_trans = !trans
|
||||
and old_check = !check in
|
||||
let n = Array.length old_trans in
|
||||
trans := Array.new (2*n) 0;
|
||||
Array.blit old_trans 0 !trans 0 !last_used;
|
||||
check := Array.new (2*n) (-1);
|
||||
Array.blit old_check 0 !check 0 !last_used
|
||||
|
||||
let pack_moves state_num move_t =
|
||||
let move_v = Array.new 256 0 in
|
||||
for i = 0 to 255 do
|
||||
move_v.(i) <-
|
||||
(match move_t.(i) with
|
||||
Backtrack -> -1
|
||||
| Goto n -> n)
|
||||
done;
|
||||
let default = most_frequent_elt move_v in
|
||||
let nondef = non_default_elements default move_v in
|
||||
let rec pack_from b =
|
||||
while b + 256 > Array.length !trans do grow_transitions() done;
|
||||
let rec try_pack = function
|
||||
[] -> b
|
||||
| (pos, v) :: rem ->
|
||||
if !check.(b + pos) = -1 then try_pack rem else pack_from (b+1) in
|
||||
try_pack nondef in
|
||||
let base = pack_from 0 in
|
||||
List.iter
|
||||
(fun (pos, v) ->
|
||||
!trans.(base + pos) <- v;
|
||||
!check.(base + pos) <- state_num)
|
||||
nondef;
|
||||
if base + 256 > !last_used then last_used := base + 256;
|
||||
(base, default)
|
||||
|
||||
(* Build the tables *)
|
||||
|
||||
type lex_tables =
|
||||
{ tbl_base: int array; (* Perform / Shift *)
|
||||
tbl_backtrk: int array; (* No_remember / Remember *)
|
||||
tbl_default: int array; (* Default transition *)
|
||||
tbl_trans: int array; (* Transitions (compacted) *)
|
||||
tbl_check: int array } (* Check (compacted) *)
|
||||
|
||||
let compact_tables state_v =
|
||||
let n = Array.length state_v in
|
||||
let base = Array.new n 0
|
||||
and backtrk = Array.new n (-1)
|
||||
and default = Array.new n 0 in
|
||||
for i = 0 to n - 1 do
|
||||
match state_v.(i) with
|
||||
Perform n ->
|
||||
base.(i) <- -(n+1)
|
||||
| Shift(trans, move) ->
|
||||
begin match trans with
|
||||
No_remember -> ()
|
||||
| Remember n -> backtrk.(i) <- n
|
||||
end;
|
||||
let (b, d) = pack_moves i move in
|
||||
base.(i) <- b;
|
||||
default.(i) <- d
|
||||
done;
|
||||
{ tbl_base = base;
|
||||
tbl_backtrk = backtrk;
|
||||
tbl_default = default;
|
||||
tbl_trans = Array.sub !trans 0 !last_used;
|
||||
tbl_check = Array.sub !check 0 !last_used }
|
||||
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
(***********************************************************************)
|
||||
(* *)
|
||||
(* Caml Special Light *)
|
||||
(* *)
|
||||
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
||||
(* *)
|
||||
(* Copyright 1995 Institut National de Recherche en Informatique et *)
|
||||
(* Automatique. Distributed only by permission. *)
|
||||
(* *)
|
||||
(***********************************************************************)
|
||||
|
||||
(* $Id$ *)
|
||||
|
||||
(* Compaction of an automata *)
|
||||
|
||||
type lex_tables =
|
||||
{ tbl_base: int array; (* Perform / Shift *)
|
||||
tbl_backtrk: int array; (* No_remember / Remember *)
|
||||
tbl_default: int array; (* Default transition *)
|
||||
tbl_trans: int array; (* Transitions (compacted) *)
|
||||
tbl_check: int array } (* Check (compacted) *)
|
||||
|
||||
val compact_tables: Lexgen.automata array -> lex_tables
|
221
lex/lexgen.ml
221
lex/lexgen.ml
|
@ -25,6 +25,30 @@ type regexp =
|
|||
| Alt of regexp * regexp
|
||||
| Star of regexp
|
||||
|
||||
type lexer_entry =
|
||||
{ lex_name: string;
|
||||
lex_regexp: regexp;
|
||||
lex_actions: (int * location) list }
|
||||
|
||||
(* Representation of automata *)
|
||||
|
||||
type automata =
|
||||
Perform of int
|
||||
| Shift of automata_trans * automata_move array
|
||||
and automata_trans =
|
||||
No_remember
|
||||
| Remember of int
|
||||
and automata_move =
|
||||
Backtrack
|
||||
| Goto of int
|
||||
|
||||
(* Representation of entry points *)
|
||||
|
||||
type automata_entry =
|
||||
{ auto_name: string;
|
||||
auto_initial_state: int;
|
||||
auto_actions: (int * location) list }
|
||||
|
||||
(* From shallow to deep syntax *)
|
||||
|
||||
let chars = ref ([] : char list list)
|
||||
|
@ -46,29 +70,33 @@ let rec encode_regexp = function
|
|||
| Repetition r ->
|
||||
Star (encode_regexp r)
|
||||
|
||||
let encode_casedef =
|
||||
let encode_casedef casedef =
|
||||
List.fold_left
|
||||
(fun reg (expr,act) ->
|
||||
(fun reg (expr, act) ->
|
||||
let act_num = !actions_count in
|
||||
incr actions_count;
|
||||
actions := (act_num, act) :: !actions;
|
||||
Alt(reg, Seq(encode_regexp expr, Action act_num)))
|
||||
Empty
|
||||
Empty
|
||||
casedef
|
||||
|
||||
let encode_lexdef def =
|
||||
chars := [];
|
||||
chars_count := 0;
|
||||
actions := [];
|
||||
actions_count := 0;
|
||||
let name_regexp_list =
|
||||
let entry_list =
|
||||
List.map
|
||||
(fun (name, casedef) -> (name, encode_casedef casedef))
|
||||
(fun (entry_name, casedef) ->
|
||||
actions := [];
|
||||
actions_count := 0;
|
||||
let re = encode_casedef casedef in
|
||||
{ lex_name = entry_name;
|
||||
lex_regexp = re;
|
||||
lex_actions = List.rev !actions })
|
||||
def.entrypoints in
|
||||
let chr = Array.of_list (List.rev !chars)
|
||||
and act = !actions in
|
||||
let chr = Array.of_list (List.rev !chars) in
|
||||
chars := [];
|
||||
actions := [];
|
||||
(chr, name_regexp_list, act)
|
||||
(chr, entry_list)
|
||||
|
||||
(* To generate directly a NFA from a regular expression.
|
||||
Confer Aho-Sethi-Ullman, dragon book, chap. 3 *)
|
||||
|
@ -77,22 +105,8 @@ type transition =
|
|||
OnChars of int
|
||||
| ToAction of int
|
||||
|
||||
let rec merge_trans s1 s2 =
|
||||
match (s1, s2) with
|
||||
([], _) -> s2
|
||||
| (_, []) -> s1
|
||||
| ((OnChars n1 as t1) :: r1, (OnChars n2 as t2) :: r2) ->
|
||||
if n1 == n2 then t1 :: merge_trans r1 r2 else
|
||||
if n1 < n2 then t1 :: merge_trans r1 s2 else
|
||||
t2 :: merge_trans s1 r2
|
||||
| ((ToAction n1 as t1) :: r1, (ToAction n2 as t2) :: r2) ->
|
||||
if n1 == n2 then t1 :: merge_trans r1 r2 else
|
||||
if n1 < n2 then t1 :: merge_trans r1 s2 else
|
||||
t2 :: merge_trans s1 r2
|
||||
| ((OnChars n1 as t1) :: r1, (ToAction n2 as t2) :: r2) ->
|
||||
t1 :: merge_trans r1 s2
|
||||
| ((ToAction n1 as t1) :: r1, (OnChars n2 as t2) :: r2) ->
|
||||
t2 :: merge_trans s1 r2
|
||||
module TransSet =
|
||||
Set.Make(struct type t = transition let compare = compare end)
|
||||
|
||||
let rec nullable = function
|
||||
Empty -> true
|
||||
|
@ -103,90 +117,99 @@ let rec nullable = function
|
|||
| Star r -> true
|
||||
|
||||
let rec firstpos = function
|
||||
Empty -> []
|
||||
| Chars pos -> [OnChars pos]
|
||||
| Action act -> [ToAction act]
|
||||
Empty -> TransSet.empty
|
||||
| Chars pos -> TransSet.add (OnChars pos) TransSet.empty
|
||||
| Action act -> TransSet.add (ToAction act) TransSet.empty
|
||||
| Seq(r1,r2) -> if nullable r1
|
||||
then merge_trans (firstpos r1) (firstpos r2)
|
||||
then TransSet.union (firstpos r1) (firstpos r2)
|
||||
else firstpos r1
|
||||
| Alt(r1,r2) -> merge_trans (firstpos r1) (firstpos r2)
|
||||
| Alt(r1,r2) -> TransSet.union (firstpos r1) (firstpos r2)
|
||||
| Star r -> firstpos r
|
||||
|
||||
let rec lastpos = function
|
||||
Empty -> []
|
||||
| Chars pos -> [OnChars pos]
|
||||
| Action act -> [ToAction act]
|
||||
Empty -> TransSet.empty
|
||||
| Chars pos -> TransSet.add (OnChars pos) TransSet.empty
|
||||
| Action act -> TransSet.add (ToAction act) TransSet.empty
|
||||
| Seq(r1,r2) -> if nullable r2
|
||||
then merge_trans (lastpos r1) (lastpos r2)
|
||||
then TransSet.union (lastpos r1) (lastpos r2)
|
||||
else lastpos r2
|
||||
| Alt(r1,r2) -> merge_trans (lastpos r1) (lastpos r2)
|
||||
| Alt(r1,r2) -> TransSet.union (lastpos r1) (lastpos r2)
|
||||
| Star r -> lastpos r
|
||||
|
||||
let followpos size name_regexp_list =
|
||||
let v = Array.new size [] in
|
||||
let fill_pos first = function
|
||||
OnChars pos -> v.(pos) <- merge_trans first v.(pos); ()
|
||||
| ToAction _ -> () in
|
||||
let rec fill = function
|
||||
Seq(r1,r2) ->
|
||||
fill r1; fill r2;
|
||||
List.iter (fill_pos (firstpos r2)) (lastpos r1)
|
||||
| Alt(r1,r2) ->
|
||||
fill r1; fill r2
|
||||
| Star r ->
|
||||
fill r;
|
||||
List.iter (fill_pos (firstpos r)) (lastpos r)
|
||||
| _ -> () in
|
||||
List.iter (fun (name, regexp) -> fill regexp) name_regexp_list;
|
||||
v
|
||||
let followpos size entry_list =
|
||||
let v = Array.new size TransSet.empty in
|
||||
let fill_pos first = function
|
||||
OnChars pos -> v.(pos) <- TransSet.union first v.(pos)
|
||||
| ToAction _ -> () in
|
||||
let rec fill = function
|
||||
Seq(r1,r2) ->
|
||||
fill r1; fill r2;
|
||||
TransSet.iter (fill_pos (firstpos r2)) (lastpos r1)
|
||||
| Alt(r1,r2) ->
|
||||
fill r1; fill r2
|
||||
| Star r ->
|
||||
fill r;
|
||||
TransSet.iter (fill_pos (firstpos r)) (lastpos r)
|
||||
| _ -> () in
|
||||
List.iter (fun entry -> fill entry.lex_regexp) entry_list;
|
||||
v
|
||||
|
||||
let no_action = 32767
|
||||
let no_action = max_int
|
||||
|
||||
let split_trans_set = List.fold_left
|
||||
(fun (act, pos_set as act_pos_set) ->
|
||||
function OnChars pos -> (act, pos :: pos_set)
|
||||
| ToAction act1 -> if act1 < act then (act1, pos_set)
|
||||
else act_pos_set)
|
||||
(no_action, [])
|
||||
let split_trans_set trans_set =
|
||||
TransSet.fold
|
||||
(fun trans (act, pos_set as act_pos_set) ->
|
||||
match trans with
|
||||
OnChars pos -> (act, pos :: pos_set)
|
||||
| ToAction act1 -> if act1 < act then (act1, pos_set) else act_pos_set)
|
||||
trans_set
|
||||
(no_action, [])
|
||||
|
||||
let memory = (Hashtbl.new 131 : (transition list, int) Hashtbl.t)
|
||||
and todo = ref ([] : (transition list * int) list)
|
||||
and next = ref 0
|
||||
module StateMap =
|
||||
Map.Make(struct type t = TransSet.t let compare = TransSet.compare end)
|
||||
|
||||
let state_map = ref (StateMap.empty: int StateMap.t)
|
||||
let todo = (Stack.new() : (TransSet.t * int) Stack.t)
|
||||
let next_state_num = ref 0
|
||||
|
||||
let reset_state_mem () =
|
||||
Hashtbl.clear memory; todo := []; next := 0; ()
|
||||
state_map := StateMap.empty;
|
||||
Stack.clear todo;
|
||||
next_state_num := 0
|
||||
|
||||
let get_state st =
|
||||
try
|
||||
Hashtbl.find memory st
|
||||
StateMap.find st !state_map
|
||||
with Not_found ->
|
||||
let nbr = !next in
|
||||
incr next;
|
||||
Hashtbl.add memory st nbr;
|
||||
todo := (st, nbr) :: !todo;
|
||||
nbr
|
||||
let num = !next_state_num in
|
||||
incr next_state_num;
|
||||
state_map := StateMap.add st num !state_map;
|
||||
Stack.push (st, num) todo;
|
||||
num
|
||||
|
||||
let rec map_on_states f =
|
||||
match !todo with
|
||||
[] -> []
|
||||
| (st,i)::r -> todo := r; let res = f st in (res,i) :: map_on_states f
|
||||
let map_on_all_states f =
|
||||
let res = ref [] in
|
||||
begin try
|
||||
while true do
|
||||
let (st, i) = Stack.pop todo in
|
||||
let r = f st in
|
||||
res := (r, i) :: !res
|
||||
done
|
||||
with Stack.Empty -> ()
|
||||
end;
|
||||
!res
|
||||
|
||||
let number_of_states () =
|
||||
!next
|
||||
|
||||
let goto_state = function
|
||||
[] -> Backtrack
|
||||
| ps -> Goto (get_state ps)
|
||||
let goto_state st =
|
||||
if TransSet.is_empty st then Backtrack else Goto (get_state st)
|
||||
|
||||
let transition_from chars follow pos_set =
|
||||
let tr = Array.new 256 []
|
||||
and shift = Array.new 256 Backtrack in
|
||||
let tr = Array.new 256 TransSet.empty in
|
||||
let shift = Array.new 256 Backtrack in
|
||||
List.iter
|
||||
(fun pos ->
|
||||
List.iter
|
||||
(fun c ->
|
||||
tr.(Char.code c) <-
|
||||
merge_trans tr.(Char.code c) follow.(pos))
|
||||
tr.(Char.code c) <- TransSet.union tr.(Char.code c) follow.(pos))
|
||||
chars.(pos))
|
||||
pos_set;
|
||||
for i = 0 to 255 do
|
||||
|
@ -196,23 +219,23 @@ let transition_from chars follow pos_set =
|
|||
|
||||
let translate_state chars follow state =
|
||||
match split_trans_set state with
|
||||
n, [] -> Perform n
|
||||
| n, ps -> Shift( (if n == no_action then No_remember else Remember n),
|
||||
transition_from chars follow ps)
|
||||
(n, []) -> Perform n
|
||||
| (n, ps) -> Shift((if n = no_action then No_remember else Remember n),
|
||||
transition_from chars follow ps)
|
||||
|
||||
let make_dfa lexdef =
|
||||
let (chars, name_regexp_list, actions) =
|
||||
encode_lexdef lexdef in
|
||||
let follow =
|
||||
followpos (Array.length chars) name_regexp_list in
|
||||
let (chars, entry_list) = encode_lexdef lexdef in
|
||||
let follow = followpos (Array.length chars) entry_list in
|
||||
reset_state_mem();
|
||||
let initial_states =
|
||||
List.map (fun (name, regexp) -> (name, get_state(firstpos regexp)))
|
||||
name_regexp_list in
|
||||
let states =
|
||||
map_on_states (translate_state chars follow) in
|
||||
let v =
|
||||
Array.new (number_of_states()) (Perform 0) in
|
||||
List.iter (fun (auto, i) -> v.(i) <- auto) states;
|
||||
List.map
|
||||
(fun le ->
|
||||
{ auto_name = le.lex_name;
|
||||
auto_initial_state = get_state(firstpos le.lex_regexp);
|
||||
auto_actions = le.lex_actions })
|
||||
entry_list in
|
||||
let states = map_on_all_states (translate_state chars follow) in
|
||||
let actions = Array.new !next_state_num (Perform 0) in
|
||||
List.iter (fun (act, i) -> actions.(i) <- act) states;
|
||||
reset_state_mem();
|
||||
(initial_states, v, actions)
|
||||
(initial_states, actions)
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
(***********************************************************************)
|
||||
(* *)
|
||||
(* Caml Special Light *)
|
||||
(* *)
|
||||
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
||||
(* *)
|
||||
(* Copyright 1995 Institut National de Recherche en Informatique et *)
|
||||
(* Automatique. Distributed only by permission. *)
|
||||
(* *)
|
||||
(***********************************************************************)
|
||||
|
||||
(* $Id$ *)
|
||||
|
||||
(* Representation of automata *)
|
||||
|
||||
type automata =
|
||||
Perform of int
|
||||
| Shift of automata_trans * automata_move array
|
||||
and automata_trans =
|
||||
No_remember
|
||||
| Remember of int
|
||||
and automata_move =
|
||||
Backtrack
|
||||
| Goto of int
|
||||
|
||||
(* Representation of entry points *)
|
||||
|
||||
type automata_entry =
|
||||
{ auto_name: string;
|
||||
auto_initial_state: int;
|
||||
auto_actions: (int * Syntax.location) list }
|
||||
|
||||
(* The entry point *)
|
||||
|
||||
val make_dfa: Syntax.lexer_definition -> automata_entry list * automata array
|
18
lex/main.ml
18
lex/main.ml
|
@ -28,15 +28,14 @@ let main () =
|
|||
Filename.chop_suffix source_name ".mll" ^ ".ml"
|
||||
else
|
||||
source_name ^ ".ml" in
|
||||
ic := open_in_bin source_name;
|
||||
oc := open_out dest_name;
|
||||
let lexbuf =
|
||||
Lexing.from_channel !ic in
|
||||
let ic = open_in_bin source_name in
|
||||
let oc = open_out dest_name in
|
||||
let lexbuf = Lexing.from_channel ic in
|
||||
let def =
|
||||
try
|
||||
Parser.lexer_definition Lexer.main lexbuf
|
||||
with exn ->
|
||||
close_out !oc;
|
||||
close_out oc;
|
||||
Sys.remove dest_name;
|
||||
begin match exn with
|
||||
Parsing.Parse_error ->
|
||||
|
@ -52,10 +51,11 @@ let main () =
|
|||
| _ -> raise exn
|
||||
end;
|
||||
exit 2 in
|
||||
let ((init, states, acts) as dfa) = make_dfa def in
|
||||
output_lexdef def.header dfa def.trailer;
|
||||
close_in !ic;
|
||||
close_out !oc
|
||||
let (entries, transitions) = Lexgen.make_dfa def in
|
||||
let tables = Compact.compact_tables transitions in
|
||||
Output.output_lexdef ic oc def.header tables entries def.trailer;
|
||||
close_in ic;
|
||||
close_out oc
|
||||
|
||||
let _ = Printexc.catch main (); exit 0
|
||||
|
||||
|
|
207
lex/output.ml
207
lex/output.ml
|
@ -11,151 +11,88 @@
|
|||
|
||||
(* $Id$ *)
|
||||
|
||||
(* Generating a DFA as a set of mutually recursive functions *)
|
||||
(* Output the DFA tables and its entry points *)
|
||||
|
||||
open Printf
|
||||
open Syntax
|
||||
open Lexgen
|
||||
open Compact
|
||||
|
||||
let ic = ref stdin
|
||||
and oc = ref stdout
|
||||
|
||||
(* 1- Generating the actions *)
|
||||
(* To copy the ML code fragments *)
|
||||
|
||||
let copy_buffer = String.create 1024
|
||||
|
||||
let copy_chunk (Location(start,stop)) =
|
||||
let rec copy s =
|
||||
if s <= 0 then () else
|
||||
let n = if s < 1024 then s else 1024 in
|
||||
let m = input !ic copy_buffer 0 n in
|
||||
output !oc copy_buffer 0 m;
|
||||
copy (s - m)
|
||||
in
|
||||
seek_in !ic start;
|
||||
copy (stop - start)
|
||||
let copy_chunk ic oc (Location(start,stop)) =
|
||||
seek_in ic start;
|
||||
let n = ref (stop - start) in
|
||||
while !n > 0 do
|
||||
let m = input ic copy_buffer 0 (min !n 1024) in
|
||||
output oc copy_buffer 0 m;
|
||||
n := !n - m
|
||||
done
|
||||
|
||||
let output_action (i,act) =
|
||||
output_string !oc ("action_" ^ string_of_int i ^ " lexbuf = (\n");
|
||||
copy_chunk act;
|
||||
output_string !oc ")\nand ";
|
||||
()
|
||||
(* To output an array of short ints, encoded as a string *)
|
||||
|
||||
(* 2- Generating the states *)
|
||||
let output_byte oc b =
|
||||
output_char oc '\\';
|
||||
output_char oc (Char.chr(48 + b / 100));
|
||||
output_char oc (Char.chr(48 + (b / 10) mod 10));
|
||||
output_char oc (Char.chr(48 + b mod 10))
|
||||
|
||||
let states = ref ([||] : automata array)
|
||||
|
||||
let enumerate_vect v =
|
||||
let rec enum env pos =
|
||||
if pos >= Array.length v then env else
|
||||
try
|
||||
let pl = List.assoc v.(pos) env in
|
||||
pl := pos :: !pl; enum env (succ pos)
|
||||
with Not_found ->
|
||||
enum ((v.(pos), ref [pos]) :: env) (succ pos) in
|
||||
Sort.list
|
||||
(fun (e1, pl1) (e2, pl2) -> List.length !pl1 >= List.length !pl2)
|
||||
(enum [] 0)
|
||||
|
||||
let output_move = function
|
||||
Backtrack ->
|
||||
output_string !oc "backtrack lexbuf"
|
||||
| Goto dest ->
|
||||
match !states.(dest) with
|
||||
Perform act_num ->
|
||||
output_string !oc ("action_" ^ string_of_int act_num ^ " lexbuf")
|
||||
| _ ->
|
||||
output_string !oc ("state_" ^ string_of_int dest ^ " lexbuf")
|
||||
|
||||
let output_char_for_read oc = function
|
||||
'\'' -> output_string oc "\\'"
|
||||
| '\\' -> output_string oc "\\\\"
|
||||
| '\n' -> output_string oc "\\n"
|
||||
| '\t' -> output_string oc "\\t"
|
||||
| c ->
|
||||
let n = Char.code c in
|
||||
if n >= 32 & n < 127 then
|
||||
output_char oc c
|
||||
else begin
|
||||
output_char oc '\\';
|
||||
output_char oc (Char.chr (48 + n / 100));
|
||||
output_char oc (Char.chr (48 + (n / 10) mod 10));
|
||||
output_char oc (Char.chr (48 + n mod 10))
|
||||
end
|
||||
|
||||
let rec output_chars = function
|
||||
[] ->
|
||||
failwith "output_chars"
|
||||
| [c] ->
|
||||
output_string !oc "'";
|
||||
output_char_for_read !oc (Char.chr c);
|
||||
output_string !oc "'"
|
||||
| c::cl ->
|
||||
output_string !oc "'";
|
||||
output_char_for_read !oc (Char.chr c);
|
||||
output_string !oc "'|";
|
||||
output_chars cl
|
||||
|
||||
let output_one_trans (dest, chars) =
|
||||
output_chars !chars;
|
||||
output_string !oc " -> ";
|
||||
output_move dest;
|
||||
output_string !oc "\n | ";
|
||||
()
|
||||
|
||||
let output_all_trans trans =
|
||||
output_string !oc " match get_next_char lexbuf with\n ";
|
||||
match enumerate_vect trans with
|
||||
[] ->
|
||||
failwith "output_all_trans"
|
||||
| (default, _) :: rest ->
|
||||
List.iter output_one_trans rest;
|
||||
output_string !oc "_ -> ";
|
||||
output_move default;
|
||||
output_string !oc "\nand ";
|
||||
()
|
||||
|
||||
let output_state state_num = function
|
||||
Perform i ->
|
||||
()
|
||||
| Shift(what_to_do, moves) ->
|
||||
output_string !oc
|
||||
("state_" ^ string_of_int state_num ^ " lexbuf =\n");
|
||||
begin match what_to_do with
|
||||
No_remember -> ()
|
||||
| Remember i ->
|
||||
output_string !oc " lexbuf.lex_last_pos <- lexbuf.lex_curr_pos;\n";
|
||||
output_string !oc (" lexbuf.lex_last_action <- Obj.magic action_" ^
|
||||
string_of_int i ^ ";\n")
|
||||
end;
|
||||
output_all_trans moves
|
||||
|
||||
(* 3- Generating the entry points *)
|
||||
|
||||
let rec output_entries = function
|
||||
[] -> failwith "output_entries"
|
||||
| (name,state_num) :: rest ->
|
||||
output_string !oc (name ^ " lexbuf =\n");
|
||||
output_string !oc " start_lexing lexbuf;\n";
|
||||
output_string !oc (" state_" ^ string_of_int state_num ^ " lexbuf\n");
|
||||
match rest with
|
||||
[] -> output_string !oc "\n\n"; ()
|
||||
| _ -> output_string !oc "\nand "; output_entries rest
|
||||
|
||||
(* All together *)
|
||||
|
||||
let output_lexdef header (initial_st, st, actions) trailer =
|
||||
print_int (Array.length st); print_string " states, ";
|
||||
print_int (List.length actions); print_string " actions.";
|
||||
print_newline();
|
||||
output_string !oc "open Obj\nopen Lexing\n\n";
|
||||
copy_chunk header;
|
||||
output_string !oc "\nlet rec ";
|
||||
states := st;
|
||||
List.iter output_action actions;
|
||||
for i = 0 to Array.length st - 1 do
|
||||
output_state i st.(i)
|
||||
let output_array oc v =
|
||||
output_string oc " \"";
|
||||
for i = 0 to Array.length v - 1 do
|
||||
output_byte oc (v.(i) land 0xFF);
|
||||
output_byte oc ((v.(i) asr 8) land 0xFF);
|
||||
if i land 7 = 7 then output_string oc "\\\n "
|
||||
done;
|
||||
output_entries initial_st;
|
||||
copy_chunk trailer
|
||||
output_string oc "\""
|
||||
|
||||
(* Output the tables *)
|
||||
|
||||
let output_tables oc tbl =
|
||||
output_string oc "let lex_tables = {\n";
|
||||
fprintf oc " Lexing.lex_base = \n%a;\n" output_array tbl.tbl_base;
|
||||
fprintf oc " Lexing.lex_backtrk = \n%a;\n" output_array tbl.tbl_backtrk;
|
||||
fprintf oc " Lexing.lex_default = \n%a;\n" output_array tbl.tbl_default;
|
||||
fprintf oc " Lexing.lex_trans = \n%a;\n" output_array tbl.tbl_trans;
|
||||
fprintf oc " Lexing.lex_check = \n%a\n" output_array tbl.tbl_check;
|
||||
output_string oc "}\n\n"
|
||||
|
||||
(* Output the entries *)
|
||||
|
||||
let output_entry ic oc e =
|
||||
fprintf oc "%s lexbuf =\n" e.auto_name;
|
||||
fprintf oc " match Lexing.engine lex_tables %d lexbuf with\n "
|
||||
e.auto_initial_state;
|
||||
let first = ref true in
|
||||
List.iter
|
||||
(fun (num, loc) ->
|
||||
if !first then first := false else fprintf oc " | ";
|
||||
fprintf oc "%d -> (" num;
|
||||
copy_chunk ic oc loc;
|
||||
fprintf oc ")\n")
|
||||
e.auto_actions;
|
||||
fprintf oc " | _ -> failwith \"%s: empty token\"\n\n" e.auto_name
|
||||
|
||||
(* Main output function *)
|
||||
|
||||
let output_lexdef ic oc header tables entry_points trailer =
|
||||
Printf.printf "%d states, %d transitions, table size %d bytes\n"
|
||||
(Array.length tables.tbl_base)
|
||||
(Array.length tables.tbl_trans)
|
||||
(2 * (Array.length tables.tbl_base + Array.length tables.tbl_backtrk +
|
||||
Array.length tables.tbl_default + Array.length tables.tbl_trans +
|
||||
Array.length tables.tbl_check));
|
||||
flush stdout;
|
||||
copy_chunk ic oc header;
|
||||
output_tables oc tables;
|
||||
begin match entry_points with
|
||||
[] -> ()
|
||||
| entry1 :: entries ->
|
||||
output_string oc "let rec "; output_entry ic oc entry1;
|
||||
List.iter
|
||||
(fun e -> output_string oc "and "; output_entry ic oc e)
|
||||
entries
|
||||
end;
|
||||
copy_chunk ic oc trailer
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
(***********************************************************************)
|
||||
(* *)
|
||||
(* Caml Special Light *)
|
||||
(* *)
|
||||
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
||||
(* *)
|
||||
(* Copyright 1995 Institut National de Recherche en Informatique et *)
|
||||
(* Automatique. Distributed only by permission. *)
|
||||
(* *)
|
||||
(***********************************************************************)
|
||||
|
||||
(* $Id$ *)
|
||||
|
||||
(* Output the DFA tables and its entry points *)
|
||||
|
||||
val output_lexdef:
|
||||
in_channel -> out_channel ->
|
||||
Syntax.location ->
|
||||
Compact.lex_tables ->
|
||||
Lexgen.automata_entry list ->
|
||||
Syntax.location ->
|
||||
unit
|
|
@ -28,14 +28,3 @@ type lexer_definition =
|
|||
entrypoints: (string * (regular_expression * location) list) list;
|
||||
trailer: location }
|
||||
|
||||
(* Representation of automata *)
|
||||
|
||||
type automata =
|
||||
Perform of int
|
||||
| Shift of automata_trans * automata_move array
|
||||
and automata_trans =
|
||||
No_remember
|
||||
| Remember of int
|
||||
and automata_move =
|
||||
Backtrack
|
||||
| Goto of int
|
|
@ -164,7 +164,8 @@ rule token = parse
|
|||
let string_start = Lexing.lexeme_start lexbuf in
|
||||
start_pos := string_start;
|
||||
string lexbuf;
|
||||
lexbuf.lex_start_pos <- string_start - lexbuf.lex_abs_pos;
|
||||
lexbuf.Lexing.lex_start_pos <-
|
||||
string_start - lexbuf.Lexing.lex_abs_pos;
|
||||
STRING (get_stored_string()) }
|
||||
| "'" [^ '\\' '\''] "'"
|
||||
{ CHAR(Lexing.lexeme_char lexbuf 1) }
|
||||
|
|
|
@ -20,8 +20,16 @@ type lexbuf =
|
|||
mutable lex_abs_pos : int;
|
||||
mutable lex_start_pos : int;
|
||||
mutable lex_curr_pos : int;
|
||||
mutable lex_last_pos : int;
|
||||
mutable lex_last_action : lexbuf -> Obj.t }
|
||||
mutable lex_last_pos : int }
|
||||
|
||||
type lex_tables =
|
||||
{ lex_base: string;
|
||||
lex_backtrk: string;
|
||||
lex_default: string;
|
||||
lex_trans: string;
|
||||
lex_check: string }
|
||||
|
||||
external engine: lex_tables -> int -> lexbuf -> int = "lex_engine"
|
||||
|
||||
let lex_aux_buffer = String.create 1024
|
||||
|
||||
|
@ -55,8 +63,6 @@ let lex_refill read_fun lexbuf =
|
|||
lexbuf.lex_start_pos <- lexbuf.lex_start_pos - n;
|
||||
lexbuf.lex_last_pos <- lexbuf.lex_last_pos - n
|
||||
|
||||
let dummy_action x = failwith "lexing: empty token"
|
||||
|
||||
let from_function f =
|
||||
{ refill_buff = lex_refill f;
|
||||
lex_buffer = String.create 2048;
|
||||
|
@ -64,8 +70,7 @@ let from_function f =
|
|||
lex_abs_pos = - 2048;
|
||||
lex_start_pos = 2048;
|
||||
lex_curr_pos = 2048;
|
||||
lex_last_pos = 2048;
|
||||
lex_last_action = dummy_action }
|
||||
lex_last_pos = 2048 }
|
||||
|
||||
let from_channel ic =
|
||||
from_function (fun buf n -> input ic buf 0 n)
|
||||
|
@ -78,22 +83,7 @@ let from_string s =
|
|||
lex_abs_pos = 0;
|
||||
lex_start_pos = 0;
|
||||
lex_curr_pos = 0;
|
||||
lex_last_pos = 0;
|
||||
lex_last_action = dummy_action }
|
||||
|
||||
let get_next_char lexbuf =
|
||||
let p = lexbuf.lex_curr_pos in
|
||||
if p < lexbuf.lex_buffer_len then begin
|
||||
let c = String.unsafe_get lexbuf.lex_buffer p in
|
||||
lexbuf.lex_curr_pos <- p + 1;
|
||||
c
|
||||
end else begin
|
||||
lexbuf.refill_buff lexbuf;
|
||||
let p = lexbuf.lex_curr_pos in
|
||||
let c = String.unsafe_get lexbuf.lex_buffer p in
|
||||
lexbuf.lex_curr_pos <- p + 1;
|
||||
c
|
||||
end
|
||||
lex_last_pos = 0 }
|
||||
|
||||
let lexeme lexbuf =
|
||||
let len = lexbuf.lex_curr_pos - lexbuf.lex_start_pos in
|
||||
|
@ -104,14 +94,6 @@ let lexeme lexbuf =
|
|||
let lexeme_char lexbuf i =
|
||||
String.get lexbuf.lex_buffer (lexbuf.lex_start_pos + i)
|
||||
|
||||
let start_lexing lexbuf =
|
||||
lexbuf.lex_start_pos <- lexbuf.lex_curr_pos;
|
||||
lexbuf.lex_last_action <- dummy_action
|
||||
|
||||
let backtrack lexbuf =
|
||||
lexbuf.lex_curr_pos <- lexbuf.lex_last_pos;
|
||||
Obj.magic(lexbuf.lex_last_action lexbuf)
|
||||
|
||||
let lexeme_start lexbuf =
|
||||
lexbuf.lex_abs_pos + lexbuf.lex_start_pos
|
||||
and lexeme_end lexbuf =
|
||||
|
|
|
@ -22,8 +22,7 @@ type lexbuf =
|
|||
mutable lex_abs_pos : int;
|
||||
mutable lex_start_pos : int;
|
||||
mutable lex_curr_pos : int;
|
||||
mutable lex_last_pos : int;
|
||||
mutable lex_last_action : lexbuf -> Obj.t }
|
||||
mutable lex_last_pos : int }
|
||||
(* The type of lexer buffers. A lexer buffer is the argument passed
|
||||
to the scanning functions defined by the generated scanners.
|
||||
The lexer buffer holds the current state of the scanner, plus
|
||||
|
@ -77,6 +76,11 @@ val lexeme_end : lexbuf -> int
|
|||
(* The following definitions are used by the generated scanners only.
|
||||
They are not intended to be used by user programs. *)
|
||||
|
||||
val start_lexing : lexbuf -> unit
|
||||
val get_next_char : lexbuf -> char
|
||||
val backtrack : lexbuf -> 'a
|
||||
type lex_tables =
|
||||
{ lex_base: string;
|
||||
lex_backtrk: string;
|
||||
lex_default: string;
|
||||
lex_trans: string;
|
||||
lex_check: string }
|
||||
|
||||
external engine: lex_tables -> int -> lexbuf -> int = "lex_engine"
|
||||
|
|
Loading…
Reference in New Issue