csllex utilise un automate a pile

git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@667 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
master
Xavier Leroy 1996-02-25 14:45:47 +00:00
parent ede06e157d
commit 22bc127a92
16 changed files with 459 additions and 314 deletions

View File

@ -406,7 +406,7 @@ realclean::
csltools:
cd tools; $(MAKE) all
realclean::
clean::
cd tools; $(MAKE) clean
alldepend::
cd tools; $(MAKE) depend

View File

@ -7,12 +7,13 @@ DFLAGS=-g -DDEBUG $(BYTECCCOMPOPTS)
OBJS=interp.o misc.o stacks.o fix_code.o main.o fail.o signals.o \
freelist.o major_gc.o minor_gc.o memory.o alloc.o roots.o \
compare.o ints.o floats.o str.o array.o io.o extern.o intern.o \
hash.o sys.o meta.o parsing.o gc_ctrl.o terminfo.o md5.o obj.o
hash.o sys.o meta.o parsing.o gc_ctrl.o terminfo.o md5.o obj.o \
lexing.o
DOBJS=$(OBJS:.o=.d.o) instrtrace.d.o
PRIMS=array.c compare.c extern.c floats.c gc_ctrl.c hash.c \
intern.c interp.c ints.c io.c md5.c meta.c obj.c parsing.c \
intern.c interp.c ints.c io.c lexing.c md5.c meta.c obj.c parsing.c \
signals.c str.c sys.c terminfo.c
all: cslrun

21
configure vendored
View File

@ -11,7 +11,7 @@
# #
#*********************************************************************#
# $Id
# $Id$
bindir=/usr/local/bin
libdir=/usr/local/lib/camlsl
@ -58,14 +58,17 @@ echo " manual pages.............. $mandir (with extension .$manext)"
# Determine the system type
case "$host_type" in
unknown)
if host_type=`./config.guess`; then :; else
echo "Cannot guess host type"
echo "You must specify one with the -host option"
fi;;
esac
host=`./config.sub $host_type`
if test "$host_type" = "unknown"; then
if host_type=`./config.guess`; then :; else
echo "Cannot guess host type"
echo "You must specify one with the -host option"
exit 2
fi
fi
if host=`./config.sub $host_type`; then :; else
echo "Please specify the correct host type with the -host option"
exit 2
fi
echo "Configuring for a $host ..."
# Do we have gcc?

View File

@ -1,12 +1,17 @@
compact.cmi: lexgen.cmi
lexer.cmi: parser.cmi
parser.cmi: syntax.cmo
lexer.cmo: parser.cmi syntax.cmo lexer.cmi
lexer.cmx: parser.cmx syntax.cmx lexer.cmi
lexgen.cmo: syntax.cmo
lexgen.cmx: syntax.cmx
main.cmo: lexer.cmi lexgen.cmo output.cmo parser.cmi syntax.cmo
main.cmx: lexer.cmx lexgen.cmx output.cmx parser.cmx syntax.cmx
output.cmo: syntax.cmo
output.cmx: syntax.cmx
parser.cmo: syntax.cmo parser.cmi
parser.cmx: syntax.cmx parser.cmi
lexgen.cmi: syntax.cmi
output.cmi: compact.cmi lexgen.cmi syntax.cmi
parser.cmi: syntax.cmi
compact.cmo: lexgen.cmi compact.cmi
compact.cmx: lexgen.cmx compact.cmi
lexer.cmo: parser.cmi syntax.cmi lexer.cmi
lexer.cmx: parser.cmx syntax.cmi lexer.cmi
lexgen.cmo: syntax.cmi lexgen.cmi
lexgen.cmx: syntax.cmi lexgen.cmi
main.cmo: compact.cmi lexer.cmi lexgen.cmi output.cmi parser.cmi syntax.cmi
main.cmx: compact.cmx lexer.cmx lexgen.cmx output.cmx parser.cmx syntax.cmi
output.cmo: compact.cmi lexgen.cmi syntax.cmi output.cmi
output.cmx: compact.cmx lexgen.cmx syntax.cmi output.cmi
parser.cmo: syntax.cmi parser.cmi
parser.cmx: syntax.cmi parser.cmi

View File

@ -9,7 +9,7 @@ CAMLLEX=../boot/cslrun ../boot/csllex
CAMLDEP=../boot/cslrun ../tools/csldep
DEPFLAGS=
OBJS=syntax.cmo parser.cmo lexer.cmo lexgen.cmo output.cmo main.cmo
OBJS=parser.cmo lexer.cmo lexgen.cmo compact.cmo output.cmo main.cmo
all: csllex

120
lex/compact.ml Normal file
View File

@ -0,0 +1,120 @@
(***********************************************************************)
(* *)
(* Caml Special Light *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1995 Institut National de Recherche en Informatique et *)
(* Automatique. Distributed only by permission. *)
(* *)
(***********************************************************************)
(* $Id$ *)
(* Compaction of an automata *)
open Lexgen
(* Determine the integer occurring most frequently in an array *)
let most_frequent_elt v =
let frequencies = Hashtbl.new 17 in
let max_freq = ref 0 in
let most_freq = ref (v.(0)) in
for i = 0 to Array.length v - 1 do
let e = v.(i) in
let r =
try
Hashtbl.find frequencies e
with Not_found ->
let r = ref 1 in Hashtbl.add frequencies e r; r in
incr r;
if !r > !max_freq then begin max_freq := !r; most_freq := e end
done;
!most_freq
(* Transform an array into a list of (position, non-default element) *)
let non_default_elements def v =
let rec nondef i =
if i >= Array.length v then [] else begin
let e = v.(i) in
if e = def then nondef(i+1) else (i, e) :: nondef(i+1)
end in
nondef 0
(* Compact the transition and check arrays *)
let trans = ref(Array.new 1024 0)
and check = ref(Array.new 1024 (-1))
and last_used = ref 0
let grow_transitions () =
let old_trans = !trans
and old_check = !check in
let n = Array.length old_trans in
trans := Array.new (2*n) 0;
Array.blit old_trans 0 !trans 0 !last_used;
check := Array.new (2*n) (-1);
Array.blit old_check 0 !check 0 !last_used
let pack_moves state_num move_t =
let move_v = Array.new 256 0 in
for i = 0 to 255 do
move_v.(i) <-
(match move_t.(i) with
Backtrack -> -1
| Goto n -> n)
done;
let default = most_frequent_elt move_v in
let nondef = non_default_elements default move_v in
let rec pack_from b =
while b + 256 > Array.length !trans do grow_transitions() done;
let rec try_pack = function
[] -> b
| (pos, v) :: rem ->
if !check.(b + pos) = -1 then try_pack rem else pack_from (b+1) in
try_pack nondef in
let base = pack_from 0 in
List.iter
(fun (pos, v) ->
!trans.(base + pos) <- v;
!check.(base + pos) <- state_num)
nondef;
if base + 256 > !last_used then last_used := base + 256;
(base, default)
(* Build the tables *)
type lex_tables =
{ tbl_base: int array; (* Perform / Shift *)
tbl_backtrk: int array; (* No_remember / Remember *)
tbl_default: int array; (* Default transition *)
tbl_trans: int array; (* Transitions (compacted) *)
tbl_check: int array } (* Check (compacted) *)
let compact_tables state_v =
let n = Array.length state_v in
let base = Array.new n 0
and backtrk = Array.new n (-1)
and default = Array.new n 0 in
for i = 0 to n - 1 do
match state_v.(i) with
Perform n ->
base.(i) <- -(n+1)
| Shift(trans, move) ->
begin match trans with
No_remember -> ()
| Remember n -> backtrk.(i) <- n
end;
let (b, d) = pack_moves i move in
base.(i) <- b;
default.(i) <- d
done;
{ tbl_base = base;
tbl_backtrk = backtrk;
tbl_default = default;
tbl_trans = Array.sub !trans 0 !last_used;
tbl_check = Array.sub !check 0 !last_used }

23
lex/compact.mli Normal file
View File

@ -0,0 +1,23 @@
(***********************************************************************)
(* *)
(* Caml Special Light *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1995 Institut National de Recherche en Informatique et *)
(* Automatique. Distributed only by permission. *)
(* *)
(***********************************************************************)
(* $Id$ *)
(* Compaction of an automata *)
type lex_tables =
{ tbl_base: int array; (* Perform / Shift *)
tbl_backtrk: int array; (* No_remember / Remember *)
tbl_default: int array; (* Default transition *)
tbl_trans: int array; (* Transitions (compacted) *)
tbl_check: int array } (* Check (compacted) *)
val compact_tables: Lexgen.automata array -> lex_tables

View File

@ -25,6 +25,30 @@ type regexp =
| Alt of regexp * regexp
| Star of regexp
type lexer_entry =
{ lex_name: string;
lex_regexp: regexp;
lex_actions: (int * location) list }
(* Representation of automata *)
type automata =
Perform of int
| Shift of automata_trans * automata_move array
and automata_trans =
No_remember
| Remember of int
and automata_move =
Backtrack
| Goto of int
(* Representation of entry points *)
type automata_entry =
{ auto_name: string;
auto_initial_state: int;
auto_actions: (int * location) list }
(* From shallow to deep syntax *)
let chars = ref ([] : char list list)
@ -46,29 +70,33 @@ let rec encode_regexp = function
| Repetition r ->
Star (encode_regexp r)
let encode_casedef =
let encode_casedef casedef =
List.fold_left
(fun reg (expr,act) ->
(fun reg (expr, act) ->
let act_num = !actions_count in
incr actions_count;
actions := (act_num, act) :: !actions;
Alt(reg, Seq(encode_regexp expr, Action act_num)))
Empty
Empty
casedef
let encode_lexdef def =
chars := [];
chars_count := 0;
actions := [];
actions_count := 0;
let name_regexp_list =
let entry_list =
List.map
(fun (name, casedef) -> (name, encode_casedef casedef))
(fun (entry_name, casedef) ->
actions := [];
actions_count := 0;
let re = encode_casedef casedef in
{ lex_name = entry_name;
lex_regexp = re;
lex_actions = List.rev !actions })
def.entrypoints in
let chr = Array.of_list (List.rev !chars)
and act = !actions in
let chr = Array.of_list (List.rev !chars) in
chars := [];
actions := [];
(chr, name_regexp_list, act)
(chr, entry_list)
(* To generate directly a NFA from a regular expression.
Confer Aho-Sethi-Ullman, dragon book, chap. 3 *)
@ -77,22 +105,8 @@ type transition =
OnChars of int
| ToAction of int
let rec merge_trans s1 s2 =
match (s1, s2) with
([], _) -> s2
| (_, []) -> s1
| ((OnChars n1 as t1) :: r1, (OnChars n2 as t2) :: r2) ->
if n1 == n2 then t1 :: merge_trans r1 r2 else
if n1 < n2 then t1 :: merge_trans r1 s2 else
t2 :: merge_trans s1 r2
| ((ToAction n1 as t1) :: r1, (ToAction n2 as t2) :: r2) ->
if n1 == n2 then t1 :: merge_trans r1 r2 else
if n1 < n2 then t1 :: merge_trans r1 s2 else
t2 :: merge_trans s1 r2
| ((OnChars n1 as t1) :: r1, (ToAction n2 as t2) :: r2) ->
t1 :: merge_trans r1 s2
| ((ToAction n1 as t1) :: r1, (OnChars n2 as t2) :: r2) ->
t2 :: merge_trans s1 r2
module TransSet =
Set.Make(struct type t = transition let compare = compare end)
let rec nullable = function
Empty -> true
@ -103,90 +117,99 @@ let rec nullable = function
| Star r -> true
let rec firstpos = function
Empty -> []
| Chars pos -> [OnChars pos]
| Action act -> [ToAction act]
Empty -> TransSet.empty
| Chars pos -> TransSet.add (OnChars pos) TransSet.empty
| Action act -> TransSet.add (ToAction act) TransSet.empty
| Seq(r1,r2) -> if nullable r1
then merge_trans (firstpos r1) (firstpos r2)
then TransSet.union (firstpos r1) (firstpos r2)
else firstpos r1
| Alt(r1,r2) -> merge_trans (firstpos r1) (firstpos r2)
| Alt(r1,r2) -> TransSet.union (firstpos r1) (firstpos r2)
| Star r -> firstpos r
let rec lastpos = function
Empty -> []
| Chars pos -> [OnChars pos]
| Action act -> [ToAction act]
Empty -> TransSet.empty
| Chars pos -> TransSet.add (OnChars pos) TransSet.empty
| Action act -> TransSet.add (ToAction act) TransSet.empty
| Seq(r1,r2) -> if nullable r2
then merge_trans (lastpos r1) (lastpos r2)
then TransSet.union (lastpos r1) (lastpos r2)
else lastpos r2
| Alt(r1,r2) -> merge_trans (lastpos r1) (lastpos r2)
| Alt(r1,r2) -> TransSet.union (lastpos r1) (lastpos r2)
| Star r -> lastpos r
let followpos size name_regexp_list =
let v = Array.new size [] in
let fill_pos first = function
OnChars pos -> v.(pos) <- merge_trans first v.(pos); ()
| ToAction _ -> () in
let rec fill = function
Seq(r1,r2) ->
fill r1; fill r2;
List.iter (fill_pos (firstpos r2)) (lastpos r1)
| Alt(r1,r2) ->
fill r1; fill r2
| Star r ->
fill r;
List.iter (fill_pos (firstpos r)) (lastpos r)
| _ -> () in
List.iter (fun (name, regexp) -> fill regexp) name_regexp_list;
v
let followpos size entry_list =
let v = Array.new size TransSet.empty in
let fill_pos first = function
OnChars pos -> v.(pos) <- TransSet.union first v.(pos)
| ToAction _ -> () in
let rec fill = function
Seq(r1,r2) ->
fill r1; fill r2;
TransSet.iter (fill_pos (firstpos r2)) (lastpos r1)
| Alt(r1,r2) ->
fill r1; fill r2
| Star r ->
fill r;
TransSet.iter (fill_pos (firstpos r)) (lastpos r)
| _ -> () in
List.iter (fun entry -> fill entry.lex_regexp) entry_list;
v
let no_action = 32767
let no_action = max_int
let split_trans_set = List.fold_left
(fun (act, pos_set as act_pos_set) ->
function OnChars pos -> (act, pos :: pos_set)
| ToAction act1 -> if act1 < act then (act1, pos_set)
else act_pos_set)
(no_action, [])
let split_trans_set trans_set =
TransSet.fold
(fun trans (act, pos_set as act_pos_set) ->
match trans with
OnChars pos -> (act, pos :: pos_set)
| ToAction act1 -> if act1 < act then (act1, pos_set) else act_pos_set)
trans_set
(no_action, [])
let memory = (Hashtbl.new 131 : (transition list, int) Hashtbl.t)
and todo = ref ([] : (transition list * int) list)
and next = ref 0
module StateMap =
Map.Make(struct type t = TransSet.t let compare = TransSet.compare end)
let state_map = ref (StateMap.empty: int StateMap.t)
let todo = (Stack.new() : (TransSet.t * int) Stack.t)
let next_state_num = ref 0
let reset_state_mem () =
Hashtbl.clear memory; todo := []; next := 0; ()
state_map := StateMap.empty;
Stack.clear todo;
next_state_num := 0
let get_state st =
try
Hashtbl.find memory st
StateMap.find st !state_map
with Not_found ->
let nbr = !next in
incr next;
Hashtbl.add memory st nbr;
todo := (st, nbr) :: !todo;
nbr
let num = !next_state_num in
incr next_state_num;
state_map := StateMap.add st num !state_map;
Stack.push (st, num) todo;
num
let rec map_on_states f =
match !todo with
[] -> []
| (st,i)::r -> todo := r; let res = f st in (res,i) :: map_on_states f
let map_on_all_states f =
let res = ref [] in
begin try
while true do
let (st, i) = Stack.pop todo in
let r = f st in
res := (r, i) :: !res
done
with Stack.Empty -> ()
end;
!res
let number_of_states () =
!next
let goto_state = function
[] -> Backtrack
| ps -> Goto (get_state ps)
let goto_state st =
if TransSet.is_empty st then Backtrack else Goto (get_state st)
let transition_from chars follow pos_set =
let tr = Array.new 256 []
and shift = Array.new 256 Backtrack in
let tr = Array.new 256 TransSet.empty in
let shift = Array.new 256 Backtrack in
List.iter
(fun pos ->
List.iter
(fun c ->
tr.(Char.code c) <-
merge_trans tr.(Char.code c) follow.(pos))
tr.(Char.code c) <- TransSet.union tr.(Char.code c) follow.(pos))
chars.(pos))
pos_set;
for i = 0 to 255 do
@ -196,23 +219,23 @@ let transition_from chars follow pos_set =
let translate_state chars follow state =
match split_trans_set state with
n, [] -> Perform n
| n, ps -> Shift( (if n == no_action then No_remember else Remember n),
transition_from chars follow ps)
(n, []) -> Perform n
| (n, ps) -> Shift((if n = no_action then No_remember else Remember n),
transition_from chars follow ps)
let make_dfa lexdef =
let (chars, name_regexp_list, actions) =
encode_lexdef lexdef in
let follow =
followpos (Array.length chars) name_regexp_list in
let (chars, entry_list) = encode_lexdef lexdef in
let follow = followpos (Array.length chars) entry_list in
reset_state_mem();
let initial_states =
List.map (fun (name, regexp) -> (name, get_state(firstpos regexp)))
name_regexp_list in
let states =
map_on_states (translate_state chars follow) in
let v =
Array.new (number_of_states()) (Perform 0) in
List.iter (fun (auto, i) -> v.(i) <- auto) states;
List.map
(fun le ->
{ auto_name = le.lex_name;
auto_initial_state = get_state(firstpos le.lex_regexp);
auto_actions = le.lex_actions })
entry_list in
let states = map_on_all_states (translate_state chars follow) in
let actions = Array.new !next_state_num (Perform 0) in
List.iter (fun (act, i) -> actions.(i) <- act) states;
reset_state_mem();
(initial_states, v, actions)
(initial_states, actions)

35
lex/lexgen.mli Normal file
View File

@ -0,0 +1,35 @@
(***********************************************************************)
(* *)
(* Caml Special Light *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1995 Institut National de Recherche en Informatique et *)
(* Automatique. Distributed only by permission. *)
(* *)
(***********************************************************************)
(* $Id$ *)
(* Representation of automata *)
type automata =
Perform of int
| Shift of automata_trans * automata_move array
and automata_trans =
No_remember
| Remember of int
and automata_move =
Backtrack
| Goto of int
(* Representation of entry points *)
type automata_entry =
{ auto_name: string;
auto_initial_state: int;
auto_actions: (int * Syntax.location) list }
(* The entry point *)
val make_dfa: Syntax.lexer_definition -> automata_entry list * automata array

View File

@ -28,15 +28,14 @@ let main () =
Filename.chop_suffix source_name ".mll" ^ ".ml"
else
source_name ^ ".ml" in
ic := open_in_bin source_name;
oc := open_out dest_name;
let lexbuf =
Lexing.from_channel !ic in
let ic = open_in_bin source_name in
let oc = open_out dest_name in
let lexbuf = Lexing.from_channel ic in
let def =
try
Parser.lexer_definition Lexer.main lexbuf
with exn ->
close_out !oc;
close_out oc;
Sys.remove dest_name;
begin match exn with
Parsing.Parse_error ->
@ -52,10 +51,11 @@ let main () =
| _ -> raise exn
end;
exit 2 in
let ((init, states, acts) as dfa) = make_dfa def in
output_lexdef def.header dfa def.trailer;
close_in !ic;
close_out !oc
let (entries, transitions) = Lexgen.make_dfa def in
let tables = Compact.compact_tables transitions in
Output.output_lexdef ic oc def.header tables entries def.trailer;
close_in ic;
close_out oc
let _ = Printexc.catch main (); exit 0

View File

@ -11,151 +11,88 @@
(* $Id$ *)
(* Generating a DFA as a set of mutually recursive functions *)
(* Output the DFA tables and its entry points *)
open Printf
open Syntax
open Lexgen
open Compact
let ic = ref stdin
and oc = ref stdout
(* 1- Generating the actions *)
(* To copy the ML code fragments *)
let copy_buffer = String.create 1024
let copy_chunk (Location(start,stop)) =
let rec copy s =
if s <= 0 then () else
let n = if s < 1024 then s else 1024 in
let m = input !ic copy_buffer 0 n in
output !oc copy_buffer 0 m;
copy (s - m)
in
seek_in !ic start;
copy (stop - start)
let copy_chunk ic oc (Location(start,stop)) =
seek_in ic start;
let n = ref (stop - start) in
while !n > 0 do
let m = input ic copy_buffer 0 (min !n 1024) in
output oc copy_buffer 0 m;
n := !n - m
done
let output_action (i,act) =
output_string !oc ("action_" ^ string_of_int i ^ " lexbuf = (\n");
copy_chunk act;
output_string !oc ")\nand ";
()
(* To output an array of short ints, encoded as a string *)
(* 2- Generating the states *)
let output_byte oc b =
output_char oc '\\';
output_char oc (Char.chr(48 + b / 100));
output_char oc (Char.chr(48 + (b / 10) mod 10));
output_char oc (Char.chr(48 + b mod 10))
let states = ref ([||] : automata array)
let enumerate_vect v =
let rec enum env pos =
if pos >= Array.length v then env else
try
let pl = List.assoc v.(pos) env in
pl := pos :: !pl; enum env (succ pos)
with Not_found ->
enum ((v.(pos), ref [pos]) :: env) (succ pos) in
Sort.list
(fun (e1, pl1) (e2, pl2) -> List.length !pl1 >= List.length !pl2)
(enum [] 0)
let output_move = function
Backtrack ->
output_string !oc "backtrack lexbuf"
| Goto dest ->
match !states.(dest) with
Perform act_num ->
output_string !oc ("action_" ^ string_of_int act_num ^ " lexbuf")
| _ ->
output_string !oc ("state_" ^ string_of_int dest ^ " lexbuf")
let output_char_for_read oc = function
'\'' -> output_string oc "\\'"
| '\\' -> output_string oc "\\\\"
| '\n' -> output_string oc "\\n"
| '\t' -> output_string oc "\\t"
| c ->
let n = Char.code c in
if n >= 32 & n < 127 then
output_char oc c
else begin
output_char oc '\\';
output_char oc (Char.chr (48 + n / 100));
output_char oc (Char.chr (48 + (n / 10) mod 10));
output_char oc (Char.chr (48 + n mod 10))
end
let rec output_chars = function
[] ->
failwith "output_chars"
| [c] ->
output_string !oc "'";
output_char_for_read !oc (Char.chr c);
output_string !oc "'"
| c::cl ->
output_string !oc "'";
output_char_for_read !oc (Char.chr c);
output_string !oc "'|";
output_chars cl
let output_one_trans (dest, chars) =
output_chars !chars;
output_string !oc " -> ";
output_move dest;
output_string !oc "\n | ";
()
let output_all_trans trans =
output_string !oc " match get_next_char lexbuf with\n ";
match enumerate_vect trans with
[] ->
failwith "output_all_trans"
| (default, _) :: rest ->
List.iter output_one_trans rest;
output_string !oc "_ -> ";
output_move default;
output_string !oc "\nand ";
()
let output_state state_num = function
Perform i ->
()
| Shift(what_to_do, moves) ->
output_string !oc
("state_" ^ string_of_int state_num ^ " lexbuf =\n");
begin match what_to_do with
No_remember -> ()
| Remember i ->
output_string !oc " lexbuf.lex_last_pos <- lexbuf.lex_curr_pos;\n";
output_string !oc (" lexbuf.lex_last_action <- Obj.magic action_" ^
string_of_int i ^ ";\n")
end;
output_all_trans moves
(* 3- Generating the entry points *)
let rec output_entries = function
[] -> failwith "output_entries"
| (name,state_num) :: rest ->
output_string !oc (name ^ " lexbuf =\n");
output_string !oc " start_lexing lexbuf;\n";
output_string !oc (" state_" ^ string_of_int state_num ^ " lexbuf\n");
match rest with
[] -> output_string !oc "\n\n"; ()
| _ -> output_string !oc "\nand "; output_entries rest
(* All together *)
let output_lexdef header (initial_st, st, actions) trailer =
print_int (Array.length st); print_string " states, ";
print_int (List.length actions); print_string " actions.";
print_newline();
output_string !oc "open Obj\nopen Lexing\n\n";
copy_chunk header;
output_string !oc "\nlet rec ";
states := st;
List.iter output_action actions;
for i = 0 to Array.length st - 1 do
output_state i st.(i)
let output_array oc v =
output_string oc " \"";
for i = 0 to Array.length v - 1 do
output_byte oc (v.(i) land 0xFF);
output_byte oc ((v.(i) asr 8) land 0xFF);
if i land 7 = 7 then output_string oc "\\\n "
done;
output_entries initial_st;
copy_chunk trailer
output_string oc "\""
(* Output the tables *)
let output_tables oc tbl =
output_string oc "let lex_tables = {\n";
fprintf oc " Lexing.lex_base = \n%a;\n" output_array tbl.tbl_base;
fprintf oc " Lexing.lex_backtrk = \n%a;\n" output_array tbl.tbl_backtrk;
fprintf oc " Lexing.lex_default = \n%a;\n" output_array tbl.tbl_default;
fprintf oc " Lexing.lex_trans = \n%a;\n" output_array tbl.tbl_trans;
fprintf oc " Lexing.lex_check = \n%a\n" output_array tbl.tbl_check;
output_string oc "}\n\n"
(* Output the entries *)
let output_entry ic oc e =
fprintf oc "%s lexbuf =\n" e.auto_name;
fprintf oc " match Lexing.engine lex_tables %d lexbuf with\n "
e.auto_initial_state;
let first = ref true in
List.iter
(fun (num, loc) ->
if !first then first := false else fprintf oc " | ";
fprintf oc "%d -> (" num;
copy_chunk ic oc loc;
fprintf oc ")\n")
e.auto_actions;
fprintf oc " | _ -> failwith \"%s: empty token\"\n\n" e.auto_name
(* Main output function *)
let output_lexdef ic oc header tables entry_points trailer =
Printf.printf "%d states, %d transitions, table size %d bytes\n"
(Array.length tables.tbl_base)
(Array.length tables.tbl_trans)
(2 * (Array.length tables.tbl_base + Array.length tables.tbl_backtrk +
Array.length tables.tbl_default + Array.length tables.tbl_trans +
Array.length tables.tbl_check));
flush stdout;
copy_chunk ic oc header;
output_tables oc tables;
begin match entry_points with
[] -> ()
| entry1 :: entries ->
output_string oc "let rec "; output_entry ic oc entry1;
List.iter
(fun e -> output_string oc "and "; output_entry ic oc e)
entries
end;
copy_chunk ic oc trailer

22
lex/output.mli Normal file
View File

@ -0,0 +1,22 @@
(***********************************************************************)
(* *)
(* Caml Special Light *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1995 Institut National de Recherche en Informatique et *)
(* Automatique. Distributed only by permission. *)
(* *)
(***********************************************************************)
(* $Id$ *)
(* Output the DFA tables and its entry points *)
val output_lexdef:
in_channel -> out_channel ->
Syntax.location ->
Compact.lex_tables ->
Lexgen.automata_entry list ->
Syntax.location ->
unit

View File

@ -28,14 +28,3 @@ type lexer_definition =
entrypoints: (string * (regular_expression * location) list) list;
trailer: location }
(* Representation of automata *)
type automata =
Perform of int
| Shift of automata_trans * automata_move array
and automata_trans =
No_remember
| Remember of int
and automata_move =
Backtrack
| Goto of int

View File

@ -164,7 +164,8 @@ rule token = parse
let string_start = Lexing.lexeme_start lexbuf in
start_pos := string_start;
string lexbuf;
lexbuf.lex_start_pos <- string_start - lexbuf.lex_abs_pos;
lexbuf.Lexing.lex_start_pos <-
string_start - lexbuf.Lexing.lex_abs_pos;
STRING (get_stored_string()) }
| "'" [^ '\\' '\''] "'"
{ CHAR(Lexing.lexeme_char lexbuf 1) }

View File

@ -20,8 +20,16 @@ type lexbuf =
mutable lex_abs_pos : int;
mutable lex_start_pos : int;
mutable lex_curr_pos : int;
mutable lex_last_pos : int;
mutable lex_last_action : lexbuf -> Obj.t }
mutable lex_last_pos : int }
type lex_tables =
{ lex_base: string;
lex_backtrk: string;
lex_default: string;
lex_trans: string;
lex_check: string }
external engine: lex_tables -> int -> lexbuf -> int = "lex_engine"
let lex_aux_buffer = String.create 1024
@ -55,8 +63,6 @@ let lex_refill read_fun lexbuf =
lexbuf.lex_start_pos <- lexbuf.lex_start_pos - n;
lexbuf.lex_last_pos <- lexbuf.lex_last_pos - n
let dummy_action x = failwith "lexing: empty token"
let from_function f =
{ refill_buff = lex_refill f;
lex_buffer = String.create 2048;
@ -64,8 +70,7 @@ let from_function f =
lex_abs_pos = - 2048;
lex_start_pos = 2048;
lex_curr_pos = 2048;
lex_last_pos = 2048;
lex_last_action = dummy_action }
lex_last_pos = 2048 }
let from_channel ic =
from_function (fun buf n -> input ic buf 0 n)
@ -78,22 +83,7 @@ let from_string s =
lex_abs_pos = 0;
lex_start_pos = 0;
lex_curr_pos = 0;
lex_last_pos = 0;
lex_last_action = dummy_action }
let get_next_char lexbuf =
let p = lexbuf.lex_curr_pos in
if p < lexbuf.lex_buffer_len then begin
let c = String.unsafe_get lexbuf.lex_buffer p in
lexbuf.lex_curr_pos <- p + 1;
c
end else begin
lexbuf.refill_buff lexbuf;
let p = lexbuf.lex_curr_pos in
let c = String.unsafe_get lexbuf.lex_buffer p in
lexbuf.lex_curr_pos <- p + 1;
c
end
lex_last_pos = 0 }
let lexeme lexbuf =
let len = lexbuf.lex_curr_pos - lexbuf.lex_start_pos in
@ -104,14 +94,6 @@ let lexeme lexbuf =
let lexeme_char lexbuf i =
String.get lexbuf.lex_buffer (lexbuf.lex_start_pos + i)
let start_lexing lexbuf =
lexbuf.lex_start_pos <- lexbuf.lex_curr_pos;
lexbuf.lex_last_action <- dummy_action
let backtrack lexbuf =
lexbuf.lex_curr_pos <- lexbuf.lex_last_pos;
Obj.magic(lexbuf.lex_last_action lexbuf)
let lexeme_start lexbuf =
lexbuf.lex_abs_pos + lexbuf.lex_start_pos
and lexeme_end lexbuf =

View File

@ -22,8 +22,7 @@ type lexbuf =
mutable lex_abs_pos : int;
mutable lex_start_pos : int;
mutable lex_curr_pos : int;
mutable lex_last_pos : int;
mutable lex_last_action : lexbuf -> Obj.t }
mutable lex_last_pos : int }
(* The type of lexer buffers. A lexer buffer is the argument passed
to the scanning functions defined by the generated scanners.
The lexer buffer holds the current state of the scanner, plus
@ -77,6 +76,11 @@ val lexeme_end : lexbuf -> int
(* The following definitions are used by the generated scanners only.
They are not intended to be used by user programs. *)
val start_lexing : lexbuf -> unit
val get_next_char : lexbuf -> char
val backtrack : lexbuf -> 'a
type lex_tables =
{ lex_base: string;
lex_backtrk: string;
lex_default: string;
lex_trans: string;
lex_check: string }
external engine: lex_tables -> int -> lexbuf -> int = "lex_engine"