169 lines
4.8 KiB
OCaml
169 lines
4.8 KiB
OCaml
(***********************************************************************)
|
|
(* *)
|
|
(* Objective Caml *)
|
|
(* *)
|
|
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
|
(* *)
|
|
(* Copyright 1996 Institut National de Recherche en Informatique et *)
|
|
(* Automatique. Distributed only by permission. *)
|
|
(* *)
|
|
(***********************************************************************)
|
|
|
|
(* $Id$ *)
|
|
|
|
(* Generating a DFA as a set of mutually recursive functions *)
|
|
|
|
open Syntax
|
|
|
|
let ic = ref stdin
|
|
let oc = ref stdout
|
|
|
|
(* 1- Generating the actions *)
|
|
|
|
let copy_buffer = String.create 1024
|
|
|
|
let copy_chunk (Location(start,stop)) =
|
|
seek_in !ic start;
|
|
let tocopy = ref(stop - start) in
|
|
while !tocopy > 0 do
|
|
let m =
|
|
input !ic copy_buffer 0 (min !tocopy (String.length copy_buffer)) in
|
|
output !oc copy_buffer 0 m;
|
|
tocopy := !tocopy - m
|
|
done
|
|
|
|
|
|
let output_action (i,act) =
|
|
output_string !oc ("action_" ^ string_of_int i ^ " lexbuf = (\n");
|
|
copy_chunk act;
|
|
output_string !oc ")\nand "
|
|
|
|
|
|
(* 2- Generating the states *)
|
|
|
|
let states = ref ([||] : automata array)
|
|
|
|
type occurrence =
|
|
{ mutable pos: int list;
|
|
mutable freq: int }
|
|
|
|
let enumerate_vect v =
|
|
let env = ref [] in
|
|
for pos = 0 to Array.length v - 1 do
|
|
try
|
|
let occ = List.assoc v.(pos) !env in
|
|
occ.pos <- pos :: occ.pos;
|
|
occ.freq <- occ.freq + 1
|
|
with Not_found ->
|
|
env := (v.(pos), {pos = [pos]; freq = 1 }) :: !env
|
|
done;
|
|
Sort.list (fun (e1, occ1) (e2, occ2) -> occ1.freq >= occ2.freq) !env
|
|
|
|
|
|
let output_move = function
|
|
Backtrack ->
|
|
output_string !oc "lexing.backtrack lexbuf"
|
|
| Goto dest ->
|
|
match !states.(dest) with
|
|
Perform act_num ->
|
|
output_string !oc ("action_" ^ string_of_int act_num ^ " lexbuf")
|
|
| _ ->
|
|
output_string !oc ("state_" ^ string_of_int dest ^ " lexbuf")
|
|
|
|
|
|
(* Cannot use standard char_for_read because the characters to escape
|
|
are not the same in CL6 and CL1999. *)
|
|
|
|
let output_char_lit oc = function
|
|
'\'' -> output_string oc "\\'"
|
|
| '\\' -> output_string oc "\\\\"
|
|
| '\n' -> output_string oc "\\n"
|
|
| '\t' -> output_string oc "\\t"
|
|
| c -> if Char.code c >= 32 & Char.code c < 128 then
|
|
output_char oc c
|
|
else begin
|
|
let n = Char.code c in
|
|
output_char oc '\\';
|
|
output_char oc (Char.chr (48 + n / 100));
|
|
output_char oc (Char.chr (48 + (n / 10) mod 10));
|
|
output_char oc (Char.chr (48 + n mod 10))
|
|
end
|
|
|
|
let rec output_chars = function
|
|
[] ->
|
|
failwith "output_chars"
|
|
| [c] ->
|
|
output_string !oc "'";
|
|
output_char_lit !oc (Char.chr c);
|
|
output_string !oc "'"
|
|
| c::cl ->
|
|
output_string !oc "'";
|
|
output_char_lit !oc (Char.chr c);
|
|
output_string !oc "'|";
|
|
output_chars cl
|
|
|
|
let output_one_trans (dest, occ) =
|
|
output_chars occ.pos;
|
|
output_string !oc " -> ";
|
|
output_move dest;
|
|
output_string !oc "\n | "
|
|
|
|
let output_all_trans trans =
|
|
output_string !oc " match lexing.next_char lexbuf with\n ";
|
|
match enumerate_vect trans with
|
|
[] ->
|
|
failwith "output_all_trans"
|
|
| (default, _) :: rest ->
|
|
List.iter output_one_trans rest;
|
|
output_string !oc "_ -> ";
|
|
output_move default;
|
|
output_string !oc "\nand "
|
|
|
|
let output_state state_num = function
|
|
Perform i ->
|
|
()
|
|
| Shift(what_to_do, moves) ->
|
|
output_string !oc
|
|
("state_" ^ string_of_int state_num ^ " lexbuf =\n");
|
|
begin match what_to_do with
|
|
No_remember -> ()
|
|
| Remember i ->
|
|
output_string !oc
|
|
(" Lexing.set_backtrack lexbuf action_" ^
|
|
string_of_int i ^ ";\n")
|
|
end;
|
|
output_all_trans moves
|
|
|
|
|
|
(* 3- Generating the entry points *)
|
|
|
|
let rec output_entries = function
|
|
[] -> failwith "output_entries"
|
|
| (name,state_num) :: rest ->
|
|
output_string !oc (name ^ " lexbuf =\n");
|
|
output_string !oc " Lexing.init lexbuf;\n";
|
|
output_string !oc (" state_" ^ string_of_int state_num ^
|
|
" lexbuf\n");
|
|
match rest with
|
|
[] -> output_string !oc "\n"; ()
|
|
| _ -> output_string !oc "\nand "; output_entries rest
|
|
|
|
|
|
(* All together *)
|
|
|
|
let output_lexdef header (initial_st, st, actions) =
|
|
prerr_int (Array.length st); prerr_string " states, ";
|
|
prerr_int (List.length actions); prerr_string " actions.";
|
|
prerr_newline();
|
|
copy_chunk header;
|
|
output_string !oc "\nlet rec ";
|
|
states := st;
|
|
List.iter output_action actions;
|
|
for i = 0 to Array.length st - 1 do
|
|
output_state i st.(i)
|
|
done;
|
|
output_entries initial_st
|
|
|
|
|
|
|