563 lines
17 KiB
OCaml
563 lines
17 KiB
OCaml
(* camlp4r *)
|
|
(* $Id$ *)
|
|
|
|
#load "q_MLast.cmo";
|
|
|
|
open Gramext;
|
|
|
|
value strict_parsing = ref False;
|
|
value keywords = ref [];
|
|
|
|
value loc = (0, 0);
|
|
|
|
(* Watch the segmentation faults here! the compiled file must have been
|
|
loaded in camlp4 with the option pa_extend.cmo -meta_action. *)
|
|
value magic_act (act : Obj.t) : MLast.expr = Obj.magic act;
|
|
|
|
(* Names of symbols for error messages; code borrowed to grammar.ml *)
|
|
|
|
value rec name_of_symbol entry =
|
|
fun
|
|
[ Snterm e -> "[" ^ e.ename ^ "]"
|
|
| Snterml e l -> "[" ^ e.ename ^ " level " ^ l ^ "]"
|
|
| Sself | Snext -> "[" ^ entry.ename ^ "]"
|
|
| Stoken tok -> entry.egram.glexer.Token.tok_text tok
|
|
| _ -> "???" ]
|
|
;
|
|
|
|
value rec name_of_symbol_failed entry =
|
|
fun
|
|
[ Slist0 s -> name_of_symbol_failed entry s
|
|
| Slist0sep s _ -> name_of_symbol_failed entry s
|
|
| Slist1 s -> name_of_symbol_failed entry s
|
|
| Slist1sep s _ -> name_of_symbol_failed entry s
|
|
| Sopt s -> name_of_symbol_failed entry s
|
|
| Stree t -> name_of_tree_failed entry t
|
|
| s -> name_of_symbol entry s ]
|
|
and name_of_tree_failed entry =
|
|
fun
|
|
[ Node {node = s; brother = bro; son = son} ->
|
|
let txt = name_of_symbol_failed entry s in
|
|
let txt =
|
|
match (s, son) with
|
|
[ (Sopt _, Node _) -> txt ^ " or " ^ name_of_tree_failed entry son
|
|
| _ -> txt ]
|
|
in
|
|
let txt =
|
|
match bro with
|
|
[ DeadEnd | LocAct _ _ -> txt
|
|
| _ -> txt ^ " or " ^ name_of_tree_failed entry bro ]
|
|
in
|
|
txt
|
|
| DeadEnd | LocAct _ _ -> "???" ]
|
|
;
|
|
|
|
value tree_failed entry prev_symb tree =
|
|
let (s2, s3) =
|
|
let txt = name_of_tree_failed entry tree in
|
|
match prev_symb with
|
|
[ Slist0 s ->
|
|
let txt1 = name_of_symbol_failed entry s in
|
|
("", txt1 ^ " or " ^ txt)
|
|
| Slist1 s ->
|
|
let txt1 = name_of_symbol_failed entry s in
|
|
("", txt1 ^ " or " ^ txt)
|
|
| Slist0sep s sep ->
|
|
let txt1 = name_of_symbol_failed entry s in
|
|
("", txt1 ^ " or " ^ txt)
|
|
| Slist1sep s sep ->
|
|
let txt1 = name_of_symbol_failed entry s in
|
|
("", txt1 ^ " or " ^ txt)
|
|
| Sopt _ | Stree _ -> ("", txt)
|
|
| _ -> (name_of_symbol entry prev_symb, txt) ]
|
|
in
|
|
<:expr<
|
|
P.error $str:entry.ename$ $str:String.escaped s2$ $str:String.escaped s3$
|
|
>>
|
|
;
|
|
|
|
(* Compilation *)
|
|
|
|
value rec find_act =
|
|
fun
|
|
[ DeadEnd -> failwith "find_act"
|
|
| LocAct act _ -> (magic_act act, 0)
|
|
| Node {son = son; brother = bro} ->
|
|
let (act, n) = try find_act son with [ Failure _ -> find_act bro ] in
|
|
(act, n + 1) ]
|
|
;
|
|
|
|
value level_number e l =
|
|
match e.edesc with
|
|
[ Dlevels elevs ->
|
|
loop 0 elevs where rec loop n =
|
|
fun
|
|
[ [lev :: levs] -> if lev.lname = Some l then n else loop (n + 1) levs
|
|
| [] -> failwith ("level " ^ l ^ " not found in entry " ^ e.ename) ]
|
|
| Dparser _ -> 0 ]
|
|
;
|
|
|
|
value nth_patt_of_act (e, n) =
|
|
let patt_list =
|
|
loop e where rec loop =
|
|
fun
|
|
[ <:expr< fun (loc : (int * int)) -> $_$ >> -> []
|
|
| <:expr< fun ($p$ : $_$) -> $e$ >> -> [p :: loop e]
|
|
| <:expr< fun $p$ -> $e$ >> -> [p :: loop e]
|
|
| _ -> failwith "nth_patt_of_act" ]
|
|
in
|
|
List.nth patt_list n
|
|
;
|
|
|
|
value rec last_patt_of_act =
|
|
fun
|
|
[ <:expr< fun ($p$ : $_$) (loc : (int * int)) -> $_$ >> -> p
|
|
| <:expr< fun $_$ -> $e$ >> -> last_patt_of_act e
|
|
| _ -> failwith "last_patt_of_act" ]
|
|
;
|
|
|
|
value rec final_action =
|
|
fun
|
|
[ <:expr< fun (loc : (int * int)) -> ($e$ : $_$) >> -> e
|
|
| <:expr< fun $_$ -> $e$ >> -> final_action e
|
|
| _ -> failwith "final_action" ]
|
|
;
|
|
|
|
value parse_standard_symbol e rkont fkont ending_act =
|
|
<:expr<
|
|
match try Some ($e$ strm__) with [ Stream.Failure -> None ] with
|
|
[ Some $nth_patt_of_act ending_act$ -> $rkont$
|
|
| _ -> $fkont$ ]
|
|
>>
|
|
;
|
|
|
|
value rec contain_loc =
|
|
fun
|
|
[ <:expr< $lid:s$ >> -> s = "loc"
|
|
| <:expr< $uid:_$ >> -> False
|
|
| <:expr< $str:_$ >> -> False
|
|
| <:expr< ($list:el$) >> -> List.exists contain_loc el
|
|
| <:expr< $e1$ $e2$ >> -> contain_loc e1 || contain_loc e2
|
|
| _ -> True ]
|
|
;
|
|
|
|
value gen_let_loc loc e =
|
|
if contain_loc e then <:expr< let loc = P.gloc bp strm__ in $e$ >> else e
|
|
;
|
|
|
|
value phony_entry = Grammar.Entry.obj Pcaml.implem;
|
|
|
|
value rec parse_tree entry nlevn alevn (tree, fst_symb) act_kont kont =
|
|
match tree with
|
|
[ DeadEnd -> kont
|
|
| LocAct act _ ->
|
|
let act = magic_act act in
|
|
act_kont False act
|
|
| Node {node = Sself; son = LocAct act _; brother = bro} ->
|
|
let act = magic_act act in
|
|
let n = entry.ename ^ "_" ^ string_of_int alevn in
|
|
let e =
|
|
if strict_parsing.val || alevn = 0 || fst_symb then <:expr< $lid:n$ >>
|
|
else <:expr< P.orzero $lid:n$ $lid:entry.ename ^ "_0"$ >>
|
|
in
|
|
let p2 =
|
|
match bro with
|
|
[ DeadEnd -> kont
|
|
| _ -> parse_tree entry nlevn alevn (bro, fst_symb) act_kont kont ]
|
|
in
|
|
let p1 = act_kont True act in
|
|
parse_standard_symbol e p1 p2 (act, 0)
|
|
| Node {node = s; son = LocAct act _; brother = bro} ->
|
|
let act = magic_act act in
|
|
let p2 = parse_tree entry nlevn alevn (bro, fst_symb) act_kont kont in
|
|
let p1 = act_kont False act in
|
|
parse_symbol entry nlevn s p1 p2 (act, 0)
|
|
| Node {node = s; son = son; brother = bro} ->
|
|
let p2 = parse_tree entry nlevn alevn (bro, fst_symb) act_kont kont in
|
|
let p1 =
|
|
let err =
|
|
let txt = tree_failed entry s son in
|
|
<:expr< raise (Stream.Error $txt$) >>
|
|
in
|
|
match son with
|
|
[ Node {brother = DeadEnd} ->
|
|
parse_tree entry nlevn alevn (son, False) act_kont err
|
|
| _ ->
|
|
let p1 =
|
|
parse_tree entry nlevn alevn (son, True) act_kont
|
|
<:expr< raise Stream.Failure >>
|
|
in
|
|
<:expr< try $p1$ with [ Stream.Failure -> $err$ ] >> ]
|
|
in
|
|
parse_symbol entry nlevn s p1 p2 (find_act son) ]
|
|
and parse_symbol entry nlevn s rkont fkont ending_act =
|
|
match s with
|
|
[ Slist0 s ->
|
|
let e = <:expr< P.list0 $symbol_parser entry nlevn s$ >> in
|
|
parse_standard_symbol e rkont fkont ending_act
|
|
| Slist1 s ->
|
|
let e = <:expr< P.list1 $symbol_parser entry nlevn s$ >> in
|
|
parse_standard_symbol e rkont fkont ending_act
|
|
| Slist0sep s sep ->
|
|
let e =
|
|
<:expr<
|
|
P.list0sep $symbol_parser entry nlevn s$
|
|
$symbol_parser entry nlevn sep$ >>
|
|
in
|
|
parse_standard_symbol e rkont fkont ending_act
|
|
| Slist1sep s sep ->
|
|
let e =
|
|
<:expr<
|
|
P.list1sep $symbol_parser entry nlevn s$
|
|
$symbol_parser entry nlevn sep$ >>
|
|
in
|
|
parse_standard_symbol e rkont fkont ending_act
|
|
| Sopt s ->
|
|
let e = <:expr< P.option $symbol_parser entry nlevn s$ >> in
|
|
parse_standard_symbol e rkont fkont ending_act
|
|
| Stree tree ->
|
|
let kont = <:expr< raise Stream.Failure >> in
|
|
let act_kont _ act = gen_let_loc loc (final_action act) in
|
|
let e = parse_tree phony_entry 0 0 (tree, True) act_kont kont in
|
|
parse_standard_symbol <:expr< fun strm__ -> $e$ >> rkont fkont ending_act
|
|
| Snterm e ->
|
|
let n =
|
|
match e.edesc with
|
|
[ Dparser _ -> e.ename
|
|
| Dlevels _ -> e.ename ^ "_0" ]
|
|
in
|
|
parse_standard_symbol <:expr< $lid:n$ >> rkont fkont ending_act
|
|
| Snterml e l ->
|
|
let n = e.ename ^ "_" ^ string_of_int (level_number e l) in
|
|
parse_standard_symbol <:expr< $lid:n$ >> rkont fkont ending_act
|
|
| Sself ->
|
|
let n = entry.ename ^ "_0" in
|
|
parse_standard_symbol <:expr< $lid:n$ >> rkont fkont ending_act
|
|
| Snext ->
|
|
let n = entry.ename ^ "_" ^ string_of_int nlevn in
|
|
parse_standard_symbol <:expr< $lid:n$ >> rkont fkont ending_act
|
|
| Stoken tok ->
|
|
let _ =
|
|
do {
|
|
if fst tok = "" && not (List.mem (snd tok) keywords.val) then
|
|
keywords.val := [snd tok :: keywords.val]
|
|
else ()
|
|
}
|
|
in
|
|
let p =
|
|
let patt = nth_patt_of_act ending_act in
|
|
let p_con = String.escaped (fst tok) in
|
|
let p_prm = String.escaped (snd tok) in
|
|
if snd tok = "" then
|
|
if fst tok = "ANY" then <:patt< (_, $patt$) >>
|
|
else <:patt< ($str:p_con$, $patt$) >>
|
|
else
|
|
let p = <:patt< ($str:p_con$, $str:p_prm$) >> in
|
|
match patt with
|
|
[ <:patt< _ >> -> <:patt< ($str:p_con$, $str:p_prm$) >>
|
|
| _ -> <:patt< ($str:p_con$, ($str:p_prm$ as $patt$)) >> ]
|
|
in
|
|
<:expr<
|
|
match Stream.peek strm__ with
|
|
[ Some $p$ -> do { Stream.junk strm__; $rkont$ }
|
|
| _ -> $fkont$ ]
|
|
>>
|
|
| _ ->
|
|
parse_standard_symbol <:expr< not_impl >> rkont fkont ending_act ]
|
|
and symbol_parser entry nlevn =
|
|
fun
|
|
[ Snterm e ->
|
|
let n = e.ename ^ "_0" in
|
|
<:expr< $lid:n$ >>
|
|
| Snterml e l ->
|
|
let n = e.ename ^ "_" ^ string_of_int (level_number e l) in
|
|
<:expr< $lid:n$ >>
|
|
| Snext ->
|
|
let n = entry.ename ^ "_" ^ string_of_int nlevn in
|
|
if strict_parsing.val then <:expr< $lid:n$ >>
|
|
else
|
|
let n0 = entry.ename ^ "_0" in
|
|
<:expr< P.orzero $lid:n$ $lid:n0$ >>
|
|
| Stoken tok ->
|
|
let _ =
|
|
do {
|
|
if fst tok = "" && not (List.mem (snd tok) keywords.val) then
|
|
keywords.val := [snd tok :: keywords.val]
|
|
else ()
|
|
}
|
|
in
|
|
let p_con = String.escaped (fst tok) in
|
|
let p_prm = String.escaped (snd tok) in
|
|
<:expr< P.token ($str:p_con$, $str:p_prm$) >>
|
|
| Stree tree ->
|
|
let kont = <:expr< raise Stream.Failure >> in
|
|
let act_kont _ act = final_action act in
|
|
<:expr<
|
|
fun strm__ ->
|
|
$parse_tree phony_entry 0 0 (tree, True) act_kont kont$
|
|
>>
|
|
| _ ->
|
|
<:expr< aaa >> ]
|
|
;
|
|
|
|
value rec start_parser_of_levels entry clevn levs =
|
|
let n = entry.ename ^ "_" ^ string_of_int clevn in
|
|
let next = entry.ename ^ "_" ^ string_of_int (clevn + 1) in
|
|
let p = <:patt< $lid:n$ >> in
|
|
match levs with
|
|
[ [] -> [Some (p, <:expr< fun strm__ -> raise Stream.Failure >>)]
|
|
| [lev :: levs] ->
|
|
let pel = start_parser_of_levels entry (succ clevn) levs in
|
|
match lev.lprefix with
|
|
[ DeadEnd ->
|
|
let ncont =
|
|
if not strict_parsing.val && clevn = 0 then
|
|
entry.ename ^ "_gen_cont"
|
|
else entry.ename ^ "_" ^ string_of_int clevn ^ "_cont"
|
|
in
|
|
let curr =
|
|
<:expr< let a = $lid:next$ strm__ in $lid:ncont$ bp a strm__ >>
|
|
in
|
|
let curr = <:expr< let bp = Stream.count strm__ in $curr$ >> in
|
|
let e = <:expr< fun strm__ -> $curr$ >> in
|
|
let pel = if levs = [] then [] else pel in
|
|
[Some (p, e) :: pel]
|
|
| tree ->
|
|
let alevn = clevn in
|
|
let (kont, pel) =
|
|
match levs with
|
|
[ [] -> (<:expr< raise Stream.Failure >>, [])
|
|
| _ ->
|
|
let e =
|
|
match (lev.assoc, lev.lsuffix) with
|
|
[ (NonA, _) | (_, DeadEnd) -> <:expr< $lid:next$ strm__ >>
|
|
| _ ->
|
|
let ncont =
|
|
entry.ename ^ "_" ^ string_of_int clevn ^ "_cont"
|
|
in
|
|
<:expr<
|
|
let a = $lid:next$ strm__ in
|
|
$lid:ncont$ bp a strm__
|
|
>> ]
|
|
in
|
|
(e, pel) ]
|
|
in
|
|
let act_kont end_with_self act =
|
|
if lev.lsuffix = DeadEnd then gen_let_loc loc (final_action act)
|
|
else
|
|
let ncont = entry.ename ^ "_" ^ string_of_int clevn ^ "_cont" in
|
|
gen_let_loc loc
|
|
<:expr< $lid:ncont$ bp $final_action act$ strm__ >>
|
|
in
|
|
let curr =
|
|
parse_tree entry (succ clevn) alevn (tree, True) act_kont kont
|
|
in
|
|
let curr = <:expr< let bp = Stream.count strm__ in $curr$ >> in
|
|
let e = <:expr< fun strm__ -> $curr$ >> in
|
|
[Some (p, e) :: pel] ] ]
|
|
;
|
|
|
|
value rec continue_parser_of_levels entry clevn levs =
|
|
let n = entry.ename ^ "_" ^ string_of_int clevn ^ "_cont" in
|
|
let p = <:patt< $lid:n$ >> in
|
|
match levs with
|
|
[ [] -> [None]
|
|
| [lev :: levs] ->
|
|
let pel = continue_parser_of_levels entry (succ clevn) levs in
|
|
match lev.lsuffix with
|
|
[ DeadEnd ->
|
|
[None :: pel]
|
|
| tree ->
|
|
let alevn =
|
|
match lev.assoc with
|
|
[ LeftA | NonA -> succ clevn
|
|
| RightA -> clevn ]
|
|
in
|
|
let (kont, pel) =
|
|
match levs with
|
|
[ [] -> (<:expr< a__ >>, [])
|
|
| _ -> (<:expr< a__ >>, pel) ]
|
|
in
|
|
let act_kont end_with_self act =
|
|
let p = last_patt_of_act act in
|
|
match lev.assoc with
|
|
[ RightA | NonA ->
|
|
<:expr<
|
|
let $p$ = a__ in
|
|
$gen_let_loc loc (final_action act)$
|
|
>>
|
|
| LeftA ->
|
|
let ncont =
|
|
entry.ename ^ "_" ^ string_of_int clevn ^ "_cont"
|
|
in
|
|
gen_let_loc loc
|
|
<:expr<
|
|
let $p$ = a__ in
|
|
$lid:ncont$ bp $final_action act$ strm__
|
|
>> ]
|
|
in
|
|
let curr =
|
|
parse_tree entry (succ clevn) alevn (tree, True) act_kont kont
|
|
in
|
|
let e = <:expr< fun bp a__ strm__ -> $curr$ >> in
|
|
[Some (p, e) :: pel] ] ]
|
|
;
|
|
|
|
value continue_parser_of_levels_again entry levs =
|
|
let n = entry.ename ^ "_gen_cont" in
|
|
let e =
|
|
loop <:expr< a__ >> 0 levs where rec loop var levn =
|
|
fun
|
|
[ [] -> <:expr< if x == a__ then x else $lid:n$ bp x strm__ >>
|
|
| [lev :: levs] ->
|
|
match lev.lsuffix with
|
|
[ DeadEnd -> loop var (levn + 1) levs
|
|
| _ ->
|
|
let n = entry.ename ^ "_" ^ string_of_int levn ^ "_cont" in
|
|
let rest = loop <:expr< x >> (levn + 1) levs in
|
|
<:expr< let x = $lid:n$ bp $var$ strm__ in $rest$ >> ] ]
|
|
in
|
|
(<:patt< $lid:n$ >>, <:expr< fun bp a__ strm__ -> $e$ >>)
|
|
;
|
|
|
|
value empty_entry ename =
|
|
let p = <:patt< $lid:ename$ >> in
|
|
let e =
|
|
<:expr<
|
|
fun strm__ ->
|
|
raise (Stream.Error $str:"entry [" ^ ename ^ "] is empty"$) >>
|
|
in
|
|
[Some (p, e)]
|
|
;
|
|
|
|
value start_parser_of_entry entry =
|
|
match entry.edesc with
|
|
[ Dlevels [] -> empty_entry entry.ename
|
|
| Dlevels elev -> start_parser_of_levels entry 0 elev
|
|
| Dparser p -> [] ]
|
|
;
|
|
|
|
value continue_parser_of_entry entry =
|
|
match entry.edesc with
|
|
[ Dlevels elev -> continue_parser_of_levels entry 0 elev
|
|
| Dparser p -> [] ]
|
|
;
|
|
|
|
value continue_parser_of_entry_again entry =
|
|
if strict_parsing.val then []
|
|
else
|
|
match entry.edesc with
|
|
[ Dlevels ([_; _ :: _] as levs) ->
|
|
[continue_parser_of_levels_again entry levs]
|
|
| _ -> [] ]
|
|
;
|
|
|
|
value rec list_alternate l1 l2 =
|
|
match (l1, l2) with
|
|
[ ([x1 :: l1], [x2 :: l2]) -> [x1; x2 :: list_alternate l1 l2]
|
|
| ([], l2) -> l2
|
|
| (l1, []) -> l1 ]
|
|
;
|
|
|
|
value compile_entry entry =
|
|
let pel1 = start_parser_of_entry entry in
|
|
let pel2 = continue_parser_of_entry entry in
|
|
let pel = list_alternate pel1 pel2 in
|
|
List.fold_right
|
|
(fun pe list ->
|
|
match pe with
|
|
[ Some pe -> [pe :: list]
|
|
| None -> list ])
|
|
pel (continue_parser_of_entry_again entry)
|
|
;
|
|
|
|
(* get all entries connected together *)
|
|
|
|
value rec scan_tree list =
|
|
fun
|
|
[ Node {node = n; son = son; brother = bro} ->
|
|
let list = scan_symbol list n in
|
|
let list = scan_tree list son in
|
|
let list = scan_tree list bro in
|
|
list
|
|
| LocAct _ _ | DeadEnd -> list ]
|
|
and scan_symbol list =
|
|
fun
|
|
[ Snterm e -> scan_entry list e
|
|
| Snterml e l -> scan_entry list e
|
|
| Slist0 s -> scan_symbol list s
|
|
| Slist0sep s sep -> scan_symbol (scan_symbol list s) sep
|
|
| Slist1 s -> scan_symbol list s
|
|
| Slist1sep s sep -> scan_symbol (scan_symbol list s) sep
|
|
| Sopt s -> scan_symbol list s
|
|
| Stree t -> scan_tree list t
|
|
| Smeta _ _ _ | Sself | Snext | Stoken _ -> list ]
|
|
and scan_level list lev =
|
|
let list = scan_tree list lev.lsuffix in
|
|
let list = scan_tree list lev.lprefix in
|
|
list
|
|
and scan_levels list levs = List.fold_left scan_level list levs
|
|
and scan_entry list entry =
|
|
if List.memq entry list then list
|
|
else
|
|
match entry.edesc with
|
|
[ Dlevels levs -> scan_levels [entry :: list] levs
|
|
| Dparser _ -> list ]
|
|
;
|
|
|
|
value all_entries_in_graph list entry =
|
|
List.rev (scan_entry list entry)
|
|
;
|
|
|
|
(* main *)
|
|
|
|
value entries = ref [];
|
|
|
|
value rec list_mem_right_assoc x =
|
|
fun
|
|
[ [] -> False
|
|
| [(a, b) :: l] -> x = b || list_mem_right_assoc x l ]
|
|
;
|
|
|
|
value rec expr_list =
|
|
fun
|
|
[ [] -> <:expr< [] >>
|
|
| [x :: l] -> <:expr< [$str:String.escaped x$ :: $expr_list l$] >> ]
|
|
;
|
|
|
|
value compile () =
|
|
let _ = do { keywords.val := []; } in
|
|
let list = List.fold_left all_entries_in_graph [] entries.val in
|
|
let list =
|
|
List.filter (fun e -> List.memq e list) entries.val @
|
|
List.filter (fun e -> not (List.memq e entries.val)) list
|
|
in
|
|
let list =
|
|
let set = ref [] in
|
|
List.fold_right
|
|
(fun entry list ->
|
|
if List.mem entry.ename set.val then
|
|
list
|
|
else do { set.val := [entry.ename :: set.val]; [entry :: list] })
|
|
list []
|
|
in
|
|
let pell = List.map compile_entry list in
|
|
let pel = List.flatten pell in
|
|
let si1 = <:str_item< value rec $list:pel$ >> in
|
|
let si2 =
|
|
let list = List.sort compare keywords.val in
|
|
<:str_item<
|
|
List.iter (fun kw -> P.lexer.Token.tok_using ("", kw))
|
|
$expr_list list$
|
|
>>
|
|
in
|
|
let loc = (1, 1) in
|
|
([(si1, loc); (si2, loc)], False)
|
|
;
|
|
|
|
Pcaml.parse_implem.val := fun _ -> compile ();
|
|
|
|
Pcaml.add_option "-strict_parsing" (Arg.Set strict_parsing)
|
|
"Don't generate error recovering by trying continuations or first levels"
|
|
;
|