259 lines
8.0 KiB
OCaml
259 lines
8.0 KiB
OCaml
(* camlp4r *)
|
|
(***********************************************************************)
|
|
(* *)
|
|
(* Camlp4 *)
|
|
(* *)
|
|
(* Daniel de Rauglaudre, projet Cristal, INRIA Rocquencourt *)
|
|
(* *)
|
|
(* Copyright 2001 Institut National de Recherche en Informatique et *)
|
|
(* Automatique. Distributed only by permission. *)
|
|
(* *)
|
|
(***********************************************************************)
|
|
|
|
(* $Id$ *)
|
|
|
|
type t = (string * string);
|
|
type pattern = (string * string);
|
|
|
|
exception Error of string;
|
|
|
|
value make_loc (bp, ep) =
|
|
({ (Lexing.dummy_pos) with Lexing.pos_cnum = bp; Lexing.pos_lnum = 1 },
|
|
{ (Lexing.dummy_pos) with Lexing.pos_cnum = ep; Lexing.pos_lnum = 1 })
|
|
;
|
|
|
|
value nowhere = { (Lexing.dummy_pos) with Lexing.pos_cnum = 0 };
|
|
|
|
value dummy_loc = (Lexing.dummy_pos, Lexing.dummy_pos);
|
|
|
|
value succ_pos p =
|
|
{ ( p ) with Lexing.pos_cnum = p.Lexing.pos_cnum + 1};
|
|
value lt_pos p1 p2 = p1.Lexing.pos_cnum < p2.Lexing.pos_cnum;
|
|
|
|
type flocation = (Lexing.position * Lexing.position);
|
|
|
|
type flocation_function = int -> flocation;
|
|
type lexer_func 'te = Stream.t char -> (Stream.t 'te * flocation_function);
|
|
|
|
type glexer 'te =
|
|
{ tok_func : lexer_func 'te;
|
|
tok_using : pattern -> unit;
|
|
tok_removing : pattern -> unit;
|
|
tok_match : pattern -> 'te -> string;
|
|
tok_text : pattern -> string;
|
|
tok_comm : mutable option (list flocation) }
|
|
;
|
|
type lexer =
|
|
{ func : lexer_func t;
|
|
using : pattern -> unit;
|
|
removing : pattern -> unit;
|
|
tparse : pattern -> option (Stream.t t -> string);
|
|
text : pattern -> string }
|
|
;
|
|
|
|
value lexer_text (con, prm) =
|
|
if con = "" then "'" ^ prm ^ "'"
|
|
else if prm = "" then con
|
|
else con ^ " '" ^ prm ^ "'"
|
|
;
|
|
|
|
value locerr () = invalid_arg "Lexer: flocation function";
|
|
|
|
value tsz = 256; (* up to 2^29 entries on a 32-bit machine, 2^61 on 64-bit *)
|
|
|
|
value loct_create () = (ref [| |], ref False);
|
|
|
|
value loct_func (loct, ov) i =
|
|
match
|
|
if i < 0 || i/tsz >= Array.length loct.val then None
|
|
else if loct.val.(i/tsz) = [| |] then
|
|
if ov.val then Some (nowhere, nowhere) else None
|
|
else Array.unsafe_get (Array.unsafe_get loct.val (i/tsz)) (i mod tsz)
|
|
with
|
|
[ Some loc -> loc
|
|
| _ -> locerr () ]
|
|
;
|
|
|
|
value loct_add (loct, ov) i loc = do {
|
|
while i/tsz >= Array.length loct.val && (not ov.val) do {
|
|
let new_tmax = Array.length loct.val * 2 + 1 in
|
|
if new_tmax < Sys.max_array_length then do {
|
|
let new_loct = Array.make new_tmax [| |] in
|
|
Array.blit loct.val 0 new_loct 0 (Array.length loct.val);
|
|
loct.val := new_loct
|
|
} else ov.val := True
|
|
};
|
|
if not(ov.val) then do {
|
|
if loct.val.(i/tsz) = [| |] then
|
|
loct.val.(i/tsz) := Array.make tsz None
|
|
else ();
|
|
loct.val.(i/tsz).(i mod tsz) := Some loc
|
|
} else ()
|
|
};
|
|
|
|
value make_stream_and_flocation next_token_loc =
|
|
let loct = loct_create () in
|
|
let ts =
|
|
Stream.from
|
|
(fun i ->
|
|
let (tok, loc) = next_token_loc () in
|
|
do { loct_add loct i loc; Some tok })
|
|
in
|
|
(ts, loct_func loct)
|
|
;
|
|
|
|
value lexer_func_of_parser next_token_loc cs =
|
|
make_stream_and_flocation (fun () -> next_token_loc cs)
|
|
;
|
|
|
|
value lexer_func_of_ocamllex lexfun cs =
|
|
let lb =
|
|
Lexing.from_function
|
|
(fun s n ->
|
|
try do { s.[0] := Stream.next cs; 1 } with [ Stream.Failure -> 0 ])
|
|
in
|
|
let next_token_loc _ =
|
|
let tok = lexfun lb in
|
|
let loc = (Lexing.lexeme_start_p lb, Lexing.lexeme_end_p lb) in
|
|
(tok, loc)
|
|
in
|
|
make_stream_and_flocation next_token_loc
|
|
;
|
|
|
|
(* Char and string tokens to real chars and string *)
|
|
|
|
value buff = ref (String.create 80);
|
|
value store len x =
|
|
do {
|
|
if len >= String.length buff.val then
|
|
buff.val := buff.val ^ String.create (String.length buff.val)
|
|
else ();
|
|
buff.val.[len] := x;
|
|
succ len
|
|
}
|
|
;
|
|
value mstore len s =
|
|
add_rec len 0 where rec add_rec len i =
|
|
if i == String.length s then len else add_rec (store len s.[i]) (succ i)
|
|
;
|
|
value get_buff len = String.sub buff.val 0 len;
|
|
|
|
value valch x = Char.code x - Char.code '0';
|
|
value valch_a x = Char.code x - Char.code 'a' + 10;
|
|
value valch_A x = Char.code x - Char.code 'A' + 10;
|
|
|
|
value rec backslash s i =
|
|
if i = String.length s then raise Not_found
|
|
else
|
|
match s.[i] with
|
|
[ 'n' -> ('\n', i + 1)
|
|
| 'r' -> ('\r', i + 1)
|
|
| 't' -> ('\t', i + 1)
|
|
| 'b' -> ('\b', i + 1)
|
|
| '\\' -> ('\\', i + 1)
|
|
| '"' -> ('"', i + 1)
|
|
| ''' -> (''', i + 1)
|
|
| ' ' -> (' ', i + 1)
|
|
| '0'..'9' as c -> backslash1 (valch c) s (i + 1)
|
|
| 'x' -> backslash1h s (i + 1)
|
|
| _ -> raise Not_found ]
|
|
and backslash1 cod s i =
|
|
if i = String.length s then raise Not_found
|
|
else
|
|
match s.[i] with
|
|
[ '0'..'9' as c -> backslash2 (10 * cod + valch c) s (i + 1)
|
|
| _ -> raise Not_found ]
|
|
and backslash2 cod s i =
|
|
if i = String.length s then raise Not_found
|
|
else
|
|
match s.[i] with
|
|
[ '0'..'9' as c -> (Char.chr (10 * cod + valch c), i + 1)
|
|
| _ -> raise Not_found ]
|
|
and backslash1h s i =
|
|
if i = String.length s then raise Not_found
|
|
else
|
|
match s.[i] with
|
|
[ '0'..'9' as c -> backslash2h (valch c) s (i + 1)
|
|
| 'a'..'f' as c -> backslash2h (valch_a c) s (i + 1)
|
|
| 'A'..'F' as c -> backslash2h (valch_A c) s (i + 1)
|
|
| _ -> raise Not_found ]
|
|
and backslash2h cod s i =
|
|
if i = String.length s then ('\\', i - 2)
|
|
else
|
|
match s.[i] with
|
|
[ '0'..'9' as c -> (Char.chr (16 * cod + valch c), i + 1)
|
|
| 'a'..'f' as c -> (Char.chr (16 * cod + valch_a c), i + 1)
|
|
| 'A'..'F' as c -> (Char.chr (16 * cod + valch_A c), i + 1)
|
|
| _ -> raise Not_found ]
|
|
;
|
|
|
|
value rec skip_indent s i =
|
|
if i = String.length s then i
|
|
else
|
|
match s.[i] with
|
|
[ ' ' | '\t' -> skip_indent s (i + 1)
|
|
| _ -> i ]
|
|
;
|
|
|
|
value skip_opt_linefeed s i =
|
|
if i = String.length s then i else if s.[i] = '\010' then i + 1 else i
|
|
;
|
|
|
|
value eval_char s =
|
|
if String.length s = 1 then s.[0]
|
|
else if String.length s = 0 then failwith "invalid char token"
|
|
else if s.[0] = '\\' then
|
|
if String.length s = 2 && s.[1] = ''' then '''
|
|
else
|
|
try
|
|
let (c, i) = backslash s 1 in
|
|
if i = String.length s then c else raise Not_found
|
|
with
|
|
[ Not_found -> failwith "invalid char token" ]
|
|
else failwith "invalid char token"
|
|
;
|
|
|
|
value eval_string (bp, ep) s =
|
|
loop 0 0 where rec loop len i =
|
|
if i = String.length s then get_buff len
|
|
else
|
|
let (len, i) =
|
|
if s.[i] = '\\' then
|
|
let i = i + 1 in
|
|
if i = String.length s then failwith "invalid string token" else
|
|
if s.[i] = '"' then (store len '"', i + 1) else
|
|
match s.[i] with
|
|
[ '\010' -> (len, skip_indent s (i + 1))
|
|
| '\013' -> (len, skip_indent s (skip_opt_linefeed s (i + 1)))
|
|
| c ->
|
|
try
|
|
let (c, i) = backslash s i in
|
|
(store len c, i)
|
|
with
|
|
[ Not_found -> do {
|
|
let txt = "Invalid backslash escape in string" in
|
|
let pos = bp.Lexing.pos_cnum - bp.Lexing.pos_bol + i in
|
|
if bp.Lexing.pos_fname = "" then
|
|
Printf.eprintf "Warning: line %d, chars %d-%d: %s\n"
|
|
bp.Lexing.pos_lnum pos (pos + 1) txt
|
|
else
|
|
Printf.eprintf "Warning: File \"%s\", line %d, chars %d-%d: %s\n"
|
|
bp.Lexing.pos_fname bp.Lexing.pos_lnum pos (pos + 1) txt;
|
|
(store (store len '\\') c, i + 1) } ] ]
|
|
else (store len s.[i], i + 1)
|
|
in
|
|
loop len i
|
|
;
|
|
|
|
value default_match =
|
|
fun
|
|
[ ("ANY", "") -> fun (con, prm) -> prm
|
|
| ("ANY", v) ->
|
|
fun (con, prm) -> if v = prm then v else raise Stream.Failure
|
|
| (p_con, "") ->
|
|
fun (con, prm) -> if con = p_con then prm else raise Stream.Failure
|
|
| (p_con, p_prm) ->
|
|
fun (con, prm) ->
|
|
if con = p_con && prm = p_prm then prm else raise Stream.Failure ]
|
|
;
|