ocaml/camlp4/Camlp4/Struct/Token.ml

243 lines
8.3 KiB
OCaml
Raw Normal View History

(****************************************************************************)
(* *)
(* Objective Caml *)
(* *)
(* INRIA Rocquencourt *)
(* *)
(* Copyright 2006 Institut National de Recherche en Informatique et *)
(* en Automatique. All rights reserved. This file is distributed under *)
(* the terms of the GNU Library General Public License, with the special *)
(* exception on linking described in LICENSE at the top of the Objective *)
(* Caml source tree. *)
(* *)
(****************************************************************************)
(* Authors:
* - Nicolas Pouillard: initial version
*)
open Format;
module Make (Loc : Sig.Loc.S)
: Sig.Camlp4Token.S with module Loc = Loc
= struct
module Loc = Loc;
open Sig.Quotation;
open Sig.Camlp4Token;
type t = Sig.Camlp4Token.t;
type token = t;
value to_string =
fun
[ KEYWORD s -> sprintf "KEYWORD %S" s
| SYMBOL s -> sprintf "SYMBOL %S" s
| LIDENT s -> sprintf "LIDENT %S" s
| UIDENT s -> sprintf "UIDENT %S" s
| INT _ s -> sprintf "INT %s" s
| INT32 _ s -> sprintf "INT32 %sd" s
| INT64 _ s -> sprintf "INT64 %sd" s
| NATIVEINT _ s-> sprintf "NATIVEINT %sd" s
| FLOAT _ s -> sprintf "FLOAT %s" s
| CHAR _ s -> sprintf "CHAR '%s'" s
| STRING _ s -> sprintf "STRING \"%s\"" s
(* here it's not %S since the string is already escaped *)
| LABEL s -> sprintf "LABEL %S" s
| OPTLABEL s -> sprintf "OPTLABEL %S" s
| ANTIQUOT n s -> sprintf "ANTIQUOT %s: %S" n s
| QUOTATION x -> sprintf "QUOTATION { q_name=%S; q_loc=%S; q_shift=%d; q_contents=%S }"
x.q_name x.q_loc x.q_shift x.q_contents
| COMMENT s -> sprintf "COMMENT %S" s
| BLANKS s -> sprintf "BLANKS %S" s
| NEWLINE -> sprintf "NEWLINE"
| EOI -> sprintf "EOI"
| ESCAPED_IDENT s -> sprintf "ESCAPED_IDENT %S" s
| LINE_DIRECTIVE i None -> sprintf "LINE_DIRECTIVE %d" i
| LINE_DIRECTIVE i (Some s) -> sprintf "LINE_DIRECTIVE %d %S" i s ];
value print ppf x = pp_print_string ppf (to_string x);
value match_keyword kwd =
fun
[ KEYWORD kwd' when kwd = kwd' -> True
| _ -> False ];
value extract_string =
fun
[ KEYWORD s | SYMBOL s | LIDENT s | UIDENT s | INT _ s | INT32 _ s |
INT64 _ s | NATIVEINT _ s | FLOAT _ s | CHAR _ s | STRING _ s |
LABEL s | OPTLABEL s | COMMENT s | BLANKS s | ESCAPED_IDENT s -> s
| tok ->
invalid_arg ("Cannot extract a string from a this token: "^
to_string tok) ];
module Error = struct
type t =
[ Illegal_token of string
| Keyword_as_label of string
| Illegal_token_pattern of string and string
| Illegal_constructor of string ];
exception E of t;
value print ppf =
fun
[ Illegal_token s ->
fprintf ppf "Illegal token (%s)" s
| Keyword_as_label kwd ->
fprintf ppf "`%s' is a keyword, it cannot be used as label name" kwd
| Illegal_token_pattern p_con p_prm ->
fprintf ppf "Illegal token pattern: %s %S" p_con p_prm
| Illegal_constructor con ->
fprintf ppf "Illegal constructor %S" con ];
value to_string x =
let b = Buffer.create 50 in
let () = bprintf b "%a" print x in Buffer.contents b;
end;
let module M = ErrorHandler.Register Error in ();
module Filter = struct
type token_filter = Sig.Token.stream_filter t Loc.t;
type t =
{ is_kwd : string -> bool;
filter : mutable token_filter };
value err error loc =
raise (Loc.Exc_located loc (Error.E error));
value keyword_conversion tok is_kwd =
match tok with
[ SYMBOL s | LIDENT s | UIDENT s when is_kwd s -> KEYWORD s
| ESCAPED_IDENT s -> LIDENT s
| _ -> tok ];
value check_keyword_as_label tok loc is_kwd =
let s =
match tok with
[ LABEL s -> s
| OPTLABEL s -> s
| _ -> "" ]
in if s <> "" && is_kwd s then err (Error.Keyword_as_label s) loc else ();
value check_unknown_keywords tok loc =
match tok with
[ SYMBOL s -> err (Error.Illegal_token s) loc
| _ -> () ];
value error_no_respect_rules p_con p_prm =
raise (Error.E (Error.Illegal_token_pattern p_con p_prm));
value check_keyword _ = True;
(* FIXME let lb = Lexing.from_string s in
let next () = token default_context lb in
try
match next () with
[ SYMBOL _ | UIDENT _ | LIDENT _ -> (next () = EOI)
| _ -> False ]
with [ Stream.Error _ -> False ]; *)
value error_on_unknown_keywords = ref False;
value rec ignore_layout =
parser
[ [: `(COMMENT _ | BLANKS _ | NEWLINE | LINE_DIRECTIVE _ _, _); s :] ->
ignore_layout s
| [: ` x; s :] -> [: ` x; ignore_layout s :]
| [: :] -> [: :] ];
value mk is_kwd =
{ is_kwd = is_kwd;
filter = ignore_layout };
value filter x =
let f tok loc = do {
let tok = keyword_conversion tok x.is_kwd;
check_keyword_as_label tok loc x.is_kwd;
if error_on_unknown_keywords.val
then check_unknown_keywords tok loc else ();
debug token "@[<hov 2>Lexer before filter:@ %a@ at@ %a@]@."
print tok Loc.dump loc in
(tok, loc)
} in
let rec filter =
parser
[ [: `(tok, loc); s :] -> [: ` f tok loc; filter s :]
| [: :] -> [: :] ]
in
let rec tracer = (* FIXME add a debug block construct *)
parser
[ [: `((_tok, _loc) as x); xs :] ->
debug token "@[<hov 2>Lexer after filter:@ %a@ at@ %a@]@."
print _tok Loc.dump _loc in
[: ` x; tracer xs :]
| [: :] -> [: :] ]
in fun strm -> tracer (x.filter (filter strm));
value define_filter x f = x.filter := f x.filter;
value keyword_added _ _ _ = ();
value keyword_removed _ _ = ();
end;
end;
(* Char and string tokens to real chars and string *)
module Eval = struct
value valch x = Char.code x - Char.code '0';
value valch_hex x =
let d = Char.code x in
if d >= 97 then d - 87
else if d >= 65 then d - 55
else d - 48;
value rec skip_indent = parser
[ [: `' ' | '\t'; s :] -> skip_indent s
| [: :] -> () ];
value skip_opt_linefeed = parser
[ [: `'\010' :] -> ()
| [: :] -> () ];
value rec backslash = parser
[ [: `'\010' :] -> '\010'
| [: `'\013' :] -> '\013'
| [: `'n' :] -> '\n'
| [: `'r' :] -> '\r'
| [: `'t' :] -> '\t'
| [: `'b' :] -> '\b'
| [: `'\\' :] -> '\\'
| [: `'"' :] -> '"'
| [: `''' :] -> '''
| [: `' ' :] -> ' '
| [: `('0'..'9' as c1); `('0'..'9' as c2); `('0'..'9' as c3) :] ->
Char.chr (100 * (valch c1) + 10 * (valch c2) + (valch c3))
| [: `'x'; `('0'..'9' | 'a'..'f' | 'A'..'F' as c1) ;
`('0'..'9' | 'a'..'f' | 'A'..'F' as c2) :] ->
Char.chr (16 * (valch_hex c1) + (valch_hex c2)) ];
value rec backslash_in_string strict store = parser
[ [: `'\010'; s :] -> skip_indent s
| [: `'\013'; s :] -> do { skip_opt_linefeed s; skip_indent s }
| [: x = backslash :] -> store x
| [: `c when not strict :] -> do { store '\\'; store c }
| [: :] -> failwith "invalid string token" ];
value char s =
if String.length s = 1 then s.[0]
else if String.length s = 0 then failwith "invalid char token"
else match Stream.of_string s with parser
[ [: `'\\'; x = backslash :] -> x
| [: :] -> failwith "invalid char token" ];
value string ?strict s =
let buf = Buffer.create 23 in
let store = Buffer.add_char buf in
let rec parse = parser
[ [: `'\\'; _ = backslash_in_string (strict <> None) store; s :] -> parse s
| [: `c; s :] -> do { store c; parse s }
| [: :] -> Buffer.contents buf ]
in parse (Stream.of_string s);
end;