485 lines
19 KiB
OCaml
485 lines
19 KiB
OCaml
(****************************************************************************)
|
|
(* *)
|
|
(* Objective Caml *)
|
|
(* *)
|
|
(* INRIA Rocquencourt *)
|
|
(* *)
|
|
(* Copyright 2006-2006 Institut National de Recherche en Informatique et *)
|
|
(* en Automatique. All rights reserved. This file is distributed under *)
|
|
(* the terms of the GNU Library General Public License, with the special *)
|
|
(* exception on linking described in LICENSE at the top of the Objective *)
|
|
(* Caml source tree. *)
|
|
(* *)
|
|
(****************************************************************************)
|
|
|
|
(* Authors:
|
|
* - Daniel de Rauglaudre: initial version
|
|
* - Nicolas Pouillard: refactoring
|
|
*)
|
|
|
|
|
|
(* $Id$ *)
|
|
|
|
(* The lexer definition *)
|
|
|
|
|
|
{
|
|
|
|
(** A lexical analyzer. *)
|
|
|
|
(* FIXME interface module Make (Token : Token) |+ Note that this Token sig is not in Sig +| *)
|
|
(* : Sig.Lexer. S with module Loc = Token.Loc and module Token = Token; *)
|
|
|
|
(* type context =
|
|
{ loc : Loc.t ;
|
|
in_comment : bool ;
|
|
|+* FIXME When True, all lexers built by [Plexer.make ()] do not lex the
|
|
quotation syntax any more. Default is False (quotations are
|
|
lexed). +|
|
|
quotations : bool };
|
|
|
|
value default_context : context;
|
|
|
|
value mk : Loc.t -> Stream.t char -> Stream.t (Token.t * Loc.t);
|
|
|
|
value mk' : context -> Stream.t char -> Stream.t (Token.t * Loc.t); *)
|
|
(* FIXME Beware the context argument must be given like that:
|
|
* mk' { (default_context) with ... = ... } strm
|
|
*)
|
|
|
|
module TokenEval = Token.Eval
|
|
module Make (Token : Sig.Camlp4Token)
|
|
= struct
|
|
module Loc = Token.Loc
|
|
module Token = Token
|
|
|
|
open Lexing
|
|
open Sig
|
|
|
|
(* Error report *)
|
|
module Error = struct
|
|
|
|
type t =
|
|
| Illegal_character of char
|
|
| Illegal_escape of string
|
|
| Unterminated_comment
|
|
| Unterminated_string
|
|
| Unterminated_quotation
|
|
| Unterminated_antiquot
|
|
| Unterminated_string_in_comment
|
|
| Comment_start
|
|
| Comment_not_end
|
|
| Literal_overflow of string
|
|
|
|
exception E of t
|
|
|
|
open Format
|
|
|
|
let print ppf =
|
|
function
|
|
| Illegal_character c ->
|
|
fprintf ppf "Illegal character (%s)" (Char.escaped c)
|
|
| Illegal_escape s ->
|
|
fprintf ppf "Illegal backslash escape in string or character (%s)" s
|
|
| Unterminated_comment ->
|
|
fprintf ppf "Comment not terminated"
|
|
| Unterminated_string ->
|
|
fprintf ppf "String literal not terminated"
|
|
| Unterminated_string_in_comment ->
|
|
fprintf ppf "This comment contains an unterminated string literal"
|
|
| Unterminated_quotation ->
|
|
fprintf ppf "Quotation not terminated"
|
|
| Unterminated_antiquot ->
|
|
fprintf ppf "Antiquotation not terminated"
|
|
| Literal_overflow ty ->
|
|
fprintf ppf "Integer literal exceeds the range of representable integers of type %s" ty
|
|
| Comment_start ->
|
|
fprintf ppf "this is the start of a comment"
|
|
| Comment_not_end ->
|
|
fprintf ppf "this is not the end of a comment"
|
|
|
|
let to_string x =
|
|
let b = Buffer.create 50 in
|
|
let () = bprintf b "%a" print x in Buffer.contents b
|
|
end;;
|
|
|
|
let module M = ErrorHandler.Register(Error) in ()
|
|
|
|
open Error
|
|
|
|
(* To store some context information:
|
|
* loc : position of the beginning of a string, quotation and comment
|
|
* in_comment: are we in a comment?
|
|
* quotations: shall we lex quotation?
|
|
* If quotations is false it's a SYMBOL token.
|
|
* antiquots : shall we lex antiquotations.
|
|
*)
|
|
|
|
type context =
|
|
{ loc : Loc.t ;
|
|
in_comment : bool ;
|
|
quotations : bool ;
|
|
antiquots : bool ;
|
|
lexbuf : lexbuf ;
|
|
buffer : Buffer.t }
|
|
|
|
let default_context lb =
|
|
{ loc = Loc.ghost ;
|
|
in_comment = false ;
|
|
quotations = true ;
|
|
antiquots = false ;
|
|
lexbuf = lb ;
|
|
buffer = Buffer.create 256 }
|
|
|
|
(* To buffer string literals, quotations and antiquotations *)
|
|
|
|
let store c = Buffer.add_string c.buffer (Lexing.lexeme c.lexbuf)
|
|
let istore_char c i = Buffer.add_char c.buffer (Lexing.lexeme_char c.lexbuf i)
|
|
let buff_contents c =
|
|
let contents = Buffer.contents c.buffer in
|
|
Buffer.reset c.buffer; contents
|
|
|
|
let loc c = Loc.merge c.loc (Loc.of_lexbuf c.lexbuf)
|
|
let quotations c = c.quotations
|
|
let antiquots c = c.antiquots
|
|
let is_in_comment c = c.in_comment
|
|
let in_comment c = { (c) with in_comment = true }
|
|
let set_start_p c = c.lexbuf.lex_start_p <- Loc.start_pos c.loc
|
|
let move_start_p shift c =
|
|
let p = c.lexbuf.lex_start_p in
|
|
c.lexbuf.lex_start_p <- { (p) with pos_cnum = p.pos_cnum + shift }
|
|
|
|
let update_loc c = { (c) with loc = Loc.of_lexbuf c.lexbuf }
|
|
let with_curr_loc f c = f (update_loc c) c.lexbuf
|
|
let parse_nested f c =
|
|
with_curr_loc f c;
|
|
set_start_p c;
|
|
buff_contents c
|
|
let shift n c = { (c) with loc = Loc.move `both n c.loc }
|
|
let store_parse f c = store c ; f c c.lexbuf
|
|
let parse f c = f c c.lexbuf
|
|
let mk_quotation quotation c name loc shift =
|
|
let s = parse_nested quotation (update_loc c) in
|
|
let contents = String.sub s 0 (String.length s - 2) in
|
|
QUOTATION { q_name = name ;
|
|
q_loc = loc ;
|
|
q_shift = shift ;
|
|
q_contents = contents }
|
|
|
|
|
|
(* Update the current location with file name and line number. *)
|
|
|
|
let update_loc c file line absolute chars =
|
|
let lexbuf = c.lexbuf in
|
|
let pos = lexbuf.lex_curr_p in
|
|
let new_file = match file with
|
|
| None -> pos.pos_fname
|
|
| Some s -> s
|
|
in
|
|
lexbuf.lex_curr_p <- { pos with
|
|
pos_fname = new_file;
|
|
pos_lnum = if absolute then line else pos.pos_lnum + line;
|
|
pos_bol = pos.pos_cnum - chars;
|
|
}
|
|
|
|
let err error loc =
|
|
raise(Loc.Exc_located(loc, Error.E error))
|
|
|
|
let warn error loc =
|
|
Format.eprintf "Warning: %a: %a@." Loc.print loc Error.print error
|
|
|
|
}
|
|
|
|
let newline = ('\010' | '\013' | "\013\010")
|
|
let blank = [' ' '\009' '\012']
|
|
let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_']
|
|
let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222']
|
|
let identchar =
|
|
['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '\'' '0'-'9']
|
|
let ident = (lowercase|uppercase) identchar*
|
|
let locname = ident
|
|
let not_star_symbolchar =
|
|
['$' '!' '%' '&' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~' '\\']
|
|
let symbolchar = '*' | not_star_symbolchar
|
|
let quotchar =
|
|
['!' '%' '&' '+' '-' '.' '/' ':' '=' '?' '@' '^' '|' '~' '\\' '*']
|
|
let hexa_char = ['0'-'9' 'A'-'F' 'a'-'f']
|
|
let decimal_literal =
|
|
['0'-'9'] ['0'-'9' '_']*
|
|
let hex_literal =
|
|
'0' ['x' 'X'] hexa_char ['0'-'9' 'A'-'F' 'a'-'f' '_']*
|
|
let oct_literal =
|
|
'0' ['o' 'O'] ['0'-'7'] ['0'-'7' '_']*
|
|
let bin_literal =
|
|
'0' ['b' 'B'] ['0'-'1'] ['0'-'1' '_']*
|
|
let int_literal =
|
|
decimal_literal | hex_literal | oct_literal | bin_literal
|
|
let float_literal =
|
|
['0'-'9'] ['0'-'9' '_']*
|
|
('.' ['0'-'9' '_']* )?
|
|
(['e' 'E'] ['+' '-']? ['0'-'9'] ['0'-'9' '_']*)?
|
|
|
|
(* Delimitors are extended (from 3.09) in a conservative way *)
|
|
|
|
(* These chars that can't start an expression or a pattern: *)
|
|
let safe_delimchars = ['%' '&' '/' '@' '^']
|
|
|
|
(* These symbols are unsafe since "[<", "[|", etc. exsist. *)
|
|
let delimchars = safe_delimchars | ['|' '<' '>' ':' '=' '.']
|
|
|
|
let left_delims = ['(' '[' '{']
|
|
let right_delims = [')' ']' '}']
|
|
|
|
let left_delimitor =
|
|
(* At least a safe_delimchars *)
|
|
left_delims delimchars* safe_delimchars (delimchars|left_delims)*
|
|
|
|
(* A '(' or a new super '(' without "(<" *)
|
|
| '(' (['|' ':'] delimchars*)?
|
|
(* Old brackets, no new brackets starting with "[|" or "[:" *)
|
|
| '[' ['|' ':']?
|
|
(* Old "[<","{<" and new ones *)
|
|
| ['[' '{'] delimchars* '<'
|
|
(* Old brace and new ones *)
|
|
| '{' (['|' ':'] delimchars*)?
|
|
|
|
let right_delimitor =
|
|
(* At least a safe_delimchars *)
|
|
(delimchars|right_delims)* safe_delimchars (delimchars|right_delims)* right_delims
|
|
(* A ')' or a new super ')' without ">)" *)
|
|
| (delimchars* ['|' ':'])? ')'
|
|
(* Old brackets, no new brackets ending with "|]" or ":]" *)
|
|
| ['|' ':']? ']'
|
|
(* Old ">]",">}" and new ones *)
|
|
| '>' delimchars* [']' '}']
|
|
(* Old brace and new ones *)
|
|
| (delimchars* ['|' ':'])? '}'
|
|
|
|
|
|
rule token c = parse
|
|
| newline { update_loc c None 1 false 0; NEWLINE }
|
|
| blank + as x { BLANKS x }
|
|
| "~" (lowercase identchar * as x) ':' { LABEL x }
|
|
| "?" (lowercase identchar * as x) ':' { OPTLABEL x }
|
|
| lowercase identchar * as x { LIDENT x }
|
|
| uppercase identchar * as x { UIDENT x }
|
|
| int_literal as i
|
|
{ try INT(int_of_string i, i)
|
|
with Failure _ -> err (Literal_overflow "int") (Loc.of_lexbuf lexbuf) }
|
|
| float_literal as f
|
|
{ try FLOAT(float_of_string f, f)
|
|
with Failure _ -> err (Literal_overflow "float") (Loc.of_lexbuf lexbuf) }
|
|
| (int_literal as i) "l"
|
|
{ try INT32(Int32.of_string i, i)
|
|
with Failure _ -> err (Literal_overflow "int32") (Loc.of_lexbuf lexbuf) }
|
|
| (int_literal as i) "L"
|
|
{ try INT64(Int64.of_string i, i)
|
|
with Failure _ -> err (Literal_overflow "int64") (Loc.of_lexbuf lexbuf) }
|
|
| (int_literal as i) "n"
|
|
{ try NATIVEINT(Nativeint.of_string i, i)
|
|
with Failure _ -> err (Literal_overflow "nativeint") (Loc.of_lexbuf lexbuf) }
|
|
| '"'
|
|
{ with_curr_loc string c;
|
|
let s = buff_contents c in STRING (TokenEval.string s, s) }
|
|
| "'" (newline as x) "'"
|
|
{ update_loc c None 1 false 1; CHAR (TokenEval.char x, x) }
|
|
| "'" ( [^ '\\' '\010' '\013']
|
|
| '\\' (['\\' '"' 'n' 't' 'b' 'r' ' ' '\'']
|
|
|['0'-'9'] ['0'-'9'] ['0'-'9']
|
|
|'x' hexa_char hexa_char)
|
|
as x) "'" { CHAR (TokenEval.char x, x) }
|
|
| "'\\" (_ as c)
|
|
{ err (Illegal_escape (String.make 1 c)) (Loc.of_lexbuf lexbuf) }
|
|
| "(*"
|
|
{ store c; COMMENT(parse_nested comment (in_comment c)) }
|
|
| "(*)"
|
|
{ warn Comment_start (Loc.of_lexbuf lexbuf) ;
|
|
parse comment (in_comment c); COMMENT (buff_contents c) }
|
|
| "*)"
|
|
{ warn Comment_not_end (Loc.of_lexbuf lexbuf) ;
|
|
move_start_p (-1) c; SYMBOL "*" }
|
|
| "<<" (quotchar* as beginning)
|
|
{ if quotations c
|
|
then (move_start_p (-String.length beginning);
|
|
mk_quotation quotation c "" "" 2)
|
|
else parse (symbolchar_star ("<<" ^ beginning)) c }
|
|
| "<<>>"
|
|
{ if quotations c
|
|
then QUOTATION { q_name = ""; q_loc = ""; q_shift = 2; q_contents = "" }
|
|
else parse (symbolchar_star "<<>>") c }
|
|
| "<@"
|
|
{ if quotations c then with_curr_loc maybe_quotation_at c
|
|
else parse (symbolchar_star "<@") c }
|
|
| "<:"
|
|
{ if quotations c then with_curr_loc maybe_quotation_colon c
|
|
else parse (symbolchar_star "<:") c }
|
|
| "#" [' ' '\t']* (['0'-'9']+ as num) [' ' '\t']*
|
|
("\"" ([^ '\010' '\013' '"' ] * as name) "\"")?
|
|
[^ '\010' '\013'] * newline
|
|
{ let inum = int_of_string num
|
|
in update_loc c name inum true 0; LINE_DIRECTIVE(inum, name) }
|
|
| '(' (not_star_symbolchar as op) ')'
|
|
{ ESCAPED_IDENT (String.make 1 op) }
|
|
| '(' (not_star_symbolchar symbolchar* not_star_symbolchar as op) ')'
|
|
{ ESCAPED_IDENT op }
|
|
| '(' (not_star_symbolchar symbolchar* as op) blank+ ')'
|
|
{ ESCAPED_IDENT op }
|
|
| '(' blank+ (symbolchar* not_star_symbolchar as op) ')'
|
|
{ ESCAPED_IDENT op }
|
|
| '(' blank+ (symbolchar+ as op) blank+ ')'
|
|
{ ESCAPED_IDENT op }
|
|
| ( "#" | "`" | "'" | "," | "." | ".." | ":" | "::"
|
|
| ":=" | ":>" | ";" | ";;" | "_"
|
|
| left_delimitor | right_delimitor ) as x { SYMBOL x }
|
|
| '$' { if antiquots c
|
|
then with_curr_loc dollar (shift 1 c)
|
|
else parse (symbolchar_star "$") c }
|
|
| ['~' '?' '!' '=' '<' '>' '|' '&' '@' '^' '+' '-' '*' '/' '%' '\\'] symbolchar *
|
|
as x { SYMBOL x }
|
|
| eof
|
|
{ let pos = lexbuf.lex_curr_p in
|
|
lexbuf.lex_curr_p <- { pos with pos_bol = pos.pos_bol + 1 ;
|
|
pos_cnum = pos.pos_cnum + 1 }; EOI }
|
|
| _ as c { err (Illegal_character c) (Loc.of_lexbuf lexbuf) }
|
|
|
|
and comment c = parse
|
|
"(*"
|
|
{ store c; with_curr_loc comment c; parse comment c }
|
|
| "*)" { store c }
|
|
| '<' (':' ident)? ('@' locname)? '<'
|
|
{ store c;
|
|
if quotations c then with_curr_loc quotation c; parse comment c }
|
|
| ident { store_parse comment c }
|
|
| "\""
|
|
{ store c;
|
|
begin try with_curr_loc string c
|
|
with Loc.Exc_located(_, Error.E Unterminated_string) ->
|
|
err Unterminated_string_in_comment (loc c)
|
|
end;
|
|
Buffer.add_char c.buffer '"';
|
|
parse comment c }
|
|
| "''" { store_parse comment c }
|
|
| "'''" { store_parse comment c }
|
|
| "'" newline "'"
|
|
{ update_loc c None 1 false 1; store_parse comment c }
|
|
| "'" [^ '\\' '\'' '\010' '\013' ] "'" { store_parse comment c }
|
|
| "'\\" ['\\' '"' '\'' 'n' 't' 'b' 'r' ' '] "'" { store_parse comment c }
|
|
| "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'" { store_parse comment c }
|
|
| "'\\" 'x' hexa_char hexa_char "'" { store_parse comment c }
|
|
| eof
|
|
{ err Unterminated_comment (loc c) }
|
|
| newline
|
|
{ update_loc c None 1 false 0; store_parse comment c }
|
|
| _ { store_parse comment c }
|
|
|
|
and string c = parse
|
|
'"' { set_start_p c }
|
|
| '\\' newline ([' ' '\t'] * as space)
|
|
{ update_loc c None 1 false (String.length space);
|
|
store_parse string c }
|
|
| '\\' ['\\' '"' 'n' 't' 'b' 'r' ' ' '\''] { store_parse string c }
|
|
| '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] { store_parse string c }
|
|
| '\\' 'x' hexa_char hexa_char { store_parse string c }
|
|
| '\\' (_ as x)
|
|
{ if is_in_comment c
|
|
then store_parse string c
|
|
else begin
|
|
warn (Illegal_escape (String.make 1 x)) (Loc.of_lexbuf lexbuf);
|
|
store_parse string c
|
|
end }
|
|
| newline
|
|
{ update_loc c None 1 false 0; store_parse string c }
|
|
| eof { err Unterminated_string (loc c) }
|
|
| _ { store_parse string c }
|
|
|
|
and symbolchar_star beginning c = parse
|
|
| symbolchar* as tok { move_start_p (-String.length beginning) c ;
|
|
SYMBOL(beginning ^ tok) }
|
|
|
|
and maybe_quotation_at c = parse
|
|
| (ident as loc) '<'
|
|
{ mk_quotation quotation c "" loc (3 + String.length loc) }
|
|
| symbolchar* as tok { SYMBOL("<@" ^ tok) }
|
|
|
|
and maybe_quotation_colon c = parse
|
|
| (ident as name) '<'
|
|
{ mk_quotation quotation c name "" (3 + String.length name) }
|
|
| (ident as name) '@' (locname as loc) '<'
|
|
{ mk_quotation quotation c name loc
|
|
(4 + String.length loc + String.length name) }
|
|
| symbolchar* as tok { SYMBOL("<:" ^ tok) }
|
|
|
|
and quotation c = parse
|
|
| '<' (':' ident)? ('@' locname)? '<' { store c ;
|
|
with_curr_loc quotation c ;
|
|
parse quotation c }
|
|
| ">>" { store c }
|
|
| eof { err Unterminated_quotation (loc c) }
|
|
| newline { update_loc c None 1 false 0 ;
|
|
store_parse quotation c }
|
|
| _ { store_parse quotation c }
|
|
|
|
and dollar c = parse
|
|
| '$' { set_start_p c; ANTIQUOT("", "") }
|
|
| ('`'? (identchar*|'.'+) as name) ':'
|
|
{ with_curr_loc (antiquot name) (shift (1 + String.length name) c) }
|
|
| _ { store_parse (antiquot "") c }
|
|
|
|
and antiquot name c = parse
|
|
| '$' { set_start_p c; ANTIQUOT(name, buff_contents c) }
|
|
| eof { err Unterminated_antiquot (loc c) }
|
|
| newline
|
|
{ update_loc c None 1 false 0; store_parse (antiquot name) c }
|
|
| '<' (':' ident)? ('@' locname)? '<'
|
|
{ store c; with_curr_loc quotation c; parse (antiquot name) c }
|
|
| _ { store_parse (antiquot name) c }
|
|
|
|
{
|
|
|
|
let lexing_store s buff max =
|
|
let rec self n s =
|
|
if n >= max then n
|
|
else
|
|
match Stream.peek s with
|
|
| Some x ->
|
|
Stream.junk s;
|
|
buff.[n] <- x;
|
|
succ n
|
|
| _ -> n
|
|
in
|
|
self 0 s
|
|
|
|
let from_context c =
|
|
let next _ =
|
|
let tok = with_curr_loc token c in
|
|
let loc = Loc.of_lexbuf c.lexbuf in
|
|
Some ((tok, loc))
|
|
in Stream.from next
|
|
|
|
let from_lexbuf ?(quotations = true) lb =
|
|
let c = { (default_context lb) with
|
|
loc = Loc.of_lexbuf lb;
|
|
antiquots = !Camlp4_config.antiquotations;
|
|
quotations = quotations }
|
|
in from_context c
|
|
|
|
let setup_loc lb loc =
|
|
let start_pos = Loc.start_pos loc in
|
|
lb.lex_abs_pos <- start_pos.pos_cnum;
|
|
lb.lex_curr_p <- start_pos
|
|
|
|
let from_string ?quotations loc str =
|
|
let lb = Lexing.from_string str in
|
|
setup_loc lb loc;
|
|
from_lexbuf ?quotations lb
|
|
|
|
let from_stream ?quotations loc strm =
|
|
let lb = Lexing.from_function (lexing_store strm) in
|
|
setup_loc lb loc;
|
|
from_lexbuf ?quotations lb
|
|
|
|
let mk () loc strm =
|
|
from_stream ~quotations:!Camlp4_config.quotations loc strm
|
|
end
|
|
}
|