2001-09-07 00:32:09 -07:00
|
|
|
(* camlp4r *)
|
|
|
|
(***********************************************************************)
|
|
|
|
(* *)
|
|
|
|
(* Camlp4 *)
|
|
|
|
(* *)
|
|
|
|
(* Daniel de Rauglaudre, projet Cristal, INRIA Rocquencourt *)
|
|
|
|
(* *)
|
|
|
|
(* Copyright 2001 Institut National de Recherche en Informatique et *)
|
|
|
|
(* Automatique. Distributed only by permission. *)
|
|
|
|
(* *)
|
|
|
|
(***********************************************************************)
|
|
|
|
|
|
|
|
(* $Id$ *)
|
|
|
|
|
2002-07-19 07:53:56 -07:00
|
|
|
(** Lexers for Camlp4 grammars.
|
2001-09-07 00:32:09 -07:00
|
|
|
|
2002-07-19 07:53:56 -07:00
|
|
|
This module defines the Camlp4 lexer type to be used in extensible
|
2001-09-07 00:32:09 -07:00
|
|
|
grammars (see module [Grammar]). It also provides some useful functions
|
|
|
|
to create lexers (this module should be renamed [Glexer] one day). *)
|
|
|
|
|
|
|
|
type pattern = (string * string);
|
2002-07-19 07:53:56 -07:00
|
|
|
(** Token patterns come from the EXTEND statement.
|
2001-09-07 00:32:09 -07:00
|
|
|
- The first string is the constructor name (must start with
|
|
|
|
an uppercase character). When it is empty, the second string
|
|
|
|
is supposed to be a keyword.
|
|
|
|
- The second string is the constructor parameter. Empty if it
|
|
|
|
has no parameter.
|
2002-07-19 07:53:56 -07:00
|
|
|
- The way tokens patterns are interpreted to parse tokens is
|
|
|
|
done by the lexer, function [tok_match] below. *)
|
2001-09-07 00:32:09 -07:00
|
|
|
|
|
|
|
exception Error of string;
|
2002-07-19 07:53:56 -07:00
|
|
|
(** An lexing error exception to be used by lexers. *)
|
2001-09-07 00:32:09 -07:00
|
|
|
|
2002-07-19 07:53:56 -07:00
|
|
|
(** {6 Lexer type} *)
|
2001-09-07 00:32:09 -07:00
|
|
|
|
|
|
|
type location = (int * int);
|
|
|
|
type location_function = int -> location;
|
2002-07-19 07:53:56 -07:00
|
|
|
(** The type for a function associating a number of a token in a stream
|
2001-09-07 00:32:09 -07:00
|
|
|
(starting from 0) to its source location. *)
|
2002-02-16 10:44:22 -08:00
|
|
|
type lexer_func 'te = Stream.t char -> (Stream.t 'te * location_function);
|
2002-07-19 07:53:56 -07:00
|
|
|
(** The type for a lexer function. The character stream is the input
|
2001-09-07 00:32:09 -07:00
|
|
|
stream to be lexed. The result is a pair of a token stream and
|
|
|
|
a location function for this tokens stream. *)
|
|
|
|
|
2002-02-16 10:44:22 -08:00
|
|
|
type glexer 'te =
|
|
|
|
{ tok_func : lexer_func 'te;
|
|
|
|
tok_using : pattern -> unit;
|
|
|
|
tok_removing : pattern -> unit;
|
|
|
|
tok_match : pattern -> 'te -> string;
|
2003-07-10 05:28:35 -07:00
|
|
|
tok_text : pattern -> string;
|
|
|
|
tok_comm : mutable option (list location) }
|
2001-09-07 00:32:09 -07:00
|
|
|
;
|
2002-07-19 07:53:56 -07:00
|
|
|
(** The type for a lexer used by Camlp4 grammars.
|
2002-02-16 10:44:22 -08:00
|
|
|
- The field [tok_func] is the main lexer function. See [lexer_func]
|
2001-09-07 00:32:09 -07:00
|
|
|
type above. This function may be created from a [char stream parser]
|
|
|
|
or for an [ocamllex] function using the functions below.
|
2002-02-16 10:44:22 -08:00
|
|
|
- The field [tok_using] is a function telling the lexer that the grammar
|
2001-09-07 00:32:09 -07:00
|
|
|
uses this token (pattern). The lexer can check that its constructor
|
|
|
|
is correct, and interpret some kind of tokens as keywords (to record
|
|
|
|
them in its tables). Called by [EXTEND] statements.
|
2002-02-16 10:44:22 -08:00
|
|
|
- The field [tok_removing] is a function telling the lexer that the
|
|
|
|
grammar does not uses the given token (pattern) any more. If the
|
|
|
|
lexer has a notion of "keywords", it can release it from its tables.
|
|
|
|
Called by [DELETE_RULE] statements.
|
|
|
|
- The field [tok_match] is a function taking a pattern and returning
|
|
|
|
a function matching a token against the pattern. Warning: for
|
|
|
|
efficency, write it as a function returning functions according
|
|
|
|
to the values of the pattern, not a function with two parameters.
|
|
|
|
- The field [tok_text] returns the name of some token pattern,
|
2003-07-10 05:28:35 -07:00
|
|
|
used in error messages.
|
|
|
|
- The field [tok_comm] if not None asks the lexer to record the
|
|
|
|
locations of the comments. *)
|
2001-09-07 00:32:09 -07:00
|
|
|
|
|
|
|
value lexer_text : pattern -> string;
|
2002-07-19 07:53:56 -07:00
|
|
|
(** A simple [tok_text] function for lexers *)
|
2002-02-16 10:44:22 -08:00
|
|
|
|
2002-07-19 07:53:56 -07:00
|
|
|
value default_match : pattern -> (string * string) -> string;
|
|
|
|
(** A simple [tok_match] function for lexers, appling to token type
|
|
|
|
[(string * string)] *)
|
2001-09-07 00:32:09 -07:00
|
|
|
|
2002-07-19 07:53:56 -07:00
|
|
|
(** {6 Lexers from char stream parsers or ocamllex function}
|
2001-09-07 00:32:09 -07:00
|
|
|
|
2002-07-19 07:53:56 -07:00
|
|
|
The functions below create lexer functions either from a [char stream]
|
2001-09-07 00:32:09 -07:00
|
|
|
parser or for an [ocamllex] function. With the returned function [f],
|
|
|
|
the simplest [Token.lexer] can be written:
|
2002-07-19 07:53:56 -07:00
|
|
|
{[
|
|
|
|
{ Token.tok_func = f;
|
|
|
|
Token.tok_using = (fun _ -> ());
|
|
|
|
Token.tok_removing = (fun _ -> ());
|
|
|
|
Token.tok_match = Token.default_match;
|
|
|
|
Token.tok_text = Token.lexer_text }
|
|
|
|
]}
|
2002-02-16 10:44:22 -08:00
|
|
|
Note that a better [tok_using] function should check the used tokens
|
2001-09-07 00:32:09 -07:00
|
|
|
and raise [Token.Error] for incorrect ones. The other functions
|
2002-02-16 10:44:22 -08:00
|
|
|
[tok_removing], [tok_match] and [tok_text] may have other implementations
|
2001-09-07 00:32:09 -07:00
|
|
|
as well. *)
|
|
|
|
|
2002-02-16 10:44:22 -08:00
|
|
|
value lexer_func_of_parser :
|
|
|
|
(Stream.t char -> ('te * location)) -> lexer_func 'te;
|
2002-07-19 07:53:56 -07:00
|
|
|
(** A lexer function from a lexer written as a char stream parser
|
2001-09-07 00:32:09 -07:00
|
|
|
returning the next token and its location. *)
|
2002-02-16 10:44:22 -08:00
|
|
|
value lexer_func_of_ocamllex : (Lexing.lexbuf -> 'te) -> lexer_func 'te;
|
2002-07-19 07:53:56 -07:00
|
|
|
(** A lexer function from a lexer created by [ocamllex] *)
|
2001-09-07 00:32:09 -07:00
|
|
|
|
|
|
|
value make_stream_and_location :
|
2002-07-19 07:53:56 -07:00
|
|
|
(unit -> ('te * location)) -> (Stream.t 'te * location_function);
|
|
|
|
(** General function *)
|
2001-09-07 00:32:09 -07:00
|
|
|
|
2002-07-19 07:53:56 -07:00
|
|
|
(** {6 Useful functions} *)
|
2001-09-07 00:32:09 -07:00
|
|
|
|
|
|
|
value eval_char : string -> char;
|
2003-11-21 05:36:42 -08:00
|
|
|
(** Convert a char token, where the escape sequences (backslashes)
|
|
|
|
remain to be interpreted; raise [Failure] if an
|
|
|
|
incorrect backslash sequence is found; [Token.eval_char (Char.escaped c)]
|
|
|
|
returns [c] *)
|
|
|
|
|
|
|
|
value eval_string : location -> string -> string;
|
|
|
|
(** Convert a string token, where the escape sequences (backslashes)
|
|
|
|
remain to be interpreted; issue a warning if an incorrect
|
|
|
|
backslash sequence is found;
|
|
|
|
[Token.eval_string loc (String.escaped s)] returns [s] *)
|
2002-02-16 10:44:22 -08:00
|
|
|
|
2002-07-19 07:53:56 -07:00
|
|
|
(**/**)
|
2002-02-16 10:44:22 -08:00
|
|
|
|
2002-07-19 07:53:56 -07:00
|
|
|
(* deprecated since version 3.05; use rather type glexer *)
|
|
|
|
type t = (string * string);
|
2002-02-16 10:44:22 -08:00
|
|
|
type lexer =
|
|
|
|
{ func : lexer_func t;
|
|
|
|
using : pattern -> unit;
|
|
|
|
removing : pattern -> unit;
|
|
|
|
tparse : pattern -> option (Stream.t t -> string);
|
|
|
|
text : pattern -> string }
|
|
|
|
;
|