ocaml/camlp4/lib/token.mli

(* camlp4r *)
(***********************************************************************)
(*                                                                     *)
(*                             Camlp4                                  *)
(*                                                                     *)
(*        Daniel de Rauglaudre, projet Cristal, INRIA Rocquencourt     *)
(*                                                                     *)
(*  Copyright 2001 Institut National de Recherche en Informatique et   *)
(*  Automatique.  Distributed only by permission.                      *)
(*                                                                     *)
(***********************************************************************)

(* $Id$ *)

(** Lexers for Camlp4 grammars.

   This module defines the Camlp4 lexer type to be used in extensible
   grammars (see module [Grammar]). It also provides some useful functions
   to create lexers (this module should be renamed [Glexer] one day). *)

type pattern = (string * string);
    (** Token patterns come from the EXTEND statement.
-       The first string is the constructor name (must start with
        an uppercase character). When it is empty, the second string
        is supposed to be a keyword.
-       The second string is the constructor parameter. Empty if it
        has no parameter.
-       The way token patterns are interpreted to parse tokens is
        decided by the lexer, through the [tok_match] field of the
        [glexer] type below. *)

exception Error of string;
    (** A lexing error exception to be used by lexers. *)

(** {6 Lexer type} *)

type location = (int * int);
  (** The type for a source location, given as a pair of integers.
      NOTE(review): presumably the begin and end character positions
      in the input - confirm against the implementation. *)
type location_function = int -> location;
  (** The type for a function mapping the index of a token in a stream
      (starting from 0) to its source location. *)
type lexer_func 'te = Stream.t char -> (Stream.t 'te * location_function);
  (** The type for a lexer function. The character stream is the input
      stream to be lexed. The result is a pair of a token stream and
      a location function for this token stream. *)

type glexer 'te =
  { tok_func : lexer_func 'te;
    tok_using : pattern -> unit;
    tok_removing : pattern -> unit;
    tok_match : pattern -> 'te -> string;
    tok_text : pattern -> string;
    tok_comm : mutable option (list location) }
;
   (** The type for a lexer used by Camlp4 grammars.
-      The field [tok_func] is the main lexer function. See [lexer_func]
       type above. This function may be created from a [char stream parser]
       or from an [ocamllex] function using the functions below.
-      The field [tok_using] is a function telling the lexer that the grammar
       uses this token (pattern). The lexer can check that its constructor
       is correct, and interpret some kinds of tokens as keywords (to record
       them in its tables). Called by [EXTEND] statements.
-      The field [tok_removing] is a function telling the lexer that the
       grammar does not use the given token (pattern) any more. If the
       lexer has a notion of "keywords", it can release it from its tables.
       Called by [DELETE_RULE] statements.
-      The field [tok_match] is a function taking a pattern and returning
       a function matching a token against the pattern. Warning: for
       efficiency, write it as a function returning functions according
       to the values of the pattern, not as a function with two parameters.
-      The field [tok_text] returns the name of some token pattern,
       used in error messages.
-      The field [tok_comm], when not [None], asks the lexer to record the
       locations of the comments. *)

value lexer_text : pattern -> string;
   (** A simple [tok_text] function for lexers. *)

value default_match : pattern -> (string * string) -> string;
   (** A simple [tok_match] function for lexers, applying to the token
       type [(string * string)]. *)

(** {6 Lexers from char stream parsers or ocamllex function}

   The functions below create lexer functions either from a [char stream]
   parser or from an [ocamllex] function. With the returned function [f],
   the simplest [Token.glexer] can be written:
   {[
          { Token.tok_func = f;
            Token.tok_using = (fun _ -> ());
            Token.tok_removing = (fun _ -> ());
            Token.tok_match = Token.default_match;
            Token.tok_text = Token.lexer_text;
            Token.tok_comm = None }
   ]}
   Note that a better [tok_using] function should check the used tokens
   and raise [Token.Error] for incorrect ones. The other functions
   [tok_removing], [tok_match] and [tok_text] may have other implementations
   as well. *)

value lexer_func_of_parser :
  (Stream.t char -> ('te * location)) -> lexer_func 'te;
   (** Build a lexer function from a lexer written as a char stream parser
       returning the next token and its location. *)
value lexer_func_of_ocamllex : (Lexing.lexbuf -> 'te) -> lexer_func 'te;
   (** Build a lexer function from a lexer created by [ocamllex]. *)

value make_stream_and_location :
  (unit -> ('te * location)) -> (Stream.t 'te * location_function);
   (** General function: from a function returning a token together with
       its location, build a token stream and the location function for
       that stream. NOTE(review): the argument is presumably called once
       per token, on demand - confirm against the implementation. *)

(** {6 Useful functions} *)

value eval_char : string -> char;
   (** Convert a char token whose escape sequences (backslashes)
       remain to be interpreted. Raises [Failure] if an incorrect
       backslash sequence is found.
       [Token.eval_char (Char.escaped c)] returns [c]. *)

value eval_string : location -> string -> string;
   (** Convert a string token whose escape sequences (backslashes)
       remain to be interpreted. Issues a warning if an incorrect
       backslash sequence is found.
       [Token.eval_string loc (String.escaped s)] returns [s].
       NOTE(review): [loc] is presumably the location reported with
       the warning - confirm against the implementation. *)

(**/**)

(* Deprecated since version 3.05; use type [glexer] instead. *)
(* Token type of the deprecated lexer record below. *)
type t = (string * string);
(* Deprecated lexer record; its lexer function produces tokens of type [t]. *)
type lexer =
  { func : lexer_func t;
    using : pattern -> unit;
    removing : pattern -> unit;
    tparse : pattern -> option (Stream.t t -> string);
    text : pattern -> string }
;
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3706 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-09-07 00:32:09 -07:00			`(* camlp4r *)`
			`(***********************************************************************)`
			`(* *)`
			`(* Camlp4 *)`
			`(* *)`
			`(* Daniel de Rauglaudre, projet Cristal, INRIA Rocquencourt *)`
			`(* *)`
			`(* Copyright 2001 Institut National de Recherche en Informatique et *)`
			`(* Automatique. Distributed only by permission. *)`
			`(* *)`
			`(***********************************************************************)`

			`(* $Id$ *)`

MAJ pour 3.05 git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5007 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-07-19 07:53:56 -07:00			`(** Lexers for Camlp4 grammars.`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3706 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-09-07 00:32:09 -07:00
MAJ pour 3.05 git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5007 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-07-19 07:53:56 -07:00			`This module defines the Camlp4 lexer type to be used in extensible`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3706 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-09-07 00:32:09 -07:00			`grammars (see module [Grammar]). It also provides some useful functions`
			`to create lexers (this module should be renamed [Glexer] one day). *)`

			`type pattern = (string * string);`
MAJ pour 3.05 git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5007 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-07-19 07:53:56 -07:00			`(** Token patterns come from the EXTEND statement.`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3706 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-09-07 00:32:09 -07:00			`- The first string is the constructor name (must start with`
			`an uppercase character). When it is empty, the second string`
			`is supposed to be a keyword.`
			`- The second string is the constructor parameter. Empty if it`
			`has no parameter.`
MAJ pour 3.05 git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5007 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-07-19 07:53:56 -07:00			`- The way tokens patterns are interpreted to parse tokens is`
			`done by the lexer, function [tok_match] below. *)`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3706 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-09-07 00:32:09 -07:00
			`exception Error of string;`
MAJ pour 3.05 git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5007 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-07-19 07:53:56 -07:00			`(** An lexing error exception to be used by lexers. *)`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3706 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-09-07 00:32:09 -07:00
MAJ pour 3.05 git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5007 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-07-19 07:53:56 -07:00			`(** {6 Lexer type} *)`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3706 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-09-07 00:32:09 -07:00
			`type location = (int * int);`
			`type location_function = int -> location;`
MAJ pour 3.05 git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5007 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-07-19 07:53:56 -07:00			`(** The type for a function associating a number of a token in a stream`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3706 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-09-07 00:32:09 -07:00			`(starting from 0) to its source location. *)`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@4422 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-02-16 10:44:22 -08:00			`type lexer_func 'te = Stream.t char -> (Stream.t 'te * location_function);`
MAJ pour 3.05 git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5007 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-07-19 07:53:56 -07:00			`(** The type for a lexer function. The character stream is the input`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3706 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-09-07 00:32:09 -07:00			`stream to be lexed. The result is a pair of a token stream and`
			`a location function for this tokens stream. *)`

- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@4422 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-02-16 10:44:22 -08:00			`type glexer 'te =`
			`{ tok_func : lexer_func 'te;`
			`tok_using : pattern -> unit;`
			`tok_removing : pattern -> unit;`
			`tok_match : pattern -> 'te -> string;`
* empty log message * git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5683 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2003-07-10 05:28:35 -07:00			`tok_text : pattern -> string;`
			`tok_comm : mutable option (list location) }`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3706 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-09-07 00:32:09 -07:00			`;`
MAJ pour 3.05 git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5007 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-07-19 07:53:56 -07:00			`(** The type for a lexer used by Camlp4 grammars.`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@4422 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-02-16 10:44:22 -08:00			`- The field [tok_func] is the main lexer function. See [lexer_func]`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3706 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-09-07 00:32:09 -07:00			`type above. This function may be created from a [char stream parser]`
			`or for an [ocamllex] function using the functions below.`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@4422 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-02-16 10:44:22 -08:00			`- The field [tok_using] is a function telling the lexer that the grammar`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3706 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-09-07 00:32:09 -07:00			`uses this token (pattern). The lexer can check that its constructor`
			`is correct, and interpret some kind of tokens as keywords (to record`
			`them in its tables). Called by [EXTEND] statements.`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@4422 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-02-16 10:44:22 -08:00			`- The field [tok_removing] is a function telling the lexer that the`
			`grammar does not uses the given token (pattern) any more. If the`
			`lexer has a notion of "keywords", it can release it from its tables.`
			`Called by [DELETE_RULE] statements.`
			`- The field [tok_match] is a function taking a pattern and returning`
			`a function matching a token against the pattern. Warning: for`
			`efficency, write it as a function returning functions according`
			`to the values of the pattern, not a function with two parameters.`
			`- The field [tok_text] returns the name of some token pattern,`
* empty log message * git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5683 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2003-07-10 05:28:35 -07:00			`used in error messages.`
			`- The field [tok_comm] if not None asks the lexer to record the`
			`locations of the comments. *)`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3706 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-09-07 00:32:09 -07:00
			`value lexer_text : pattern -> string;`
MAJ pour 3.05 git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5007 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-07-19 07:53:56 -07:00			`(** A simple [tok_text] function for lexers *)`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@4422 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-02-16 10:44:22 -08:00
MAJ pour 3.05 git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5007 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-07-19 07:53:56 -07:00			`value default_match : pattern -> (string * string) -> string;`
			`(** A simple [tok_match] function for lexers, appling to token type`
			`[(string * string)] *)`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3706 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-09-07 00:32:09 -07:00
MAJ pour 3.05 git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5007 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-07-19 07:53:56 -07:00			`(** {6 Lexers from char stream parsers or ocamllex function}`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3706 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-09-07 00:32:09 -07:00
MAJ pour 3.05 git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5007 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-07-19 07:53:56 -07:00			`The functions below create lexer functions either from a [char stream]`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3706 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-09-07 00:32:09 -07:00			`parser or for an [ocamllex] function. With the returned function [f],`
			`the simplest [Token.lexer] can be written:`
MAJ pour 3.05 git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5007 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-07-19 07:53:56 -07:00			`{[`
			`{ Token.tok_func = f;`
			`Token.tok_using = (fun _ -> ());`
			`Token.tok_removing = (fun _ -> ());`
			`Token.tok_match = Token.default_match;`
			`Token.tok_text = Token.lexer_text }`
			`]}`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@4422 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-02-16 10:44:22 -08:00			`Note that a better [tok_using] function should check the used tokens`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3706 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-09-07 00:32:09 -07:00			`and raise [Token.Error] for incorrect ones. The other functions`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@4422 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-02-16 10:44:22 -08:00			`[tok_removing], [tok_match] and [tok_text] may have other implementations`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3706 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-09-07 00:32:09 -07:00			`as well. *)`

- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@4422 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-02-16 10:44:22 -08:00			`value lexer_func_of_parser :`
			`(Stream.t char -> ('te * location)) -> lexer_func 'te;`
MAJ pour 3.05 git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5007 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-07-19 07:53:56 -07:00			`(** A lexer function from a lexer written as a char stream parser`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3706 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-09-07 00:32:09 -07:00			`returning the next token and its location. *)`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@4422 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-02-16 10:44:22 -08:00			`value lexer_func_of_ocamllex : (Lexing.lexbuf -> 'te) -> lexer_func 'te;`
MAJ pour 3.05 git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5007 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-07-19 07:53:56 -07:00			`(** A lexer function from a lexer created by [ocamllex] *)`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3706 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-09-07 00:32:09 -07:00
			`value make_stream_and_location :`
MAJ pour 3.05 git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5007 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-07-19 07:53:56 -07:00			`(unit -> ('te * location)) -> (Stream.t 'te * location_function);`
			`(** General function *)`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3706 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-09-07 00:32:09 -07:00
MAJ pour 3.05 git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5007 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-07-19 07:53:56 -07:00			`(** {6 Useful functions} *)`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3706 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-09-07 00:32:09 -07:00
			`value eval_char : string -> char;`
Added warning for illegal escapes sequences in strings git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5942 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2003-11-21 05:36:42 -08:00			`(** Convert a char token, where the escape sequences (backslashes)`
			`remain to be interpreted; raise [Failure] if an`
			`incorrect backslash sequence is found; [Token.eval_char (Char.escaped c)]`
			`returns [c] *)`

			`value eval_string : location -> string -> string;`
			`(** Convert a string token, where the escape sequences (backslashes)`
			`remain to be interpreted; issue a warning if an incorrect`
			`backslash sequence is found;`
			`[Token.eval_string loc (String.escaped s)] returns [s] *)`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@4422 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-02-16 10:44:22 -08:00
MAJ pour 3.05 git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5007 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-07-19 07:53:56 -07:00			`(/)`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@4422 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-02-16 10:44:22 -08:00
MAJ pour 3.05 git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@5007 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-07-19 07:53:56 -07:00			`(* deprecated since version 3.05; use rather type glexer *)`
			`type t = (string * string);`
- git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@4422 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2002-02-16 10:44:22 -08:00			`type lexer =`
			`{ func : lexer_func t;`
			`using : pattern -> unit;`
			`removing : pattern -> unit;`
			`tparse : pattern -> option (Stream.t t -> string);`
			`text : pattern -> string }`
			`;`