ocaml/stdlib/genlex.mli

(***********************************************************************)
(*                                                                     *)
(*                           Objective Caml                            *)
(*                                                                     *)
(*              Xavier Leroy, projet Cristal, INRIA Rocquencourt       *)
(*                                                                     *)
(*  Copyright 1996 Institut National de Recherche en Informatique et   *)
(*  en Automatique.  All rights reserved.  This file is distributed    *)
(*  under the terms of the GNU Library General Public License.         *)
(*                                                                     *)
(***********************************************************************)

(* $Id$ *)

(** A generic lexical analyzer.


   This module implements a simple ``standard'' lexical analyzer, presented
   as a function from character streams to token streams. It implements
   roughly the lexical conventions of Caml, but is parameterized by the
   set of keywords of your language. 


   Example: a lexer suitable for a desk calculator is obtained by
   {[     let lexer = make_lexer ["+";"-";"*";"/";"let";"="; "("; ")"]  ]}

   The associated parser would be a function from [token Stream.t]
   to, for instance, [int], and would have rules such as:

   {[
           let parse_expr = parser
                  [< 'Int n >] -> n
                | [< 'Kwd "("; n = parse_expr; 'Kwd ")" >] -> n
                | [< n1 = parse_expr; n2 = parse_remainder n1 >] -> n2
           and parse_remainder n1 = parser
                  [< 'Kwd "+"; n2 = parse_expr >] -> n1+n2
                | ...
   ]}
*)

(** The type of tokens returned by the lexer, one constructor per
   lexical class. [Kwd] carries a keyword, which is either an
   identifier or a single ``special character'' such as [(], [}], etc.
   [Ident] carries an identifier, that is, either a sequence of
   letters, digits, underscores and quotes, or a sequence of
   ``operator characters'' such as [+], [*], etc. [Int] and [Float]
   carry integer and floating-point numbers. [String] carries a
   string literal (written between double quotes) and [Char] a
   character literal (written between single quotes). *)
type token =
  | Kwd of string
  | Ident of string
  | Int of int
  | Float of float
  | String of string
  | Char of char
           
val make_lexer : string list -> char Stream.t -> token Stream.t
(** [make_lexer keywords] constructs the lexer function, which maps a
   stream of characters to a stream of tokens. The first argument is
   the list of keywords. An identifier [s] is returned as [Kwd s] if
   [s] belongs to this list, and as [Ident s] otherwise.
   A special character [s] is returned as [Kwd s] if [s]
   belongs to this list, and causes a lexical error (exception
   [Parse_error]) otherwise. Blanks and newlines are skipped.
   Comments delimited by [(*] and [*)] are skipped as well,
   and can be nested. *)
(* NOTE(review): [Parse_error] is not declared in this interface;
   presumably it is an exception of the [Stream] module. Confirm the
   exact exception name against stream.mli. *)
Pervasives: ajout option Open_nonblock Stream: commentaire. Makefile: ajout de Genlex. git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@769 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 1996-04-29 06:23:25 -07:00			`(***********************************************************************)`
			`(* *)`
Renommage en Objective Caml git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@782 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 1996-04-30 07:53:58 -07:00			`(* Objective Caml *)`
Pervasives: ajout option Open_nonblock Stream: commentaire. Makefile: ajout de Genlex. git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@769 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 1996-04-29 06:23:25 -07:00			`(* *)`
			`(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)`
			`(* *)`
Renommage en Objective Caml git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@782 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 1996-04-30 07:53:58 -07:00			`(* Copyright 1996 Institut National de Recherche en Informatique et *)`
Changement de la licence git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@2553 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 1999-11-17 10:59:06 -08:00			`(* en Automatique. All rights reserved. This file is distributed *)`
			`(* under the terms of the GNU Library General Public License. *)`
Pervasives: ajout option Open_nonblock Stream: commentaire. Makefile: ajout de Genlex. git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@769 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 1996-04-29 06:23:25 -07:00			`(* *)`
			`(***********************************************************************)`

			`(* $Id$ *)`

Modification emplacement et syntaxe des commentaires pour OCamldoc git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3924 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-10-26 15:37:14 -07:00			`(** A generic lexical analyzer.`
Pervasives: ajout option Open_nonblock Stream: commentaire. Makefile: ajout de Genlex. git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@769 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 1996-04-29 06:23:25 -07:00
Modification emplacement et syntaxe des commentaires pour OCamldoc git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3924 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-10-26 15:37:14 -07:00
			This module implements a simple ``standard'' lexical analyzer, presented
Pervasives: ajout option Open_nonblock Stream: commentaire. Makefile: ajout de Genlex. git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@769 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 1996-04-29 06:23:25 -07:00			`as a function from character streams to token streams. It implements`
			`roughly the lexical conventions of Caml, but is parameterized by the`
Modification emplacement et syntaxe des commentaires pour OCamldoc git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3924 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-10-26 15:37:14 -07:00			`set of keywords of your language.`


			`Example: a lexer suitable for a desk calculator is obtained by`
			`{[ let lexer = make_lexer ["+";"-";"*";"/";"let";"="; "("; ")"] ]}`
Pervasives: ajout option Open_nonblock Stream: commentaire. Makefile: ajout de Genlex. git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@769 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 1996-04-29 06:23:25 -07:00
Modification emplacement et syntaxe des commentaires pour OCamldoc git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3924 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-10-26 15:37:14 -07:00			`The associated parser would be a function from [token stream]`
			`to, for instance, [int], and would have rules such as:`

			`{[`
			`let parse_expr = parser`
			`[< 'Int n >] -> n`
			`\| [< 'Kwd "("; n = parse_expr; 'Kwd ")" >] -> n`
			`\| [< n1 = parse_expr; n2 = parse_remainder n1 >] -> n2`
			`and parse_remainder n1 = parser`
			`[< 'Kwd "+"; n2 = parse_expr >] -> n1+n2`
			`\| ...`
			`]}`
			`*)`

			`(** The type of tokens. The lexical classes are: [Int] and [Float]`
			`for integer and floating-point numbers; [String] for`
			`string literals, enclosed in double quotes; [Char] for`
			`character literals, enclosed in single quotes; [Ident] for`
			`identifiers (either sequences of letters, digits, underscores`
			and quotes, or sequences of ``operator characters'' such as
			`[+], [*], etc); and [Kwd] for keywords (either identifiers or`
			single ``special characters'' such as [(], [}], etc). *)
correction commentaire git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@4084 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-12-03 14:47:17 -08:00			`type token =`
			`Kwd of string`
			`\| Ident of string`
			`\| Int of int`
			`\| Float of float`
			`\| String of string`
			`\| Char of char`
Pervasives: ajout option Open_nonblock Stream: commentaire. Makefile: ajout de Genlex. git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@769 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 1996-04-29 06:23:25 -07:00
commentaires après git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@4082 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-12-03 14:01:28 -08:00			`val make_lexer : string list -> char Stream.t -> token Stream.t`
Modification emplacement et syntaxe des commentaires pour OCamldoc git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3924 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-10-26 15:37:14 -07:00			`(** Construct the lexer function. The first argument is the list of`
			`keywords. An identifier [s] is returned as [Kwd s] if [s]`
			`belongs to this list, and as [Ident s] otherwise.`
			`A special character [s] is returned as [Kwd s] if [s]`
			`belongs to this list, and cause a lexical error (exception`
			`[Parse_error]) otherwise. Blanks and newlines are skipped.`
			`Comments delimited by [(*] and [*)] are skipped as well,`
			`and can be nested. *)`