ocaml/tools/lexer301.mll

(***********************************************************************)
(*                                                                     *)
(*                           Objective Caml                            *)
(*                                                                     *)
(*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
(*                                                                     *)
(*  Copyright 1996 Institut National de Recherche en Informatique et   *)
(*  en Automatique.  All rights reserved.  This file is distributed    *)
(*  under the terms of the Q Public License version 1.0.               *)
(*                                                                     *)
(***********************************************************************)

(* $Id$ *)

(* The lexer definition *)

{
open Misc

type token =
    AMPERAMPER
  | AMPERSAND
  | AND
  | AS
  | ASSERT
  | BACKQUOTE
  | BAR
  | BARBAR
  | BARRBRACKET
  | BEGIN
  | CHAR of (char)
  | CLASS
  | COLON
  | COLONCOLON
  | COLONEQUAL
  | COLONGREATER
  | COMMA
  | CONSTRAINT
  | DO
  | DONE
  | DOT
  | DOTDOT
  | DOWNTO
  | ELSE
  | END
  | EOF
  | EQUAL
  | EXCEPTION
  | EXTERNAL
  | FALSE
  | FLOAT of (string)
  | FOR
  | FUN
  | FUNCTION
  | FUNCTOR
  | GREATER
  | GREATERRBRACE
  | GREATERRBRACKET
  | IF
  | IN
  | INCLUDE
  | INFIXOP0 of (string)
  | INFIXOP1 of (string)
  | INFIXOP2 of (string)
  | INFIXOP3 of (string)
  | INFIXOP4 of (string)
  | INHERIT
  | INITIALIZER
  | INT of (int)
  | LABEL of (string)
  | LAZY
  | LBRACE
  | LBRACELESS
  | LBRACKET
  | LBRACKETBAR
  | LBRACKETLESS
  | LESS
  | LESSMINUS
  | LET
  | LIDENT of (string)
  | LPAREN
  | MATCH
  | METHOD
  | MINUS
  | MINUSDOT
  | MINUSGREATER
  | MODULE
  | MUTABLE
  | NEW
  | OBJECT
  | OF
  | OPEN
  | OPTLABEL of (string)
  | OR
  | PARSER
  | PLUS
  | PREFIXOP of (string)
  | PRIVATE
  | QUESTION
  | QUESTION2
  | QUOTE
  | RBRACE
  | RBRACKET
  | REC
  | RPAREN
  | SEMI
  | SEMISEMI
  | SHARP
  | SIG
  | STAR
  | STRING of (string)
  | STRUCT
  | THEN
  | TILDE
  | TO
  | TRUE
  | TRY
  | TYPE
  | UIDENT of (string)
  | UNDERSCORE
  | VAL
  | VIRTUAL
  | WHEN
  | WHILE
  | WITH

type error =
  | Illegal_character of char
  | Unterminated_comment
  | Unterminated_string
  | Unterminated_string_in_comment
  | Keyword_as_label of string
;;

exception Error of error * int * int

(* The table of keywords *)

let keyword_table =
  create_hashtable 149 [
    "and", AND;
    "as", AS;
    "assert", ASSERT;
    "begin", BEGIN;
    "class", CLASS;
    "constraint", CONSTRAINT;
    "do", DO;
    "done", DONE;
    "downto", DOWNTO;
    "else", ELSE;
    "end", END;
    "exception", EXCEPTION;
    "external", EXTERNAL;
    "false", FALSE;
    "for", FOR;
    "fun", FUN;
    "function", FUNCTION;
    "functor", FUNCTOR;
    "if", IF;
    "in", IN;
    "include", INCLUDE;
    "inherit", INHERIT;
    "initializer", INITIALIZER;
    "lazy", LAZY;
    "let", LET;
    "match", MATCH;
    "method", METHOD;
    "module", MODULE;
    "mutable", MUTABLE;
    "new", NEW;
    "object", OBJECT;
    "of", OF;
    "open", OPEN;
    "or", OR;
    "parser", PARSER;
    "private", PRIVATE;
    "rec", REC;
    "sig", SIG;
    "struct", STRUCT;
    "then", THEN;
    "to", TO;
    "true", TRUE;
    "try", TRY;
    "type", TYPE;
    "val", VAL;
    "virtual", VIRTUAL;
    "when", WHEN;
    "while", WHILE;
    "with", WITH;

    "mod", INFIXOP3("mod");
    "land", INFIXOP3("land");
    "lor", INFIXOP3("lor");
    "lxor", INFIXOP3("lxor");
    "lsl", INFIXOP4("lsl");
    "lsr", INFIXOP4("lsr");
    "asr", INFIXOP4("asr")
]

(* To buffer string literals *)

let initial_string_buffer = String.create 256
let string_buff = ref initial_string_buffer
let string_index = ref 0

let reset_string_buffer () =
  string_buff := initial_string_buffer;
  string_index := 0

let store_string_char c =
  if !string_index >= String.length (!string_buff) then begin
    let new_buff = String.create (String.length (!string_buff) * 2) in
      String.blit (!string_buff) 0 new_buff 0 (String.length (!string_buff));
      string_buff := new_buff
  end;
  String.unsafe_set (!string_buff) (!string_index) c;
  incr string_index

let get_stored_string () =
  let s = String.sub (!string_buff) 0 (!string_index) in
  string_buff := initial_string_buffer;
  s

(* To translate escape sequences *)

let char_for_backslash =
  match Sys.os_type with
  | "Unix" | "Win32" | "Cygwin" ->
      begin function
      | 'n' -> '\010'
      | 'r' -> '\013'
      | 'b' -> '\008'
      | 't' -> '\009'
      | c   -> c
      end
  | "MacOS" ->
      begin function
      | 'n' -> '\013'
      | 'r' -> '\010'
      | 'b' -> '\008'
      | 't' -> '\009'
      | c   -> c
      end
  | x -> fatal_error "Lexer: unknown system type"

let char_for_decimal_code lexbuf i =
  let c = 100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) +
           10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) +
                (Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48) in  
  Char.chr(c land 0xFF)

(* To store the position of the beginning of a string and comment *)
let string_start_pos = ref 0;;
let comment_start_pos = ref [];;
let in_comment () = !comment_start_pos <> [];;

(* Error report *)

open Format

let report_error ppf = function
  | Illegal_character c ->
      fprintf ppf "Illegal character (%s)" (Char.escaped c)
  | Unterminated_comment ->
      fprintf ppf "Comment not terminated"
  | Unterminated_string ->
      fprintf ppf "String literal not terminated"
  | Unterminated_string_in_comment ->
      fprintf ppf "This comment contains an unterminated string literal"
  | Keyword_as_label kwd ->
      fprintf ppf "`%s' is a keyword, it cannot be used as label name" kwd
;;

}

let blank = [' ' '\010' '\013' '\009' '\012']
let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_']
let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222']
let identchar = 
  ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '\'' '0'-'9']
let symbolchar =
  ['!' '$' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~']
let decimal_literal = ['0'-'9']+
let hex_literal = '0' ['x' 'X'] ['0'-'9' 'A'-'F' 'a'-'f']+
let oct_literal = '0' ['o' 'O'] ['0'-'7']+
let bin_literal = '0' ['b' 'B'] ['0'-'1']+
let float_literal =
  ['0'-'9']+ ('.' ['0'-'9']* )? (['e' 'E'] ['+' '-']? ['0'-'9']+)?

rule token = parse
    blank +
      { token lexbuf }
  | "_"
      { UNDERSCORE }
  | "~"  { TILDE }
  | "~" lowercase identchar * ':'
      { let s = Lexing.lexeme lexbuf in
        let name = String.sub s 1 (String.length s - 2) in
        if Hashtbl.mem keyword_table name then
          raise (Error(Keyword_as_label name, Lexing.lexeme_start lexbuf,
                       Lexing.lexeme_end lexbuf));
        LABEL name }
  | "?"  { QUESTION }
  | "?" lowercase identchar * ':'
      { let s = Lexing.lexeme lexbuf in
        let name = String.sub s 1 (String.length s - 2) in
        if Hashtbl.mem keyword_table name then
          raise (Error(Keyword_as_label name, Lexing.lexeme_start lexbuf,
                       Lexing.lexeme_end lexbuf));
        OPTLABEL name }
  | lowercase identchar *
      { let s = Lexing.lexeme lexbuf in
          try
            Hashtbl.find keyword_table s
          with Not_found ->
            LIDENT s }
  | uppercase identchar *
      { UIDENT(Lexing.lexeme lexbuf) }       (* No capitalized keywords *)
  | decimal_literal | hex_literal | oct_literal | bin_literal
      { INT (int_of_string(Lexing.lexeme lexbuf)) }
  | float_literal
      { FLOAT (Lexing.lexeme lexbuf) }
  | "\""
      { reset_string_buffer();
        let string_start = Lexing.lexeme_start lexbuf in
        string_start_pos := string_start;
        string lexbuf;
        lexbuf.Lexing.lex_start_pos <-
          string_start - lexbuf.Lexing.lex_abs_pos;
        STRING (get_stored_string()) }
  | "'" [^ '\\' '\''] "'"
      { CHAR(Lexing.lexeme_char lexbuf 1) }
  | "'" '\\' ['\\' '\'' 'n' 't' 'b' 'r'] "'"
      { CHAR(char_for_backslash (Lexing.lexeme_char lexbuf 2)) }
  | "'" '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
      { CHAR(char_for_decimal_code lexbuf 2) }
  | "(*"
      { comment_start_pos := [Lexing.lexeme_start lexbuf];
        comment lexbuf;
        token lexbuf }
  | "(*)"
      { let loc = { Location.loc_start = Lexing.lexeme_start lexbuf;
                    Location.loc_end = Lexing.lexeme_end lexbuf - 1;
                    Location.loc_ghost = false }
        and warn = Warnings.Comment "the start of a comment"
        in
        Location.prerr_warning loc warn;
        comment_start_pos := [Lexing.lexeme_start lexbuf];
        comment lexbuf;
        token lexbuf
      }
  | "*)"
      { let loc = { Location.loc_start = Lexing.lexeme_start lexbuf;
                    Location.loc_end = Lexing.lexeme_end lexbuf;
                    Location.loc_ghost = false }
        and warn = Warnings.Comment "not the end of a comment"
        in
        Location.prerr_warning loc warn;
        lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 1;
        STAR
      }
  | "#" [' ' '\t']* ['0'-'9']+ [^ '\n' '\r'] * ('\n' | '\r' | "\r\n")
      (* # linenum ...  *)
      { token lexbuf }
  | "#"  { SHARP }
  | "&"  { AMPERSAND }
  | "&&" { AMPERAMPER }
  | "`"  { BACKQUOTE }
  | "'"  { QUOTE }
  | "("  { LPAREN }
  | ")"  { RPAREN }
  | "*"  { STAR }
  | ","  { COMMA }
  | "??" { QUESTION2 }
  | "->" { MINUSGREATER }
  | "."  { DOT }
  | ".." { DOTDOT }
  | ":"  { COLON }
  | "::" { COLONCOLON }
  | ":=" { COLONEQUAL }
  | ":>" { COLONGREATER }
  | ";"  { SEMI }
  | ";;" { SEMISEMI }
  | "<"  { LESS }
  | "<-" { LESSMINUS }
  | "="  { EQUAL }
  | "["  { LBRACKET }
  | "[|" { LBRACKETBAR }
  | "[<" { LBRACKETLESS }
  | "]"  { RBRACKET }
  | "{"  { LBRACE }
  | "{<" { LBRACELESS }
  | "|"  { BAR }
  | "||" { BARBAR }
  | "|]" { BARRBRACKET }
  | ">"  { GREATER }
  | ">]" { GREATERRBRACKET }
  | "}"  { RBRACE }
  | ">}" { GREATERRBRACE }

  | "!=" { INFIXOP0 "!=" }
  | "+"  { PLUS }
  | "-"  { MINUS }
  | "-." { MINUSDOT }

  | "!" symbolchar *
            { PREFIXOP(Lexing.lexeme lexbuf) }
  | ['~' '?'] symbolchar +
            { PREFIXOP(Lexing.lexeme lexbuf) }
  | ['=' '<' '>' '|' '&' '$'] symbolchar *
            { INFIXOP0(Lexing.lexeme lexbuf) }
  | ['@' '^'] symbolchar *
            { INFIXOP1(Lexing.lexeme lexbuf) }
  | ['+' '-'] symbolchar *
            { INFIXOP2(Lexing.lexeme lexbuf) }
  | "**" symbolchar *
            { INFIXOP4(Lexing.lexeme lexbuf) }
  | ['*' '/' '%'] symbolchar *
            { INFIXOP3(Lexing.lexeme lexbuf) }
  | eof { EOF }
  | _
      { raise (Error(Illegal_character ((Lexing.lexeme lexbuf).[0]),
                     Lexing.lexeme_start lexbuf, Lexing.lexeme_end lexbuf)) }

and comment = parse
    "(*"
      { comment_start_pos := Lexing.lexeme_start lexbuf :: !comment_start_pos;
        comment lexbuf;
      }
  | "*)"
      { match !comment_start_pos with
        | [] -> assert false
        | [x] -> comment_start_pos := [];
        | _ :: l -> comment_start_pos := l;
                    comment lexbuf;
       }
  | "\""
      { reset_string_buffer();
        string_start_pos := Lexing.lexeme_start lexbuf;
        begin try string lexbuf
        with Error (Unterminated_string, _, _) ->
          let st = List.hd !comment_start_pos in
          raise (Error (Unterminated_string_in_comment, st, st + 2))
        end;
        string_buff := initial_string_buffer;
        comment lexbuf }
  | "''"
      { comment lexbuf }
  | "'" [^ '\\' '\''] "'"
      { comment lexbuf }
  | "'\\" ['\\' '\'' 'n' 't' 'b' 'r'] "'"
      { comment lexbuf }
  | "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
      { comment lexbuf }
  | eof
      { let st = List.hd !comment_start_pos in
        raise (Error (Unterminated_comment, st, st + 2));
      }
  | _
      { comment lexbuf }

and string = parse
    '"'
      { () }
  | '\\' ("\010" | "\013" | "\013\010") [' ' '\009'] *
      { string lexbuf }
  | '\\' ['\\' '"' 'n' 't' 'b' 'r']
      { store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1));
        string lexbuf }
  | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']
      { store_string_char(char_for_decimal_code lexbuf 1);
         string lexbuf }
  | eof
      { raise (Error (Unterminated_string,
                      !string_start_pos, !string_start_pos+1)) }
  | _
      { store_string_char(Lexing.lexeme_char lexbuf 0);
        string lexbuf }
passage aux labels stricts git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@3696 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02 2001-09-06 01:52:32 -07:00			`(***********************************************************************)`
			`(* *)`
			`(* Objective Caml *)`
			`(* *)`
			`(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)`
			`(* *)`
			`(* Copyright 1996 Institut National de Recherche en Informatique et *)`
			`(* en Automatique. All rights reserved. This file is distributed *)`
			`(* under the terms of the Q Public License version 1.0. *)`
			`(* *)`
			`(***********************************************************************)`

			`(* $Id$ *)`

			`(* The lexer definition *)`

			`{`
			`open Misc`

			`type token =`
			`AMPERAMPER`
			`\| AMPERSAND`
			`\| AND`
			`\| AS`
			`\| ASSERT`
			`\| BACKQUOTE`
			`\| BAR`
			`\| BARBAR`
			`\| BARRBRACKET`
			`\| BEGIN`
			`\| CHAR of (char)`
			`\| CLASS`
			`\| COLON`
			`\| COLONCOLON`
			`\| COLONEQUAL`
			`\| COLONGREATER`
			`\| COMMA`
			`\| CONSTRAINT`
			`\| DO`
			`\| DONE`
			`\| DOT`
			`\| DOTDOT`
			`\| DOWNTO`
			`\| ELSE`
			`\| END`
			`\| EOF`
			`\| EQUAL`
			`\| EXCEPTION`
			`\| EXTERNAL`
			`\| FALSE`
			`\| FLOAT of (string)`
			`\| FOR`
			`\| FUN`
			`\| FUNCTION`
			`\| FUNCTOR`
			`\| GREATER`
			`\| GREATERRBRACE`
			`\| GREATERRBRACKET`
			`\| IF`
			`\| IN`
			`\| INCLUDE`
			`\| INFIXOP0 of (string)`
			`\| INFIXOP1 of (string)`
			`\| INFIXOP2 of (string)`
			`\| INFIXOP3 of (string)`
			`\| INFIXOP4 of (string)`
			`\| INHERIT`
			`\| INITIALIZER`
			`\| INT of (int)`
			`\| LABEL of (string)`
			`\| LAZY`
			`\| LBRACE`
			`\| LBRACELESS`
			`\| LBRACKET`
			`\| LBRACKETBAR`
			`\| LBRACKETLESS`
			`\| LESS`
			`\| LESSMINUS`
			`\| LET`
			`\| LIDENT of (string)`
			`\| LPAREN`
			`\| MATCH`
			`\| METHOD`
			`\| MINUS`
			`\| MINUSDOT`
			`\| MINUSGREATER`
			`\| MODULE`
			`\| MUTABLE`
			`\| NEW`
			`\| OBJECT`
			`\| OF`
			`\| OPEN`
			`\| OPTLABEL of (string)`
			`\| OR`
			`\| PARSER`
			`\| PLUS`
			`\| PREFIXOP of (string)`
			`\| PRIVATE`
			`\| QUESTION`
			`\| QUESTION2`
			`\| QUOTE`
			`\| RBRACE`
			`\| RBRACKET`
			`\| REC`
			`\| RPAREN`
			`\| SEMI`
			`\| SEMISEMI`
			`\| SHARP`
			`\| SIG`
			`\| STAR`
			`\| STRING of (string)`
			`\| STRUCT`
			`\| THEN`
			`\| TILDE`
			`\| TO`
			`\| TRUE`
			`\| TRY`
			`\| TYPE`
			`\| UIDENT of (string)`
			`\| UNDERSCORE`
			`\| VAL`
			`\| VIRTUAL`
			`\| WHEN`
			`\| WHILE`
			`\| WITH`

			`type error =`
			`\| Illegal_character of char`
			`\| Unterminated_comment`
			`\| Unterminated_string`
			`\| Unterminated_string_in_comment`
			`\| Keyword_as_label of string`
			`;;`

			`exception Error of error * int * int`

			`(* The table of keywords *)`

			`let keyword_table =`
			`create_hashtable 149 [`
			`"and", AND;`
			`"as", AS;`
			`"assert", ASSERT;`
			`"begin", BEGIN;`
			`"class", CLASS;`
			`"constraint", CONSTRAINT;`
			`"do", DO;`
			`"done", DONE;`
			`"downto", DOWNTO;`
			`"else", ELSE;`
			`"end", END;`
			`"exception", EXCEPTION;`
			`"external", EXTERNAL;`
			`"false", FALSE;`
			`"for", FOR;`
			`"fun", FUN;`
			`"function", FUNCTION;`
			`"functor", FUNCTOR;`
			`"if", IF;`
			`"in", IN;`
			`"include", INCLUDE;`
			`"inherit", INHERIT;`
			`"initializer", INITIALIZER;`
			`"lazy", LAZY;`
			`"let", LET;`
			`"match", MATCH;`
			`"method", METHOD;`
			`"module", MODULE;`
			`"mutable", MUTABLE;`
			`"new", NEW;`
			`"object", OBJECT;`
			`"of", OF;`
			`"open", OPEN;`
			`"or", OR;`
			`"parser", PARSER;`
			`"private", PRIVATE;`
			`"rec", REC;`
			`"sig", SIG;`
			`"struct", STRUCT;`
			`"then", THEN;`
			`"to", TO;`
			`"true", TRUE;`
			`"try", TRY;`
			`"type", TYPE;`
			`"val", VAL;`
			`"virtual", VIRTUAL;`
			`"when", WHEN;`
			`"while", WHILE;`
			`"with", WITH;`

			`"mod", INFIXOP3("mod");`
			`"land", INFIXOP3("land");`
			`"lor", INFIXOP3("lor");`
			`"lxor", INFIXOP3("lxor");`
			`"lsl", INFIXOP4("lsl");`
			`"lsr", INFIXOP4("lsr");`
			`"asr", INFIXOP4("asr")`
			`]`

			`(* To buffer string literals *)`

			`let initial_string_buffer = String.create 256`
			`let string_buff = ref initial_string_buffer`
			`let string_index = ref 0`

			`let reset_string_buffer () =`
			`string_buff := initial_string_buffer;`
			`string_index := 0`

			`let store_string_char c =`
			`if !string_index >= String.length (!string_buff) then begin`
			`let new_buff = String.create (String.length (!string_buff) * 2) in`
			`String.blit (!string_buff) 0 new_buff 0 (String.length (!string_buff));`
			`string_buff := new_buff`
			`end;`
			`String.unsafe_set (!string_buff) (!string_index) c;`
			`incr string_index`

			`let get_stored_string () =`
			`let s = String.sub (!string_buff) 0 (!string_index) in`
			`string_buff := initial_string_buffer;`
			`s`

			`(* To translate escape sequences *)`

			`let char_for_backslash =`
			`match Sys.os_type with`
			`\| "Unix" \| "Win32" \| "Cygwin" ->`
			`begin function`
			`\| 'n' -> '\010'`
			`\| 'r' -> '\013'`
			`\| 'b' -> '\008'`
			`\| 't' -> '\009'`
			`\| c -> c`
			`end`
			`\| "MacOS" ->`
			`begin function`
			`\| 'n' -> '\013'`
			`\| 'r' -> '\010'`
			`\| 'b' -> '\008'`
			`\| 't' -> '\009'`
			`\| c -> c`
			`end`
			`\| x -> fatal_error "Lexer: unknown system type"`

			`let char_for_decimal_code lexbuf i =`
			`let c = 100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) +`
			`10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) +`
			`(Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48) in`
			`Char.chr(c land 0xFF)`

			`(* To store the position of the beginning of a string and comment *)`
			`let string_start_pos = ref 0;;`
			`let comment_start_pos = ref [];;`
			`let in_comment () = !comment_start_pos <> [];;`

			`(* Error report *)`

			`open Format`

			`let report_error ppf = function`
			`\| Illegal_character c ->`
			`fprintf ppf "Illegal character (%s)" (Char.escaped c)`
			`\| Unterminated_comment ->`
			`fprintf ppf "Comment not terminated"`
			`\| Unterminated_string ->`
			`fprintf ppf "String literal not terminated"`
			`\| Unterminated_string_in_comment ->`
			`fprintf ppf "This comment contains an unterminated string literal"`
			`\| Keyword_as_label kwd ->`
			fprintf ppf "`%s' is a keyword, it cannot be used as label name" kwd
			`;;`

			`}`

			`let blank = [' ' '\010' '\013' '\009' '\012']`
			`let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_']`
			`let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222']`
			`let identchar =`
			`['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '\'' '0'-'9']`
			`let symbolchar =`
			`['!' '$' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '\|' '~']`
			`let decimal_literal = ['0'-'9']+`
			`let hex_literal = '0' ['x' 'X'] ['0'-'9' 'A'-'F' 'a'-'f']+`
			`let oct_literal = '0' ['o' 'O'] ['0'-'7']+`
			`let bin_literal = '0' ['b' 'B'] ['0'-'1']+`
			`let float_literal =`
			`['0'-'9']+ ('.' ['0'-'9']* )? (['e' 'E'] ['+' '-']? ['0'-'9']+)?`

			`rule token = parse`
			`blank +`
			`{ token lexbuf }`
			`\| "_"`
			`{ UNDERSCORE }`
			`\| "~" { TILDE }`
			`\| "~" lowercase identchar * ':'`
			`{ let s = Lexing.lexeme lexbuf in`
			`let name = String.sub s 1 (String.length s - 2) in`
			`if Hashtbl.mem keyword_table name then`
			`raise (Error(Keyword_as_label name, Lexing.lexeme_start lexbuf,`
			`Lexing.lexeme_end lexbuf));`
			`LABEL name }`
			`\| "?" { QUESTION }`
			`\| "?" lowercase identchar * ':'`
			`{ let s = Lexing.lexeme lexbuf in`
			`let name = String.sub s 1 (String.length s - 2) in`
			`if Hashtbl.mem keyword_table name then`
			`raise (Error(Keyword_as_label name, Lexing.lexeme_start lexbuf,`
			`Lexing.lexeme_end lexbuf));`
			`OPTLABEL name }`
			`\| lowercase identchar *`
			`{ let s = Lexing.lexeme lexbuf in`
			`try`
			`Hashtbl.find keyword_table s`
			`with Not_found ->`
			`LIDENT s }`
			`\| uppercase identchar *`
			`{ UIDENT(Lexing.lexeme lexbuf) } (* No capitalized keywords *)`
			`\| decimal_literal \| hex_literal \| oct_literal \| bin_literal`
			`{ INT (int_of_string(Lexing.lexeme lexbuf)) }`
			`\| float_literal`
			`{ FLOAT (Lexing.lexeme lexbuf) }`
			`\| "\""`
			`{ reset_string_buffer();`
			`let string_start = Lexing.lexeme_start lexbuf in`
			`string_start_pos := string_start;`
			`string lexbuf;`
			`lexbuf.Lexing.lex_start_pos <-`
			`string_start - lexbuf.Lexing.lex_abs_pos;`
			`STRING (get_stored_string()) }`
			`\| "'" [^ '\\' '\''] "'"`
			`{ CHAR(Lexing.lexeme_char lexbuf 1) }`
			`\| "'" '\\' ['\\' '\'' 'n' 't' 'b' 'r'] "'"`
			`{ CHAR(char_for_backslash (Lexing.lexeme_char lexbuf 2)) }`
			`\| "'" '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"`
			`{ CHAR(char_for_decimal_code lexbuf 2) }`
			`\| "(*"`
			`{ comment_start_pos := [Lexing.lexeme_start lexbuf];`
			`comment lexbuf;`
			`token lexbuf }`
			`\| "(*)"`
			`{ let loc = { Location.loc_start = Lexing.lexeme_start lexbuf;`
			`Location.loc_end = Lexing.lexeme_end lexbuf - 1;`
			`Location.loc_ghost = false }`
			`and warn = Warnings.Comment "the start of a comment"`
			`in`
			`Location.prerr_warning loc warn;`
			`comment_start_pos := [Lexing.lexeme_start lexbuf];`
			`comment lexbuf;`
			`token lexbuf`
			`}`
			`\| "*)"`
			`{ let loc = { Location.loc_start = Lexing.lexeme_start lexbuf;`
			`Location.loc_end = Lexing.lexeme_end lexbuf;`
			`Location.loc_ghost = false }`
			`and warn = Warnings.Comment "not the end of a comment"`
			`in`
			`Location.prerr_warning loc warn;`
			`lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 1;`
			`STAR`
			`}`
			`\| "#" [' ' '\t']* ['0'-'9']+ [^ '\n' '\r'] * ('\n' \| '\r' \| "\r\n")`
			`(* # linenum ... *)`
			`{ token lexbuf }`
			`\| "#" { SHARP }`
			`\| "&" { AMPERSAND }`
			`\| "&&" { AMPERAMPER }`
			\| "`" { BACKQUOTE }
			`\| "'" { QUOTE }`
			`\| "(" { LPAREN }`
			`\| ")" { RPAREN }`
			`\| "*" { STAR }`
			`\| "," { COMMA }`
			`\| "??" { QUESTION2 }`
			`\| "->" { MINUSGREATER }`
			`\| "." { DOT }`
			`\| ".." { DOTDOT }`
			`\| ":" { COLON }`
			`\| "::" { COLONCOLON }`
			`\| ":=" { COLONEQUAL }`
			`\| ":>" { COLONGREATER }`
			`\| ";" { SEMI }`
			`\| ";;" { SEMISEMI }`
			`\| "<" { LESS }`
			`\| "<-" { LESSMINUS }`
			`\| "=" { EQUAL }`
			`\| "[" { LBRACKET }`
			`\| "[\|" { LBRACKETBAR }`
			`\| "[<" { LBRACKETLESS }`
			`\| "]" { RBRACKET }`
			`\| "{" { LBRACE }`
			`\| "{<" { LBRACELESS }`
			`\| "\|" { BAR }`
			`\| "\|\|" { BARBAR }`
			`\| "\|]" { BARRBRACKET }`
			`\| ">" { GREATER }`
			`\| ">]" { GREATERRBRACKET }`
			`\| "}" { RBRACE }`
			`\| ">}" { GREATERRBRACE }`

			`\| "!=" { INFIXOP0 "!=" }`
			`\| "+" { PLUS }`
			`\| "-" { MINUS }`
			`\| "-." { MINUSDOT }`

			`\| "!" symbolchar *`
			`{ PREFIXOP(Lexing.lexeme lexbuf) }`
			`\| ['~' '?'] symbolchar +`
			`{ PREFIXOP(Lexing.lexeme lexbuf) }`
			`\| ['=' '<' '>' '\|' '&' '$'] symbolchar *`
			`{ INFIXOP0(Lexing.lexeme lexbuf) }`
			`\| ['@' '^'] symbolchar *`
			`{ INFIXOP1(Lexing.lexeme lexbuf) }`
			`\| ['+' '-'] symbolchar *`
			`{ INFIXOP2(Lexing.lexeme lexbuf) }`
			`\| "*" symbolchar `
			`{ INFIXOP4(Lexing.lexeme lexbuf) }`
			`\| ['' '/' '%'] symbolchar `
			`{ INFIXOP3(Lexing.lexeme lexbuf) }`
			`\| eof { EOF }`
			`\| _`
			`{ raise (Error(Illegal_character ((Lexing.lexeme lexbuf).[0]),`
			`Lexing.lexeme_start lexbuf, Lexing.lexeme_end lexbuf)) }`

			`and comment = parse`
			`"(*"`
			`{ comment_start_pos := Lexing.lexeme_start lexbuf :: !comment_start_pos;`
			`comment lexbuf;`
			`}`
			`\| "*)"`
			`{ match !comment_start_pos with`
			`\| [] -> assert false`
			`\| [x] -> comment_start_pos := [];`
			`\| _ :: l -> comment_start_pos := l;`
			`comment lexbuf;`
			`}`
			`\| "\""`
			`{ reset_string_buffer();`
			`string_start_pos := Lexing.lexeme_start lexbuf;`
			`begin try string lexbuf`
			`with Error (Unterminated_string, _, _) ->`
			`let st = List.hd !comment_start_pos in`
			`raise (Error (Unterminated_string_in_comment, st, st + 2))`
			`end;`
			`string_buff := initial_string_buffer;`
			`comment lexbuf }`
			`\| "''"`
			`{ comment lexbuf }`
			`\| "'" [^ '\\' '\''] "'"`
			`{ comment lexbuf }`
			`\| "'\\" ['\\' '\'' 'n' 't' 'b' 'r'] "'"`
			`{ comment lexbuf }`
			`\| "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"`
			`{ comment lexbuf }`
			`\| eof`
			`{ let st = List.hd !comment_start_pos in`
			`raise (Error (Unterminated_comment, st, st + 2));`
			`}`
			`\| _`
			`{ comment lexbuf }`

			`and string = parse`
			`'"'`
			`{ () }`
			`\| '\\' ("\010" \| "\013" \| "\013\010") [' ' '\009'] *`
			`{ string lexbuf }`
			`\| '\\' ['\\' '"' 'n' 't' 'b' 'r']`
			`{ store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1));`
			`string lexbuf }`
			`\| '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']`
			`{ store_string_char(char_for_decimal_code lexbuf 1);`
			`string lexbuf }`
			`\| eof`
			`{ raise (Error (Unterminated_string,`
			`!string_start_pos, !string_start_pos+1)) }`
			`\| _`
			`{ store_string_char(Lexing.lexeme_char lexbuf 0);`
			`string lexbuf }`