231 lines
6.7 KiB
OCaml
231 lines
6.7 KiB
OCaml
(***********************************************************************)
|
|
(* *)
|
|
(* Objective Caml *)
|
|
(* *)
|
|
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
|
(* *)
|
|
(* Copyright 1996 Institut National de Recherche en Informatique et *)
|
|
(* en Automatique. All rights reserved. This file is distributed *)
|
|
(* under the terms of the Q Public License version 1.0. *)
|
|
(* *)
|
|
(***********************************************************************)
|
|
|
|
(* $Id$ *)
|
|
|
|
(* The lexical analyzer for lexer definitions. Bootstrapped! *)
|
|
|
|
{
|
|
open Syntax
|
|
open Parser
|
|
|
|
(* Auxiliaries for the lexical analyzer *)
|
|
|
|
let brace_depth = ref 0
|
|
and comment_depth = ref 0
|
|
|
|
exception Lexical_error of string * int * int
|
|
|
|
let initial_string_buffer = String.create 256
|
|
let string_buff = ref initial_string_buffer
|
|
let string_index = ref 0
|
|
|
|
let reset_string_buffer () =
|
|
string_buff := initial_string_buffer;
|
|
string_index := 0
|
|
|
|
let store_string_char c =
|
|
if !string_index >= String.length !string_buff then begin
|
|
let new_buff = String.create (String.length !string_buff * 2) in
|
|
String.blit !string_buff 0 new_buff 0 (String.length !string_buff);
|
|
string_buff := new_buff
|
|
end;
|
|
!string_buff.[!string_index] <- c;
|
|
incr string_index
|
|
|
|
let get_stored_string () =
|
|
String.sub !string_buff 0 !string_index
|
|
|
|
let char_for_backslash = function
|
|
'n' -> '\n'
|
|
| 't' -> '\t'
|
|
| 'b' -> '\b'
|
|
| 'r' -> '\r'
|
|
| c -> c
|
|
|
|
let char_for_decimal_code lexbuf i =
|
|
Char.chr(100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) +
|
|
10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) +
|
|
(Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48))
|
|
|
|
let line_num = ref 1
|
|
let line_start_pos = ref 0
|
|
|
|
let handle_lexical_error fn lexbuf =
|
|
let line = !line_num
|
|
and column = Lexing.lexeme_start lexbuf - !line_start_pos in
|
|
try
|
|
fn lexbuf
|
|
with Lexical_error(msg, _, _) ->
|
|
raise(Lexical_error(msg, line, column))
|
|
|
|
let warning lexbuf msg =
|
|
Printf.eprintf "ocamllex warning:\nFile \"%s\", line %d, character %d: %s.\n"
|
|
Sys.argv.(1) !line_num
|
|
(Lexing.lexeme_start lexbuf - !line_start_pos) msg;
|
|
flush stderr;
|
|
;;
|
|
|
|
}
|
|
|
|
let identstart =
|
|
['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255']
|
|
let identbody =
|
|
['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '\'' '0'-'9']
|
|
let backslash_escapes =
|
|
['\\' '"' '\'' 'n' 't' 'b' 'r']
|
|
|
|
rule main = parse
|
|
[' ' '\013' '\009' '\012' ] +
|
|
{ main lexbuf }
|
|
| '\010'
|
|
{ line_start_pos := Lexing.lexeme_end lexbuf;
|
|
incr line_num;
|
|
main lexbuf }
|
|
| "(*"
|
|
{ comment_depth := 1;
|
|
handle_lexical_error comment lexbuf;
|
|
main lexbuf }
|
|
| '_' { Tunderscore }
|
|
| identstart identbody *
|
|
{ match Lexing.lexeme lexbuf with
|
|
"rule" -> Trule
|
|
| "parse" -> Tparse
|
|
| "and" -> Tand
|
|
| "eof" -> Teof
|
|
| "let" -> Tlet
|
|
| s -> Tident s }
|
|
| '"'
|
|
{ reset_string_buffer();
|
|
handle_lexical_error string lexbuf;
|
|
Tstring(get_stored_string()) }
|
|
| "'" [^ '\\'] "'"
|
|
{ Tchar(Char.code(Lexing.lexeme_char lexbuf 1)) }
|
|
| "'" '\\' backslash_escapes "'"
|
|
{ Tchar(Char.code(char_for_backslash (Lexing.lexeme_char lexbuf 2))) }
|
|
| "'" '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
|
|
{ Tchar(Char.code(char_for_decimal_code lexbuf 2)) }
|
|
| '{'
|
|
{ let n1 = Lexing.lexeme_end lexbuf
|
|
and l1 = !line_num
|
|
and s1 = !line_start_pos in
|
|
brace_depth := 1;
|
|
let n2 = handle_lexical_error action lexbuf in
|
|
Taction({start_pos = n1; end_pos = n2;
|
|
start_line = l1; start_col = n1 - s1}) }
|
|
| '=' { Tequal }
|
|
| '|' { Tor }
|
|
| '[' { Tlbracket }
|
|
| ']' { Trbracket }
|
|
| '*' { Tstar }
|
|
| '?' { Tmaybe }
|
|
| '+' { Tplus }
|
|
| '(' { Tlparen }
|
|
| ')' { Trparen }
|
|
| '^' { Tcaret }
|
|
| '-' { Tdash }
|
|
| eof { Tend }
|
|
| _
|
|
{ raise(Lexical_error
|
|
("illegal character " ^ String.escaped(Lexing.lexeme lexbuf),
|
|
!line_num, Lexing.lexeme_start lexbuf - !line_start_pos)) }
|
|
|
|
and action = parse
|
|
'{'
|
|
{ incr brace_depth;
|
|
action lexbuf }
|
|
| '}'
|
|
{ decr brace_depth;
|
|
if !brace_depth = 0 then Lexing.lexeme_start lexbuf else action lexbuf }
|
|
| '"'
|
|
{ reset_string_buffer();
|
|
string lexbuf;
|
|
reset_string_buffer();
|
|
action lexbuf }
|
|
| "'" [^ '\\' '\''] "'"
|
|
{ action lexbuf }
|
|
| "'" '\\' backslash_escapes "'"
|
|
{ action lexbuf }
|
|
| "'" '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
|
|
{ action lexbuf }
|
|
| "(*"
|
|
{ comment_depth := 1;
|
|
comment lexbuf;
|
|
action lexbuf }
|
|
| eof
|
|
{ raise (Lexical_error("unterminated action", 0, 0)) }
|
|
| '\010'
|
|
{ line_start_pos := Lexing.lexeme_end lexbuf;
|
|
incr line_num;
|
|
action lexbuf }
|
|
| _
|
|
{ action lexbuf }
|
|
|
|
and string = parse
|
|
'"'
|
|
{ () }
|
|
| '\\' [' ' '\013' '\009' '\012'] * '\010' [' ' '\013' '\009' '\012'] *
|
|
{ line_start_pos := Lexing.lexeme_end lexbuf;
|
|
incr line_num;
|
|
string lexbuf }
|
|
| '\\' backslash_escapes
|
|
{ store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1));
|
|
string lexbuf }
|
|
| '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']
|
|
{ store_string_char(char_for_decimal_code lexbuf 1);
|
|
string lexbuf }
|
|
| eof
|
|
{ raise(Lexical_error("unterminated string", 0, 0)) }
|
|
| '\010'
|
|
{ store_string_char '\010';
|
|
line_start_pos := Lexing.lexeme_end lexbuf;
|
|
incr line_num;
|
|
string lexbuf }
|
|
| '\\' _
|
|
{ warning lexbuf
|
|
(Printf.sprintf "illegal backslash escape in string: `\\%c'"
|
|
(Lexing.lexeme_char lexbuf 1));
|
|
store_string_char(Lexing.lexeme_char lexbuf 0);
|
|
store_string_char(Lexing.lexeme_char lexbuf 1);
|
|
string lexbuf }
|
|
| _
|
|
{ store_string_char(Lexing.lexeme_char lexbuf 0);
|
|
string lexbuf }
|
|
|
|
and comment = parse
|
|
"(*"
|
|
{ incr comment_depth; comment lexbuf }
|
|
| "*)"
|
|
{ decr comment_depth;
|
|
if !comment_depth = 0 then () else comment lexbuf }
|
|
| '"'
|
|
{ reset_string_buffer();
|
|
string lexbuf;
|
|
reset_string_buffer();
|
|
comment lexbuf }
|
|
| "''"
|
|
{ comment lexbuf }
|
|
| "'" [^ '\\' '\''] "'"
|
|
{ comment lexbuf }
|
|
| "'\\" backslash_escapes "'"
|
|
{ comment lexbuf }
|
|
| "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
|
|
{ comment lexbuf }
|
|
| eof
|
|
{ raise(Lexical_error("unterminated comment", 0, 0)) }
|
|
| '\010'
|
|
{ line_start_pos := Lexing.lexeme_end lexbuf;
|
|
incr line_num;
|
|
comment lexbuf }
|
|
| _
|
|
{ comment lexbuf }
|