ocaml/testsuite/tools/lexcmm.mll

(**************************************************************************)
(*                                                                        *)
(*                                 OCaml                                  *)
(*                                                                        *)
(*             Xavier Leroy, projet Cristal, INRIA Rocquencourt           *)
(*                                                                        *)
(*   Copyright 1996 Institut National de Recherche en Informatique et     *)
(*     en Automatique.                                                    *)
(*                                                                        *)
(*   All rights reserved.  This file is distributed under the terms of    *)
(*   the GNU Lesser General Public License version 2.1, with the          *)
(*   special exception on linking described in the file LICENSE.          *)
(*                                                                        *)
(**************************************************************************)
{
open Parsecmm

type error =
    Illegal_character
  | Unterminated_comment
  | Unterminated_string

exception Error of error

(* For nested comments *)

let comment_depth = ref 0
(* The table of keywords *)

let keyword_table =
  Misc.create_hashtable 149 [
    "absf", ABSF;
    "addr", ADDR;
    "align", ALIGN;
    "alloc", ALLOC;
    "and", AND;
    "app", APPLY;
    "assign", ASSIGN;
    "byte", BYTE;
    "case", CASE;
    "catch", CATCH;
    "checkbound", CHECKBOUND;
    "data", DATA;
    "exit", EXIT;
    "extcall", EXTCALL;
    "float", FLOAT;
    "float32", FLOAT32;
    "float64", FLOAT64;
    "floatofint", FLOATOFINT;
    "function", FUNCTION;
    "global", GLOBAL;
    "half", HALF;
    "if", IF;
    "int", INT;
    "int32", INT32;
    "intoffloat", INTOFFLOAT;
    "string", KSTRING;
    "let", LET;
    "letmut", LETMUT;
    "load", LOAD;
    "mod", MODI;
    "mulh", MULH;
    "or", OR;
    "proj", PROJ;
    "raise", RAISE Lambda.Raise_regular;
    "reraise", RAISE Lambda.Raise_reraise;
    "raise_notrace", RAISE Lambda.Raise_notrace;
    "seq", SEQ;
    "signed", SIGNED;
    "skip", SKIP;
    "store", STORE;
    "switch", SWITCH;
    "try", TRY;
    "unit", UNIT;
    "unsigned", UNSIGNED;
    "val", VAL;
    "while", WHILE;
    "with", WITH;
    "xor", XOR;
    "addraref", ADDRAREF;
    "intaref", INTAREF;
    "floataref", FLOATAREF;
    "addraset", ADDRASET;
    "intaset", INTASET;
    "floataset", FLOATASET
  ]
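
(* Illustration (not part of the lexer): keywords are resolved in the
   identifier rule below, which looks the lexeme up in this table and falls
   back to IDENT when the lookup fails.  For example:
     Hashtbl.find keyword_table "alloc"  returns ALLOC
     Hashtbl.find keyword_table "foo"    raises Not_found,
                                         so "foo" lexes as IDENT "foo" *)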
(* To buffer string literals *)

let initial_string_buffer = Bytes.create 256
let string_buff = ref initial_string_buffer
let string_index = ref 0

let reset_string_buffer () =
  string_buff := initial_string_buffer;
  string_index := 0

let store_string_char c =
  if !string_index >= Bytes.length (!string_buff) then begin
    let new_buff = Bytes.create (Bytes.length (!string_buff) * 2) in
    Bytes.blit (!string_buff) 0 new_buff 0 (Bytes.length (!string_buff));
    string_buff := new_buff
  end;
  Bytes.unsafe_set (!string_buff) (!string_index) c;
  incr string_index

let get_stored_string () =
  let s = Bytes.sub_string (!string_buff) 0 (!string_index) in
  string_buff := initial_string_buffer;
  s
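
(* Usage sketch (illustrative only): the buffer doubles on demand, so storing
   the 257th character of a literal grows it from 256 to 512 bytes, and
   get_stored_string copies out the accumulated prefix before restoring
   initial_string_buffer for the next literal.
     reset_string_buffer ();
     String.iter store_string_char "abc";
     get_stored_string ()          returns "abc" *)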
(* To translate escape sequences *)

let char_for_backslash = function
    'n' -> '\010'
  | 'r' -> '\013'
  | 'b' -> '\008'
  | 't' -> '\009'
  | c   -> c

let char_for_decimal_code lexbuf i =
  Char.chr(100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) +
            10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) +
                 (Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48))
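
(* Worked example (illustrative): for the escape "\065", the string rule
   below calls char_for_decimal_code lexbuf 1 on the digits '0' '6' '5',
   whose codes are 48, 54, 53.  Subtracting 48 gives 0, 6, 5, so the result
   is Char.chr (100*0 + 10*6 + 5) = Char.chr 65 = 'A'. *)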
(* Error report *)

let report_error lexbuf msg =
  prerr_string "Lexical error around character ";
  prerr_int (Lexing.lexeme_start lexbuf);
  match msg with
    Illegal_character ->
      prerr_string ": illegal character"
  | Unterminated_comment ->
      prerr_string ": unterminated comment"
  | Unterminated_string ->
      prerr_string ": unterminated string"

}
let newline = ('\013'* '\010')

rule token = parse
  newline
    { Lexing.new_line lexbuf; token lexbuf }
| [' ' '\009' '\012'] +
    { token lexbuf }
| "+a" { ADDA }
| "+v" { ADDV }
| "+f" { ADDF }
| "+" { ADDI }
| ">>s" { ASR }
| ":" { COLON }
| "/f" { DIVF }
| "/" { DIVI }
| eof { EOF }
| "==a" { EQA }
| "==f" { EQF }
| "==" { EQI }
| ">=a" { GEA }
| ">=f" { GEF }
| ">=" { GEI }
| ">a" { GTA }
| ">f" { GTF }
| ">" { GTI }
| "[" { LBRACKET }
| "<=a" { LEA }
| "<=f" { LEF }
| "<=" { LEI }
| "(" { LPAREN }
| "<<" { LSL }
| ">>u" { LSR }
| "<a" { LTA }
| "<f" { LTF }
| "<" { LTI }
| "*f" { MULF }
| "*" { STAR }
| "!=a" { NEA }
| "!=f" { NEF }
| "!=" { NEI }
| "!>=f" { NGEF }
| "!>f" { NGTF }
| "!<=f" { NLEF }
| "!<f" { NLTF }
| "]" { RBRACKET }
| ")" { RPAREN }
| "-f" { SUBF }
| "-" { SUBI }
| '-'? (['0'-'9']+ | "0x" ['0'-'9' 'a'-'f' 'A'-'F']+
        | "0o" ['0'-'7']+ | "0b" ['0'-'1']+)
    { INTCONST(int_of_string(Lexing.lexeme lexbuf)) }
| '-'? ['0'-'9']+ ('.' ['0'-'9']*)? (['e' 'E'] ['+' '-']? ['0'-'9']+)?
    { FLOATCONST(Lexing.lexeme lexbuf) }
| ['A'-'Z' 'a'-'z' '\223'-'\246' '\248'-'\255' ]
  (['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255'
    '\'' '0'-'9' ]) * '/'? (['0'-'9'] *)
    { let s = Lexing.lexeme lexbuf in
      try
        Hashtbl.find keyword_table s
      with Not_found ->
        IDENT s }
| "\""
    { reset_string_buffer();
      string lexbuf;
      STRING (get_stored_string()) }
| "(*"
    { comment_depth := 1;
      comment lexbuf;
      token lexbuf }
| '{' ['A'-'Z' 'a'-'z' '/' ',' '.' '-' '_' ' ' '0'-'9']+
  ':' ['0'-'9']+ ',' ['0'-'9']+ '-' ['0'-'9']+ '}'
    { let loc_s = Lexing.lexeme lexbuf in
      let pos_fname, pos_lnum, start, end_ =
        Scanf.sscanf loc_s "{%s@:%i,%i-%i}" (fun file line start end_ ->
          (file, line, start, end_))
      in
      let loc_start =
        Lexing.{ pos_fname; pos_lnum; pos_bol = 0; pos_cnum = start }
      in
      let loc_end =
        Lexing.{ pos_fname; pos_lnum; pos_bol = 0; pos_cnum = end_ }
      in
      let location = Location.{ loc_start; loc_end; loc_ghost = false } in
      LOCATION location }
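(* Example (hypothetical file name): the literal {test.cmm:12,3-17} yields a
   Location.t whose loc_start and loc_end share pos_fname = "test.cmm" and
   pos_lnum = 12, with pos_cnum 3 and 17 respectively; pos_bol is left at 0. *)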
| _ { raise(Error(Illegal_character)) }

and comment = parse
  "(*"
    { comment_depth := succ !comment_depth; comment lexbuf }
| "*)"
    { comment_depth := pred !comment_depth;
      if !comment_depth > 0 then comment lexbuf }
| eof
    { raise (Error(Unterminated_comment)) }
| newline
    { Lexing.new_line lexbuf; comment lexbuf }
| _
    { comment lexbuf }
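
(* Note (descriptive only): comment_depth counts currently open comment
   markers, so a comment nested inside another raises the depth to 2, each
   closer decrements it, and token scanning resumes only once the depth
   returns to zero. *)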

and string = parse
  '"'
    { () }
| '\\' [' ' '\010' '\013' '\009' '\026' '\012'] +
    { string lexbuf }
| '\\' ['\\' '"' 'n' 't' 'b' 'r']
    { store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1));
      string lexbuf }
| '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']
    { store_string_char(char_for_decimal_code lexbuf 1);
      string lexbuf }
| eof
    { raise (Error(Unterminated_string)) }
| _
    { store_string_char(Lexing.lexeme_char lexbuf 0);
      string lexbuf }
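
(* Typical use (sketch, not part of this file): the generated Lexcmm.token is
   fed to the Parsecmm parser by the test driver.  The parser entry-point name
   "phrase" is an assumption here, not something this file defines:

     let parse_channel ic =
       let lb = Lexing.from_channel ic in
       try Parsecmm.phrase Lexcmm.token lb
       with Lexcmm.Error err -> Lexcmm.report_error lb err; exit 2
*)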