(***********************************************************************) (* *) (* Objective Caml *) (* *) (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) (* *) (* Copyright 1996 Institut National de Recherche en Informatique et *) (* Automatique. Distributed only by permission. *) (* *) (***********************************************************************) (* $Id$ *) { open Parsecmm type error = Illegal_character | Unterminated_comment | Unterminated_string exception Error of error (* For nested comments *) let comment_depth = ref 0 (* The table of keywords *) let keyword_table = Misc.create_hashtable 149 [ "absf", ABSF; "addr", ADDR; "align", ALIGN; "alloc", ALLOC; "and", AND; "app", APPLY; "assign", ASSIGN; "byte", BYTE; "case", CASE; "catch", CATCH; "checkbound", CHECKBOUND; "exit", EXIT; "extcall", EXTCALL; "float", FLOAT; "floatofint", FLOATOFINT; "function", FUNCTION; "half", HALF; "if", IF; "int", INT; "intoffloat", INTOFFLOAT; "string", KSTRING; "let", LET; "load", LOAD; "mod", MODI; "or", OR; "proj", PROJ; "raise", RAISE; "seq", SEQ; "signed", SIGNED; "skip", SKIP; "store", STORE; "switch", SWITCH; "try", TRY; "unit", UNIT; "unsigned", UNSIGNED; "while", WHILE; "with", WITH; "xor", XOR; "addraref", ADDRAREF; "intaref", INTAREF; "floataref", FLOATAREF; "addraset", ADDRASET; "intaset", INTASET; "floataset", FLOATASET ] (* To buffer string literals *) let initial_string_buffer = String.create 256 let string_buff = ref initial_string_buffer let string_index = ref 0 let reset_string_buffer () = string_buff := initial_string_buffer; string_index := 0 let store_string_char c = if !string_index >= String.length (!string_buff) then begin let new_buff = String.create (String.length (!string_buff) * 2) in String.blit (!string_buff) 0 new_buff 0 (String.length (!string_buff)); string_buff := new_buff end; String.unsafe_set (!string_buff) (!string_index) c; incr string_index let get_stored_string () = let s = String.sub (!string_buff) 0 (!string_index) in string_buff := initial_string_buffer; s (* To translate escape sequences *) let char_for_backslash = function 'n' -> '\010' | 'r' -> '\013' | 'b' -> '\008' | 't' -> '\009' | c -> c let char_for_decimal_code lexbuf i = Char.chr(100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) + 10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) + (Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48)) (* Error report *) let report_error lexbuf msg = prerr_string "Lexical error around character "; prerr_int (Lexing.lexeme_start lexbuf); match msg with Illegal_character -> prerr_string ": illegal character" | Unterminated_comment -> prerr_string ": unterminated comment" | Unterminated_string -> prerr_string ": unterminated string" } rule token = parse [' ' '\010' '\013' '\009' '\012'] + { token lexbuf } | "+a" { ADDA } | "+f" { ADDF } | "+" { ADDI } | ">>s" { ASR } | ":" { COLON } | "/f" { DIVF } | "/" { DIVI } | eof { EOF } | "==a" { EQA } | "==f" { EQF } | "==" { EQI } | ">=a" { GEA } | ">=f" { GEF } | ">=" { GEI } | ">a" { GTA } | ">f" { GTF } | ">" { GTI } | "[" { LBRACKET } | "<=a" { LEA } | "<=f" { LEF } | "<=" { LEI } | "(" { LPAREN } | "<<" { LSL } | ">>u" { LSR } | " IDENT s } | "\"" { reset_string_buffer(); string lexbuf; STRING (get_stored_string()) } | "(*" { comment_depth := 1; comment lexbuf; token lexbuf } | _ { raise(Error(Illegal_character)) } and comment = parse "(*" { comment_depth := succ !comment_depth; comment lexbuf } | "*)" { comment_depth := pred !comment_depth; if !comment_depth > 0 then comment lexbuf } | eof { raise (Error(Unterminated_comment)) } | _ { comment lexbuf } and string = parse '"' { () } | '\\' [' ' '\010' '\013' '\009' '\026' '\012'] + { string lexbuf } | '\\' ['\\' '"' 'n' 't' 'b' 'r'] { store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1)); string lexbuf } | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] { store_string_char(char_for_decimal_code lexbuf 1); string lexbuf } | eof { raise (Error(Unterminated_string)) } | _ { store_string_char(Lexing.lexeme_char lexbuf 0); string lexbuf }