172 lines
4.9 KiB
OCaml
172 lines
4.9 KiB
OCaml
(***********************************************************************)
|
|
(* *)
|
|
(* Objective Caml *)
|
|
(* *)
|
|
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
|
|
(* *)
|
|
(* Copyright 1996 Institut National de Recherche en Informatique et *)
|
|
(* en Automatique. All rights reserved. This file is distributed *)
|
|
(* under the terms of the GNU Library General Public License. *)
|
|
(* *)
|
|
(***********************************************************************)
|
|
|
|
(* $Id$ *)
|
|
|
|
type token =
|
|
Kwd of string
|
|
| Ident of string
|
|
| Int of int
|
|
| Float of float
|
|
| String of string
|
|
| Char of char
|
|
|
|
|
|
(* The string buffering machinery *)
|
|
|
|
let initial_buffer = String.create 32
|
|
|
|
let buffer = ref initial_buffer
|
|
let bufpos = ref 0
|
|
|
|
let reset_buffer () =
|
|
buffer := initial_buffer;
|
|
bufpos := 0
|
|
|
|
let store c =
|
|
if !bufpos >= String.length !buffer then begin
|
|
let newbuffer = String.create (2 * !bufpos) in
|
|
String.blit !buffer 0 newbuffer 0 !bufpos;
|
|
buffer := newbuffer
|
|
end;
|
|
String.set !buffer !bufpos c;
|
|
incr bufpos
|
|
|
|
let get_string () =
|
|
let s = String.sub !buffer 0 !bufpos in buffer := initial_buffer; s
|
|
|
|
(* The lexer *)
|
|
|
|
let make_lexer keywords =
|
|
|
|
let kwd_table = Hashtbl.create 17 in
|
|
List.iter (fun s -> Hashtbl.add kwd_table s (Kwd s)) keywords;
|
|
|
|
let ident_or_keyword id =
|
|
try Hashtbl.find kwd_table id with Not_found -> Ident id
|
|
|
|
and keyword_or_error c =
|
|
let s = String.make 1 c in
|
|
try Hashtbl.find kwd_table s
|
|
with Not_found -> raise(Stream.Error("Illegal character " ^ s)) in
|
|
|
|
let rec next_token = parser
|
|
[< ' ' '|'\010'|'\013'|'\009'|'\026'|'\012'; s >] ->
|
|
next_token s
|
|
| [< ' 'A'..'Z'|'a'..'z'|'\192'..'\255' as c; s>] ->
|
|
reset_buffer(); store c; ident s
|
|
| [< ' '!'|'%'|'&'|'$'|'#'|'+'|'/'|':'|'<'|'='|'>'|'?'|'@'|'\\'|
|
|
'~'|'^'|'|'|'*' as c; s >] ->
|
|
reset_buffer(); store c; ident2 s
|
|
| [< ' '0'..'9' as c; s>] ->
|
|
reset_buffer(); store c; number s
|
|
| [< ' '\''; c = char; ' '\'' >] ->
|
|
Some(Char c)
|
|
| [< ' '"' (* '"' *); s >] ->
|
|
reset_buffer(); Some(String(string s))
|
|
| [< ' '-'; s >] ->
|
|
neg_number s
|
|
| [< ' '('; s >] ->
|
|
maybe_comment s
|
|
| [< ' c >] ->
|
|
Some(keyword_or_error c)
|
|
| [< >] ->
|
|
None
|
|
|
|
and ident = parser
|
|
[< ' 'A'..'Z'|'a'..'z'|'\192'..'\255'|'0'..'9'|'_'|'\'' as c; s>] ->
|
|
store c; ident s
|
|
| [< >] ->
|
|
Some(ident_or_keyword(get_string()))
|
|
|
|
and ident2 = parser
|
|
[< ' '!'|'%'|'&'|'$'|'#'|'+'|'-'|'/'|':'|'<'|'='|'>'|'?'|'@'|'\\'|
|
|
'~'|'^'|'|'|'*' as c; s >] ->
|
|
store c; ident2 s
|
|
| [< >] ->
|
|
Some(ident_or_keyword(get_string()))
|
|
|
|
and neg_number = parser
|
|
[< ' '0'..'9' as c; s >] ->
|
|
reset_buffer(); store '-'; store c; number s
|
|
| [< s >] ->
|
|
reset_buffer(); store '-'; ident2 s
|
|
|
|
and number = parser
|
|
[< ' '0'..'9' as c; s >] ->
|
|
store c; number s
|
|
| [< ' '.'; s >] ->
|
|
store '.'; decimal_part s
|
|
| [< ' 'e'|'E'; s >] ->
|
|
store 'E'; exponent_part s
|
|
| [< >] ->
|
|
Some(Int(int_of_string(get_string())))
|
|
|
|
and decimal_part = parser
|
|
[< ' '0'..'9' as c; s >] ->
|
|
store c; decimal_part s
|
|
| [< ' 'e'|'E'; s >] ->
|
|
store 'E'; exponent_part s
|
|
| [< >] ->
|
|
Some(Float(float_of_string(get_string())))
|
|
|
|
and exponent_part = parser
|
|
[< ' '+'|'-' as c; s >] ->
|
|
store c; end_exponent_part s
|
|
| [< s >] ->
|
|
end_exponent_part s
|
|
|
|
and end_exponent_part = parser
|
|
[< ' '0'..'9' as c; s >] ->
|
|
store c; end_exponent_part s
|
|
| [< >] ->
|
|
Some(Float(float_of_string(get_string())))
|
|
|
|
and string = parser
|
|
[< ' '"' (* '"' *) >] -> get_string()
|
|
| [< ' '\\'; c = escape; s >] -> store c; string s
|
|
| [< ' c; s >] -> store c; string s
|
|
|
|
and char = parser
|
|
[< ' '\\'; c = escape >] -> c
|
|
| [< ' c >] -> c
|
|
|
|
and escape = parser
|
|
[< ' 'n' >] -> '\n'
|
|
| [< ' 'r' >] -> '\r'
|
|
| [< ' 't' >] -> '\t'
|
|
| [< ' '0'..'9' as c1; ' '0'..'9' as c2; ' '0'..'9' as c3 >] ->
|
|
Char.chr((Char.code c1 - 48) * 100 +
|
|
(Char.code c2 - 48) * 10 +
|
|
(Char.code c3 - 48))
|
|
| [< ' c >] -> c
|
|
|
|
and maybe_comment = parser
|
|
[< ' '*'; s >] -> comment s; next_token s
|
|
| [< >] -> Some(keyword_or_error '(')
|
|
|
|
and comment = parser
|
|
[< ' '('; s >] -> maybe_nested_comment s
|
|
| [< ' '*'; s >] -> maybe_end_comment s
|
|
| [< ' c; s >] -> comment s
|
|
|
|
and maybe_nested_comment = parser
|
|
[< ' '*'; s >] -> comment s; comment s
|
|
| [< ' c; s >] -> comment s
|
|
|
|
and maybe_end_comment = parser
|
|
[< ' ')' >] -> ()
|
|
| [< ' '*'; s >] -> maybe_end_comment s
|
|
| [< ' c; s >] -> comment s
|
|
|
|
in fun input -> Stream.from (fun count -> next_token input)
|