ocaml/byterun/lexing.c

107 lines
3.3 KiB
C

/***********************************************************************/
/* */
/* Objective Caml */
/* */
/* Xavier Leroy, projet Cristal, INRIA Rocquencourt */
/* */
/* Copyright 1996 Institut National de Recherche en Informatique et */
/* en Automatique. All rights reserved. This file is distributed */
/* under the terms of the GNU Library General Public License. */
/* */
/***********************************************************************/
/* $Id$ */
/* The table-driven automaton for lexers generated by camllex. */
#include "fail.h"
#include "mlvalues.h"
#include "stacks.h"
struct lexer_buffer {
value refill_buff;
value lex_buffer;
value lex_buffer_len;
value lex_abs_pos;
value lex_start_pos;
value lex_curr_pos;
value lex_last_pos;
value lex_last_action;
value lex_eof_reached;
};
struct lexing_table {
value lex_base;
value lex_backtrk;
value lex_default;
value lex_trans;
value lex_check;
};
#if defined(ARCH_BIG_ENDIAN) || SIZEOF_SHORT != 2
#define Short(tbl,n) \
(*((unsigned char *)((tbl) + (n) * 2)) + \
(*((schar *)((tbl) + (n) * 2 + 1)) << 8))
#else
#define Short(tbl,n) (((short *)(tbl))[(n)])
#endif
CAMLprim value lex_engine(struct lexing_table *tbl, value start_state,
struct lexer_buffer *lexbuf)
{
int state, base, backtrk, c;
state = Int_val(start_state);
if (state >= 0) {
/* First entry */
lexbuf->lex_last_pos = lexbuf->lex_start_pos = lexbuf->lex_curr_pos;
lexbuf->lex_last_action = Val_int(-1);
} else {
/* Reentry after refill */
state = -state - 1;
}
while(1) {
/* Lookup base address or action number for current state */
base = Short(tbl->lex_base, state);
if (base < 0) return Val_int(-base-1);
/* See if it's a backtrack point */
backtrk = Short(tbl->lex_backtrk, state);
if (backtrk >= 0) {
lexbuf->lex_last_pos = lexbuf->lex_curr_pos;
lexbuf->lex_last_action = Val_int(backtrk);
}
/* See if we need a refill */
if (lexbuf->lex_curr_pos >= lexbuf->lex_buffer_len){
if (lexbuf->lex_eof_reached == Val_bool (0)){
return Val_int(-state - 1);
}else{
c = 256;
}
}else{
/* Read next input char */
c = Byte_u(lexbuf->lex_buffer, Long_val(lexbuf->lex_curr_pos));
lexbuf->lex_curr_pos += 2;
}
/* Determine next state */
if (Short(tbl->lex_check, base + c) == state)
state = Short(tbl->lex_trans, base + c);
else
state = Short(tbl->lex_default, state);
/* If no transition on this char, return to last backtrack point */
if (state < 0) {
lexbuf->lex_curr_pos = lexbuf->lex_last_pos;
if (lexbuf->lex_last_action == Val_int(-1)) {
failwith("lexing: empty token");
} else {
return lexbuf->lex_last_action;
}
}else{
/* Erase the EOF condition only if the EOF pseudo-character was
consumed by the automaton (i.e. there was no backtrack above)
*/
if (c == 256) lexbuf->lex_eof_reached = Val_bool (0);
}
}
}