107 lines
3.3 KiB
C
107 lines
3.3 KiB
C
/***********************************************************************/
|
|
/* */
|
|
/* Objective Caml */
|
|
/* */
|
|
/* Xavier Leroy, projet Cristal, INRIA Rocquencourt */
|
|
/* */
|
|
/* Copyright 1996 Institut National de Recherche en Informatique et */
|
|
/* en Automatique. All rights reserved. This file is distributed */
|
|
/* under the terms of the GNU Library General Public License. */
|
|
/* */
|
|
/***********************************************************************/
|
|
|
|
/* $Id$ */
|
|
|
|
/* The table-driven automaton for lexers generated by camllex. */
|
|
|
|
#include "fail.h"
|
|
#include "mlvalues.h"
|
|
#include "stacks.h"
|
|
|
|
struct lexer_buffer {
|
|
value refill_buff;
|
|
value lex_buffer;
|
|
value lex_buffer_len;
|
|
value lex_abs_pos;
|
|
value lex_start_pos;
|
|
value lex_curr_pos;
|
|
value lex_last_pos;
|
|
value lex_last_action;
|
|
value lex_eof_reached;
|
|
};
|
|
|
|
struct lexing_table {
|
|
value lex_base;
|
|
value lex_backtrk;
|
|
value lex_default;
|
|
value lex_trans;
|
|
value lex_check;
|
|
};
|
|
|
|
#if defined(ARCH_BIG_ENDIAN) || SIZEOF_SHORT != 2
|
|
#define Short(tbl,n) \
|
|
(*((unsigned char *)((tbl) + (n) * 2)) + \
|
|
(*((schar *)((tbl) + (n) * 2 + 1)) << 8))
|
|
#else
|
|
#define Short(tbl,n) (((short *)(tbl))[(n)])
|
|
#endif
|
|
|
|
CAMLprim value lex_engine(struct lexing_table *tbl, value start_state,
|
|
struct lexer_buffer *lexbuf)
|
|
{
|
|
int state, base, backtrk, c;
|
|
|
|
state = Int_val(start_state);
|
|
if (state >= 0) {
|
|
/* First entry */
|
|
lexbuf->lex_last_pos = lexbuf->lex_start_pos = lexbuf->lex_curr_pos;
|
|
lexbuf->lex_last_action = Val_int(-1);
|
|
} else {
|
|
/* Reentry after refill */
|
|
state = -state - 1;
|
|
}
|
|
while(1) {
|
|
/* Lookup base address or action number for current state */
|
|
base = Short(tbl->lex_base, state);
|
|
if (base < 0) return Val_int(-base-1);
|
|
/* See if it's a backtrack point */
|
|
backtrk = Short(tbl->lex_backtrk, state);
|
|
if (backtrk >= 0) {
|
|
lexbuf->lex_last_pos = lexbuf->lex_curr_pos;
|
|
lexbuf->lex_last_action = Val_int(backtrk);
|
|
}
|
|
/* See if we need a refill */
|
|
if (lexbuf->lex_curr_pos >= lexbuf->lex_buffer_len){
|
|
if (lexbuf->lex_eof_reached == Val_bool (0)){
|
|
return Val_int(-state - 1);
|
|
}else{
|
|
c = 256;
|
|
}
|
|
}else{
|
|
/* Read next input char */
|
|
c = Byte_u(lexbuf->lex_buffer, Long_val(lexbuf->lex_curr_pos));
|
|
lexbuf->lex_curr_pos += 2;
|
|
}
|
|
/* Determine next state */
|
|
if (Short(tbl->lex_check, base + c) == state)
|
|
state = Short(tbl->lex_trans, base + c);
|
|
else
|
|
state = Short(tbl->lex_default, state);
|
|
/* If no transition on this char, return to last backtrack point */
|
|
if (state < 0) {
|
|
lexbuf->lex_curr_pos = lexbuf->lex_last_pos;
|
|
if (lexbuf->lex_last_action == Val_int(-1)) {
|
|
failwith("lexing: empty token");
|
|
} else {
|
|
return lexbuf->lex_last_action;
|
|
}
|
|
}else{
|
|
/* Erase the EOF condition only if the EOF pseudo-character was
|
|
consumed by the automaton (i.e. there was no backtrack above)
|
|
*/
|
|
if (c == 256) lexbuf->lex_eof_reached = Val_bool (0);
|
|
}
|
|
}
|
|
}
|
|
|