803 lines
29 KiB
Lua

--
-- Project: LuaIDL
-- Version: 0.8.9b
-- Author: Ricardo Cosme <rcosme@tecgraf.puc-rio.br>
-- Filename: lex.lua
--
local type = type
local pairs = pairs
local tonumber = tonumber
local error = error
local ipairs = ipairs
local table = table
local string = require "string"
module 'luaidl.lex'
tab_tokens = { TK_ID = 257, TK_ABSTRACT = 258, TK_ANY = 259, TK_ATTRIBUTE = 260,
TK_BOOLEAN = 261, TK_CASE = 262, TK_CHAR = 263, TK_COMPONENT = 264,
TK_CONST = 265, TK_CONSUMES = 266, TK_CONTEXT = 267, TK_CUSTOM = 268,
TK_DEFAULT = 269, TK_DOUBLE = 270, TK_EXCEPTION = 271, TK_EMITS = 272,
TK_ENUM = 273, TK_EVENTTYPE = 274, TK_FACTORY = 275, TK_FALSE = 276,
TK_FINDER = 277, TK_FIXED = 278, TK_FLOAT = 279, TK_GETRAISES = 280,
TK_HOME = 281, TK_IMPORT = 282, TK_IN = 283, TK_INOUT = 284,
TK_INTERFACE = 285, TK_LOCAL = 286, TK_LONG = 287, TK_MODULE = 288,
TK_MULTIPLE = 289, TK_NATIVE = 290, TK_OBJECT = 291, TK_OCTET = 292,
TK_ONEWAY = 293, TK_OUT = 294, TK_PRIMARYKEY = 295, TK_PRIVATE = 296,
TK_PROVIDES = 297, TK_PUBLIC = 298, TK_PUBLISHES = 299, TK_RAISES = 300,
TK_READONLY = 301, TK_SETRAISES = 302, TK_SEQUENCE = 303, TK_SHORT = 304,
TK_STRING = 305, TK_STRUCT = 306, TK_SUPPORTS = 307, TK_SWITCH = 308,
TK_TRUE = 309, TK_TRUNCATABLE = 310, TK_TYPEDEF = 311, TK_TYPEID = 312,
TK_TYPEPREFIX = 313, TK_UNSIGNED = 314, TK_UNION = 315, TK_USES = 316,
TK_VALUEBASE = 317, TK_VALUETYPE = 318, TK_VOID = 319, TK_WCHAR = 320,
TK_WSTRING = 321, TK_INTEGER_LITERAL = 322, TK_FLOAT_LITERAL = 323,
TK_CHAR_LITERAL = 324, TK_WCHAR_LITERAL = 325, TK_STRING_LITERAL = 326,
TK_WSTRING_LITERAL = 327, TK_FIXED_LITERAL = 328, TK_PRAGMA_PREFIX = 329,
TK_PRAGMA_ID = 330, TK_MANAGES = 332,
}
local tab_keywords = {
['abstract'] = { token = tab_tokens.TK_ABSTRACT },
['any'] = { token = tab_tokens.TK_ANY },
['attribute'] = { token = tab_tokens.TK_ATTRIBUTE },
['boolean'] = { token = tab_tokens.TK_BOOLEAN },
['case'] = { token = tab_tokens.TK_CASE },
['char'] = { token = tab_tokens.TK_CHAR },
['component'] = { token = tab_tokens.TK_COMPONENT },
['const'] = { token = tab_tokens.TK_CONST },
['consumes'] = { token = tab_tokens.TK_CONSUMES },
['context'] = { token = tab_tokens.TK_CONTEXT },
['custom'] = { token = tab_tokens.TK_CUSTOM },
['default'] = { token = tab_tokens.TK_DEFAULT },
['double'] = { token = tab_tokens.TK_DOUBLE },
['exception'] = { token = tab_tokens.TK_EXCEPTION },
['emits'] = { token = tab_tokens.TK_EMITS },
['enum'] = { token = tab_tokens.TK_ENUM },
['eventtype'] = { token = tab_tokens.TK_EVENTTYPE },
['factory'] = { token = tab_tokens.TK_FACTORY },
['FALSE'] = { token = tab_tokens.TK_FALSE },
['finder'] = { token = tab_tokens.TK_FINDER },
['fixed'] = { token = tab_tokens.TK_FIXED },
['float'] = { token = tab_tokens.TK_FLOAT },
['getraises'] = { token = tab_tokens.TK_GETRAISES },
['home'] = { token = tab_tokens.TK_HOME },
['import'] = { token = tab_tokens.TK_IMPORT },
['in'] = { token = tab_tokens.TK_IN },
['inout'] = { token = tab_tokens.TK_INOUT },
['interface'] = { token = tab_tokens.TK_INTERFACE },
['local'] = { token = tab_tokens.TK_LOCAL },
['long'] = { token = tab_tokens.TK_LONG },
['manages'] = { token = tab_tokens.TK_MANAGES },
['module'] = { token = tab_tokens.TK_MODULE },
['multiple'] = { token = tab_tokens.TK_MULTIPLE },
['native'] = { token = tab_tokens.TK_NATIVE },
['Object'] = { token = tab_tokens.TK_OBJECT },
['octet'] = { token = tab_tokens.TK_OCTET },
['oneway'] = { token = tab_tokens.TK_ONEWAY },
['out'] = { token = tab_tokens.TK_OUT },
['primarykey'] = { token = tab_tokens.TK_PRIMARYKEY },
['private'] = { token = tab_tokens.TK_PRIVATE },
['provides'] = { token = tab_tokens.TK_PROVIDES },
['public'] = { token = tab_tokens.TK_PUBLIC },
['publishes'] = { token = tab_tokens.TK_PUBLISHES },
['raises'] = { token = tab_tokens.TK_RAISES },
['readonly'] = { token = tab_tokens.TK_READONLY },
['setraises'] = { token = tab_tokens.TK_SETRAISES },
['sequence'] = { token = tab_tokens.TK_SEQUENCE },
['short'] = { token = tab_tokens.TK_SHORT },
['string'] = { token = tab_tokens.TK_STRING },
['struct'] = { token = tab_tokens.TK_STRUCT },
['supports'] = { token = tab_tokens.TK_SUPPORTS },
['switch'] = { token = tab_tokens.TK_SWITCH },
['TRUE'] = { token = tab_tokens.TK_TRUE },
['truncatable'] = { token = tab_tokens.TK_TRUNCATABLE },
['typedef'] = { token = tab_tokens.TK_TYPEDEF },
['typeid'] = { token = tab_tokens.TK_TYPEID },
['typeprefix'] = { token = tab_tokens.TK_TYPEPREFIX },
['unsigned'] = { token = tab_tokens.TK_UNSIGNED },
['union'] = { token = tab_tokens.TK_UNION },
['uses'] = { token = tab_tokens.TK_USES },
['ValueBase'] = { token = tab_tokens.TK_VALUEBASE },
['valuetype'] = { token = tab_tokens.TK_VALUETYPE },
['void'] = { token = tab_tokens.TK_VOID },
['wchar'] = { token = tab_tokens.TK_WCHAR },
['wstring'] = { token = tab_tokens.TK_WSTRING },
}
local tab_symbols
PRAGMA_VERSION = '1.0'
ERROR_MSG_TYPE = '[lexical error]:_LINE:_ERRORMSG.'
local token
local lookahead
local i
local stridllen
local linemarkDeclared
local function is_blank( char )
if ( lookahead == ' ' or lookahead == '\f' or lookahead == '\r' or
lookahead == '\t' or lookahead == '\v' ) then
return true
else
return false
end
end
local function is_digit( char )
return string.find(char , '%d')
end
local function is_hex_digit( char )
return string.find(char , '%x')
end
local function is_octal_digit( char )
return string.find(char , '[0-7]')
end
local function is_alpha( char )
return string.find(char , '%w')
end
local function is_near_value( lookahead )
if string.find(lookahead , '%w') then
return ' near \''..lookahead..'\''
else
return ''
end
end
local function insert_symbols( lexeme )
tab_symbols[lexeme] = {token = tab_tokens.TK_ID ,descriptions = {}}
end
local function search_symbols( lexeme , tab_symbols )
if tab_symbols[lexeme] then
return lexeme ,tab_symbols[lexeme].token
else
for key, value in pairs(tab_symbols) do
if string.upper(lexeme) == string.upper(key) then
return key ,'collide'
end
end
end
return nil ,nil
end
local function search_symbols_wocollide( lexeme , tab_symbols )
if tab_symbols[lexeme] then
return lexeme ,tab_symbols[lexeme].token
else
for key, value in pairs(tab_symbols) do
-- if string.upper( lexeme ) == string.upper( key ) then
-- return key , 'collide'
-- end
end
end
return nil ,nil
end
local function error_lex( error_msg )
if (type(ERROR_MSG_TYPE) ~= 'string') then
error('bad value to \'error_msg_type\' variable (string expected)' ,2)
end
init()
local _LINE = line
error( string.gsub( string.gsub( ERROR_MSG_TYPE , '_LINE' , _LINE) ,
'_ERRORMSG' , error_msg ) , 3 )
end
function get_constructor ( symbol_name )
if tab_symbols[symbol_name] then
return tab_symbols[symbol_name].descriptions
else
return nil
end
end
function set_constructor ( symbol_name , field_name , value )
for k , v in ipairs(tab_symbols) do
if (v.lexeme == symbol_name) then
v.descriptions[field_name] = value
end
end
return nil
end
local function lineCount()
line = line + 1
end
local function getchar(stridl)
if (i > stridllen) then
return nil
else
local c = string.sub(stridl, i, i)
i = i + 1
prevtokenvalue = tokenvalue_previous
return c
end
end
function init()
tab_symbols = { }
token = nil
tokenvalue = '<EOF>'
tokenvalue_previous = '<EOF>'
line = 1
srcfilename = ''
tab_linemarks = { }
lookahead = ' '
i = 1
stridllen = nil
end
function lexer(stridl)
if not stridllen then
init()
stridllen = string.len(stridl)
end
while true do
tokenvalue_previous = tokenvalue
if not lookahead then
init()
token = nil
return token
elseif (lookahead == '#') then
lookahead = getchar(stridl)
-- C preprocessor
-- # <linenum> "<filename>" <flags>*
if (lookahead == ' ') then
linemarkDeclared = true
-- linenum
local linenum = getchar(stridl)
lookahead = getchar(stridl)
while is_digit(lookahead) do
linenum = linenum..lookahead
lookahead = getchar(stridl)
end
line = tonumber(linenum)
-- filename
-- '"' char
local _ = getchar(stridl)
local filename = ''
lookahead = getchar(stridl)
while (lookahead ~= '"') do
filename = filename..lookahead
lookahead = getchar(stridl)
end
srcfilename = filename
lookahead = getchar(stridl)
-- flags
local flags = { }
while (lookahead == ' ') do
lookahead = getchar(stridl)
if is_digit(lookahead) then
flags[lookahead] = true
end
lookahead = getchar(stridl)
end
table.insert(tab_linemarks, flags)
-- pragma declarations
elseif (lookahead == 'p') then
lookahead = getchar(stridl)
if (lookahead == 'r') then
lookahead = getchar(stridl)
if (lookahead == 'a') then
lookahead = getchar(stridl)
if (lookahead == 'g') then
lookahead = getchar(stridl)
if (lookahead == 'm') then
lookahead = getchar(stridl)
if (lookahead == 'a') then
lookahead = getchar(stridl)
if is_blank(lookahead) then
lookahead = getchar(stridl)
end
-- pragma prefix
if (lookahead == 'p') then
lookahead = getchar(stridl)
if (lookahead == 'r') then
lookahead = getchar(stridl)
if (lookahead == 'e') then
lookahead = getchar(stridl)
if (lookahead == 'f') then
lookahead = getchar(stridl)
if (lookahead == 'i') then
lookahead = getchar(stridl)
if (lookahead == 'x') then
lookahead = getchar(stridl)
token = tab_tokens.TK_PRAGMA_PREFIX
return token
end
end
end
end
end
-- pragma ID
elseif (lookahead == 'I') then
lookahead = getchar(stridl)
if (lookahead == 'D') then
lookahead = getchar(stridl)
token = tab_tokens.TK_PRAGMA_ID
return token
end
end
end
end
end
end
end
end
-- new lines
elseif (lookahead == '\n') then
-- We does not consider linemark declarations to line counts.
if not linemarkDeclared then
lineCount()
else
linemarkDeclared = false
end
lookahead = getchar(stridl)
-- blank characters
elseif is_blank(lookahead) then
lookahead = getchar(stridl)
-- comments
elseif (lookahead == '/') then
lookahead = getchar(stridl)
if (lookahead == '/') then
while (lookahead ~= '\n') do
lookahead = getchar(stridl)
end
lineCount()
lookahead = getchar(stridl)
elseif (lookahead == '*') then
local first_line = line
lookahead = getchar(stridl)
while true do
if (lookahead == '\n') then
lookahead = getchar(stridl)
lineCount()
elseif not lookahead then
-- where begin nonterminated comment ?
line = first_line
error_lex('nonterminated comment')
elseif (lookahead == '*') then
lookahead = getchar(stridl)
if (lookahead == '/') then
break
end
else
lookahead = getchar(stridl)
end
end
lookahead = getchar(stridl)
else
tokenvalue = '/'
token = tokenvalue
return token
end
-- floating-point literals
-- fixed-point literals
elseif (lookahead == '.') then
tokenvalue = '.'
lookahead = getchar(stridl)
if is_digit(lookahead) then
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
while is_digit(lookahead) do
tokenvalue = tokenvalue..lookahead
lookahead = getchar()
end
if (lookahead == 'e' or lookahead == 'E') then
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
if (lookahead == '-') then
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
if not is_digit(lookahead) then
error_lex('malformed number'..is_near_value(tokenvalue))
else
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
while is_digit(lookahead) do
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
end
end
elseif is_digit(lookahead) then
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
while is_digit(lookahead) do
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
end
else
error_lex('malformed number'..is_near_value(tokenvalue))
end
tokenvalue = tonumber(tokenvalue ,10)
token = tab_tokens.TK_FLOAT_LITERAL
return token
elseif (lookahead == 'd' or lookahead == 'D') then
tokenvalue = tonumber(tokenvalue ,10)
lookahead = getchar(stridl)
token = tab_tokens.TK_FIXED_LITERAL
return token
else
tokenvalue = tonumber(tokenvalue ,10)
token = tab_tokens.TK_FLOAT_LITERAL
return token
end
else
tokenvalue = '.'
token = tokenvalue
return token
end
-- integer literal (decimal)
-- integer literal (hexa)
-- integer literal (octal)
-- floating-point literals
-- fixed-point literals
elseif (lookahead == '0') then
tokenvalue = '0'
lookahead = getchar(stridl)
if (lookahead == 'x' or lookahead == 'X') then
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
while is_hex_digit(lookahead) do
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
end
tokenvalue = tonumber(tokenvalue ,10)
token = tab_tokens.TK_INTEGER_LITERAL
return token
end
while is_digit(lookahead) do
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
end
if (lookahead == '.' or lookahead == 'e' or lookahead == 'E' or
lookahead == 'd' or lookahead == 'd')
then
if (lookahead == '.') then
tokenvalue = tokenvalue..'.'
lookahead = getchar(stridl)
if is_digit(lookahead) then
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
while is_digit(lookahead) do
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
end
end
end
if (lookahead == 'e' or lookahead == 'E') then
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
if (lookahead == '-') then
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
if not is_digit(lookahead) then
error_lex('malformed number near'..is_near_value(tokenvalue))
else
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
while is_digit(lookahead) do
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
end
end
elseif is_digit(lookahead) then
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
while is_digit(lookahead) do
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
end
else
error_lex('malformed number near'..is_near_value(tokenvalue))
end
tokenvalue = tonumber(tokenvalue ,10)
token = tab_tokens.TK_FLOAT_LITERAL
return token
elseif (lookahead == 'd' or lookahead == 'D') then
tokenvalue = tonumber(tokenvalue ,10)
lookahead = getchar(stridl)
token = tab_tokens.TK_FIXED_LITERAL
return token
else
tokenvalue = tonumber(tokenvalue ,10)
token = tab_tokens.TK_FLOAT_LITERAL
return token
end
else
if not (string.find(tokenvalue ,'8') or string.find(tokenvalue ,'9')) then
tokenvalue = tonumber(tokenvalue ,8)
end
token = tab_tokens.TK_INTEGER_LITERAL
return token
end
-- integer literal (decimal)
-- floating-point literals
-- fixed-point literals
elseif is_digit(lookahead) then
tokenvalue = lookahead
lookahead = getchar(stridl)
while is_digit(lookahead) do
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
end
if (lookahead == '.' or lookahead == 'e' or lookahead == 'E' or
lookahead == 'd' or lookahead == 'd')
then
if (lookahead == '.') then
tokenvalue = tokenvalue..'.'
lookahead = getchar(stridl)
if is_digit(lookahead) then
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
while is_digit(lookahead) do
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
end
end
end
if (lookahead == 'e' or lookahead == 'E') then
tokenvalue = tokenvalue..lookahead
lookahead = getchar( stridl )
if (lookahead == '-') then
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
if not is_digit(lookahead) then
error_lex('malformed number near'..is_near_value(tokenvalue))
else
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
while is_digit(lookahead) do
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
end
end
elseif is_digit(lookahead) then
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
while is_digit(lookahead) do
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
end
else
error_lex('malformed number near'..is_near_value(tokenvalue))
end
tokenvalue = tonumber(tokenvalue ,10)
token = tab_tokens.TK_FLOAT_LITERAL
return token
elseif (lookahead == 'd' or lookahead == 'D') then
tokenvalue = tonumber(tokenvalue ,10)
lookahead = getchar(stridl)
token = tab_tokens.TK_FIXED_LITERAL
return token
else
tokenvalue = tonumber(tokenvalue ,10)
token = tab_tokens.TK_FLOAT_LITERAL
return token
end
else
token = tab_tokens.TK_INTEGER_LITERAL
return token
end
-- char literal
-- "The value of a null is 0" ????
elseif (lookahead == '\'') then
tokenvalue = ''
lookahead = getchar(stridl)
if (lookahead == '\\') then
lookahead = getchar(stridl)
if (lookahead == 'n') then
tokenvalue = tokenvalue..'\n'
lookahead = getchar(stridl)
elseif (lookahead == 't') then
tokenvalue = tokenvalue..'\t'
lookahead = getchar(stridl)
elseif (lookahead == 'v') then
tokenvalue = tokenvalue..'\v'
lookahead = getchar(stridl)
elseif (lookahead == 'b') then
tokenvalue = tokenvalue..'\b'
lookahead = getchar(stridl)
elseif (lookahead == 'r') then
tokenvalue = tokenvalue..'\r'
lookahead = getchar(stridl)
elseif (lookahead == 'f') then
tokenvalue = tokenvalue..'\f'
lookahead = getchar(stridl)
elseif (lookahead == 'a') then
tokenvalue = tokenvalue..'\a'
lookahead = getchar(stridl)
elseif (lookahead == '\\') then
tokenvalue = tokenvalue..'\\'
lookahead = getchar(stridl)
elseif (lookahead == '?') then
tokenvalue = tokenvalue..'?'
lookahead = getchar(stridl)
elseif (lookahead == '\'') then
tokenvalue = tokenvalue..'\''
lookahead = getchar(stridl)
elseif (lookahead == '"') then
tokenvalue = tokenvalue..'"'
lookahead = getchar(stridl)
elseif is_octal_digit(lookahead) then
local num_digits = 1
local tokenvalue_tmp = lookahead
lookahead = getchar(stridl)
while is_octal_digit(lookahead) do
tokenvalue_tmp = tokenvalue_tmp..lookahead
lookahead = getchar(stridl)
num_digits = num_digits + 1
if (num_digits == 3) then
lookahed = getchar(stridl)
break
end
end
tokenvalue = tokenvalue..string.char(tonumber(tokenvalue_tmp ,8))
elseif (lookahead == 'x') then
local tokenvalue_tmp = '0x'
lookahead = getchar(stridl)
if is_hex_digit(lookahead) then
tokenvalue_tmp = tokenvalue_tmp..lookahead
lookahead = getchar(stridl)
if is_hex_digit(lookahead) then
tokenvalue_tmp = tokenvalue_tmp..lookahead
lookahead = getchar(stridl)
end
end
tokenvalue = tokenvalue..string.char(tonumber(tokenvalue_tmp ,10))
elseif (lookahead == 'u') then
error_lex('it doest not permited unicode characters in char type')
else
-- When occur an unknown escape sequence, then we apply a common
-- behavior that is to return a proper character. Ex.: '\e' -> e
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
end
elseif (lookahead ~= '\'') then
tokenvalue = lookahead
lookahead = getchar(stridl)
end
if (lookahead ~= '\'') then
if not lookahead then
error_lex('"\'" expected near \'<eof>\'')
else
error_lex('"\'" expected'..is_near_value(tokenvalue))
end
end
lookahead = getchar(stridl)
token = tab_tokens.TK_CHAR_LITERAL
return token
-- string literal
elseif (lookahead == '"') then
lookahead = getchar(stridl)
tokenvalue = ''
while true do
if (lookahead == '\\') then
lookahead = getchar(stridl)
if (lookahead == 'n') then
tokenvalue = tokenvalue..'\n'
lookahead = getchar(stridl)
elseif (lookahead == 't') then
tokenvalue = tokenvalue..'\t'
lookahead = getchar(stridl)
elseif (lookahead == 'v') then
tokenvalue = tokenvalue..'\v'
lookahead = getchar(stridl)
elseif (lookahead == 'b') then
tokenvalue = tokenvalue..'\b'
lookahead = getchar(stridl)
elseif (lookahead == 'r') then
tokenvalue = tokenvalue..'\r'
lookahead = getchar(stridl)
elseif (lookahead == 'f') then
tokenvalue = tokenvalue..'\f'
lookahead = getchar(stridl)
elseif (lookahead == 'a') then
tokenvalue = tokenvalue..'\a'
lookahead = getchar(stridl)
elseif (lookahead == '\\') then
tokenvalue = tokenvalue..'\\'
lookahead = getchar(stridl)
elseif (lookahead == '?') then
tokenvalue = tokenvalue..'?'
lookahead = getchar( stridl )
elseif (lookahead == '\'') then
tokenvalue = tokenvalue..'\''
lookahead = getchar(stridl)
elseif (lookahead == '"') then
tokenvalue = tokenvalue..'"'
lookahead = getchar(stridl)
elseif is_octal_digit(lookahead) then
local num_digits = 1
local tokenvalue_tmp = lookahead
lookahead = getchar(stridl)
while is_octal_digit(lookahead) do
tokenvalue_tmp = tokenvalue_tmp..lookahead
lookahead = getchar(stridl)
num_digits = num_digits + 1
if (num_digits == 3) then
lookahed = getchar(stridl)
break
end
end
tokenvalue = tokenvalue..string.char(tonumber(tokenvalue_tmp ,8))
elseif (lookahead == 'x') then
local tokenvalue_tmp = '0x'
lookahead = getchar(stridl)
if is_hex_digit(lookahead) then
tokenvalue_tmp = tokenvalue_tmp..lookahead
lookahead = getchar(stridl)
if is_hex_digit(lookahead) then
tokenvalue_tmp = tokenvalue_tmp..lookahead
lookahead = getchar(stridl)
end
end
tokenvalue = tokenvalue..string.char(tonumber(tokenvalue_tmp , 10))
elseif (lookahead == 'u') then
error_lex('it doest not permited unicode characters in char type')
else
-- When occur an unknown escape sequence, then we apply a common
-- behavior that is to return a proper character. Ex.: '\e' -> e
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
end
elseif (lookahead == '"') then
break
elseif not lookahead then
error_lex('nonterminated string')
else
tokenvalue = tokenvalue..lookahead
lookahead = getchar(stridl)
end
end
lookahead = getchar(stridl)
token = tab_tokens.TK_STRING_LITERAL
return token
-- identifiers
-- keywords
elseif is_alpha(lookahead) or (lookahead == '_') then
local lexbuf = lookahead
lookahead = getchar(stridl)
while is_alpha(lookahead) or (lookahead == '_') or is_digit(lookahead) do
lexbuf = lexbuf..lookahead
lookahead = getchar(stridl)
end
if (string.sub(lexbuf ,1 ,1) ~= '_') then
tokenvalue, tk = search_symbols(lexbuf , tab_keywords)
if tk == "collide" then
error_lex("'"..lexbuf.."' collides with keyword '"..tokenvalue.."'")
elseif tk then
token = tk
return token
end
else
lexbuf = string.sub(lexbuf ,2)
end
tokenvalue, token = search_symbols_wocollide(lexbuf ,tab_symbols)
-- if token == "collide" then
-- error_lex( "'"..lexbuf.."' and '"..tokenvalue.."' collide" )
if not token then
insert_symbols(lexbuf)
tokenvalue = lexbuf
end
token = tab_tokens.TK_ID
return token
-- operators and other characters
else
tokenvalue = lookahead
lookahead = getchar(stridl)
token = tokenvalue
return token
end
end
end