luaforwindows/files/lua/luaidl/lex.lua

--
-- Project:  LuaIDL
-- Version:  0.8.9b
-- Author:   Ricardo Cosme <rcosme@tecgraf.puc-rio.br>
-- Filename: lex.lua
--

local type     = type
local pairs    = pairs
local tonumber = tonumber
local error    = error
local ipairs   = ipairs
local table    = table
local string   = require "string"

module 'luaidl.lex'

tab_tokens = { TK_ID = 257, TK_ABSTRACT = 258, TK_ANY = 259, TK_ATTRIBUTE = 260,
              TK_BOOLEAN = 261, TK_CASE = 262, TK_CHAR = 263, TK_COMPONENT = 264,
              TK_CONST = 265, TK_CONSUMES = 266, TK_CONTEXT = 267, TK_CUSTOM = 268,
              TK_DEFAULT = 269, TK_DOUBLE = 270, TK_EXCEPTION = 271, TK_EMITS = 272,
              TK_ENUM = 273, TK_EVENTTYPE = 274, TK_FACTORY = 275, TK_FALSE = 276,
              TK_FINDER = 277, TK_FIXED = 278, TK_FLOAT = 279, TK_GETRAISES = 280,
              TK_HOME = 281, TK_IMPORT = 282, TK_IN = 283, TK_INOUT = 284,
              TK_INTERFACE = 285, TK_LOCAL = 286, TK_LONG = 287, TK_MODULE = 288,
              TK_MULTIPLE = 289, TK_NATIVE = 290, TK_OBJECT = 291, TK_OCTET = 292,
              TK_ONEWAY = 293, TK_OUT = 294, TK_PRIMARYKEY = 295, TK_PRIVATE = 296,
              TK_PROVIDES = 297, TK_PUBLIC = 298, TK_PUBLISHES = 299, TK_RAISES = 300,
              TK_READONLY = 301, TK_SETRAISES = 302, TK_SEQUENCE = 303, TK_SHORT = 304,
              TK_STRING = 305, TK_STRUCT = 306, TK_SUPPORTS = 307, TK_SWITCH = 308,
              TK_TRUE = 309, TK_TRUNCATABLE = 310, TK_TYPEDEF = 311, TK_TYPEID = 312,
              TK_TYPEPREFIX = 313, TK_UNSIGNED = 314, TK_UNION = 315, TK_USES = 316,
              TK_VALUEBASE = 317, TK_VALUETYPE = 318, TK_VOID = 319, TK_WCHAR = 320,
              TK_WSTRING = 321, TK_INTEGER_LITERAL = 322, TK_FLOAT_LITERAL = 323,
              TK_CHAR_LITERAL = 324, TK_WCHAR_LITERAL = 325, TK_STRING_LITERAL = 326,
              TK_WSTRING_LITERAL = 327, TK_FIXED_LITERAL = 328, TK_PRAGMA_PREFIX = 329,
              TK_PRAGMA_ID = 330, TK_MANAGES = 332,
             }

local tab_keywords = {
                ['abstract']      = { token = tab_tokens.TK_ABSTRACT },
                ['any']           = { token = tab_tokens.TK_ANY },
                ['attribute']     = { token = tab_tokens.TK_ATTRIBUTE },
                ['boolean']       = { token = tab_tokens.TK_BOOLEAN },
                ['case']          = { token = tab_tokens.TK_CASE },
                ['char']          = { token = tab_tokens.TK_CHAR },
                ['component']     = { token = tab_tokens.TK_COMPONENT },
                ['const']         = { token = tab_tokens.TK_CONST },
                ['consumes']      = { token = tab_tokens.TK_CONSUMES },
                ['context']       = { token = tab_tokens.TK_CONTEXT },
                ['custom']        = { token = tab_tokens.TK_CUSTOM },
                ['default']       = { token = tab_tokens.TK_DEFAULT },
                ['double']        = { token = tab_tokens.TK_DOUBLE },
                ['exception']     = { token = tab_tokens.TK_EXCEPTION },
                ['emits']         = { token = tab_tokens.TK_EMITS },
                ['enum']          = { token = tab_tokens.TK_ENUM },
                ['eventtype']     = { token = tab_tokens.TK_EVENTTYPE },
                ['factory']       = { token = tab_tokens.TK_FACTORY },
                ['FALSE']         = { token = tab_tokens.TK_FALSE },
                ['finder']        = { token = tab_tokens.TK_FINDER },
                ['fixed']         = { token = tab_tokens.TK_FIXED },
                ['float']         = { token = tab_tokens.TK_FLOAT },
                ['getraises']     = { token = tab_tokens.TK_GETRAISES },
                ['home']          = { token = tab_tokens.TK_HOME },
                ['import']        = { token = tab_tokens.TK_IMPORT },
                ['in']            = { token = tab_tokens.TK_IN },
                ['inout']         = { token = tab_tokens.TK_INOUT },
                ['interface']     = { token = tab_tokens.TK_INTERFACE },
                ['local']         = { token = tab_tokens.TK_LOCAL },
                ['long']          = { token = tab_tokens.TK_LONG },
                ['manages']       = { token = tab_tokens.TK_MANAGES },
                ['module']        = { token = tab_tokens.TK_MODULE },
                ['multiple']      = { token = tab_tokens.TK_MULTIPLE },
                ['native']        = { token = tab_tokens.TK_NATIVE },
                ['Object']        = { token = tab_tokens.TK_OBJECT },
                ['octet']         = { token = tab_tokens.TK_OCTET },
                ['oneway']        = { token = tab_tokens.TK_ONEWAY },
                ['out']           = { token = tab_tokens.TK_OUT },
                ['primarykey']    = { token = tab_tokens.TK_PRIMARYKEY },
                ['private']       = { token = tab_tokens.TK_PRIVATE },
                ['provides']      = { token = tab_tokens.TK_PROVIDES },
                ['public']        = { token = tab_tokens.TK_PUBLIC },
                ['publishes']     = { token = tab_tokens.TK_PUBLISHES },
                ['raises']        = { token = tab_tokens.TK_RAISES },
                ['readonly']      = { token = tab_tokens.TK_READONLY },
                ['setraises']     = { token = tab_tokens.TK_SETRAISES },
                ['sequence']      = { token = tab_tokens.TK_SEQUENCE },
                ['short']         = { token = tab_tokens.TK_SHORT },
                ['string']        = { token = tab_tokens.TK_STRING },
                ['struct']        = { token = tab_tokens.TK_STRUCT },
                ['supports']      = { token = tab_tokens.TK_SUPPORTS },
                ['switch']        = { token = tab_tokens.TK_SWITCH },
                ['TRUE']          = { token = tab_tokens.TK_TRUE },
                ['truncatable']   = { token = tab_tokens.TK_TRUNCATABLE },
                ['typedef']       = { token = tab_tokens.TK_TYPEDEF },
                ['typeid']        = { token = tab_tokens.TK_TYPEID },
                ['typeprefix']    = { token = tab_tokens.TK_TYPEPREFIX },
                ['unsigned']      = { token = tab_tokens.TK_UNSIGNED },
                ['union']         = { token = tab_tokens.TK_UNION },
                ['uses']          = { token = tab_tokens.TK_USES },
                ['ValueBase']     = { token = tab_tokens.TK_VALUEBASE },
                ['valuetype']     = { token = tab_tokens.TK_VALUETYPE },
                ['void']          = { token = tab_tokens.TK_VOID },
                ['wchar']         = { token = tab_tokens.TK_WCHAR },
                ['wstring']       = { token = tab_tokens.TK_WSTRING },
              }

local tab_symbols

PRAGMA_VERSION    = '1.0'
ERROR_MSG_TYPE    = '[lexical error]:_LINE:_ERRORMSG.'

local token
local lookahead
local i
local stridllen
local linemarkDeclared

local function is_blank( char )
  if ( lookahead == ' ' or lookahead == '\f' or lookahead == '\r' or
       lookahead == '\t' or lookahead == '\v' ) then
    return true
  else
    return false
  end
end

local function is_digit( char )
  return string.find(char , '%d')
end

local function is_hex_digit( char )
  return string.find(char , '%x')
end

local function is_octal_digit( char )
  return string.find(char , '[0-7]')
end

local function is_alpha( char )
  return string.find(char , '%w')
end

local function is_near_value( lookahead )
  if string.find(lookahead , '%w') then
    return ' near \''..lookahead..'\''
  else
    return ''
  end
end

local function insert_symbols( lexeme )
  tab_symbols[lexeme] = {token = tab_tokens.TK_ID ,descriptions = {}}
end

local function search_symbols( lexeme , tab_symbols )
  if tab_symbols[lexeme] then
    return lexeme ,tab_symbols[lexeme].token
  else
    for key, value in pairs(tab_symbols) do
      if string.upper(lexeme) == string.upper(key) then
        return key ,'collide'
      end
    end
  end
  return nil ,nil
end

local function search_symbols_wocollide( lexeme , tab_symbols )
  if tab_symbols[lexeme] then
    return lexeme ,tab_symbols[lexeme].token
  else
    for key, value in pairs(tab_symbols) do
--      if string.upper( lexeme ) == string.upper( key ) then
--        return key , 'collide'
--      end
    end
  end
  return nil ,nil
end

local function error_lex( error_msg )
  if (type(ERROR_MSG_TYPE) ~= 'string') then
    error('bad value to \'error_msg_type\' variable (string expected)' ,2)
  end
  init()
  local _LINE =  line
  error( string.gsub( string.gsub( ERROR_MSG_TYPE , '_LINE' , _LINE) ,
         '_ERRORMSG' , error_msg ) , 3 )
end

function get_constructor ( symbol_name )
  if tab_symbols[symbol_name] then
    return tab_symbols[symbol_name].descriptions
  else
    return nil
  end
end

function set_constructor ( symbol_name , field_name , value )
  for k , v in ipairs(tab_symbols) do
    if (v.lexeme == symbol_name) then
      v.descriptions[field_name] = value
    end
  end
  return nil
end

local function lineCount()
  line = line + 1
end

local function getchar(stridl)
  if (i > stridllen) then
    return nil
  else
    local c =  string.sub(stridl, i, i)
    i = i + 1
    prevtokenvalue = tokenvalue_previous
    return c
  end
end

function init()
  tab_symbols           = { }
  token                 = nil
  tokenvalue            = '<EOF>'
  tokenvalue_previous   = '<EOF>'
  line                  = 1
  srcfilename           = ''
  tab_linemarks         = { }
  lookahead             = ' '
  i                     = 1
  stridllen             = nil
end

function lexer(stridl)
  if not stridllen then
    init()
    stridllen = string.len(stridl)
  end

  while true do
    tokenvalue_previous = tokenvalue

    if not lookahead then
      init()
      token = nil
      return token
    elseif (lookahead == '#') then
      lookahead = getchar(stridl)
    -- C preprocessor
    -- # <linenum> "<filename>" <flags>*
      if (lookahead == ' ') then
        linemarkDeclared = true
      -- linenum
        local linenum = getchar(stridl)
        lookahead = getchar(stridl)
        while is_digit(lookahead) do
          linenum = linenum..lookahead
          lookahead = getchar(stridl)
        end
        line = tonumber(linenum)
      -- filename
      -- '"' char
        local _ = getchar(stridl)
        local filename = ''
        lookahead = getchar(stridl)
        while (lookahead ~= '"') do
          filename = filename..lookahead
          lookahead = getchar(stridl)
        end
        srcfilename = filename
        lookahead = getchar(stridl)
      -- flags
        local flags = { }
        while (lookahead == ' ') do
          lookahead = getchar(stridl)
          if is_digit(lookahead) then
            flags[lookahead] = true
          end
          lookahead = getchar(stridl)
        end
        table.insert(tab_linemarks, flags)
    -- pragma declarations
      elseif (lookahead == 'p') then
        lookahead = getchar(stridl)
        if (lookahead == 'r') then
          lookahead = getchar(stridl)
          if (lookahead == 'a') then
            lookahead = getchar(stridl)
            if (lookahead == 'g') then
              lookahead = getchar(stridl)
              if (lookahead == 'm') then
                lookahead = getchar(stridl)
                if (lookahead == 'a') then
                  lookahead = getchar(stridl)
                  if is_blank(lookahead) then
                    lookahead = getchar(stridl)
                  end
                -- pragma prefix
                  if (lookahead == 'p') then
                    lookahead = getchar(stridl)
                    if (lookahead == 'r') then
                      lookahead = getchar(stridl)
                      if (lookahead == 'e') then
                        lookahead = getchar(stridl)
                        if (lookahead == 'f') then
                          lookahead = getchar(stridl)
                          if (lookahead == 'i') then
                            lookahead = getchar(stridl)
                            if (lookahead == 'x') then
                              lookahead = getchar(stridl)
                              token = tab_tokens.TK_PRAGMA_PREFIX
                              return token
                            end
                          end
                        end
                      end
                    end
                -- pragma ID
                  elseif (lookahead == 'I') then
                    lookahead = getchar(stridl)
                    if (lookahead == 'D') then
                      lookahead = getchar(stridl)
                      token = tab_tokens.TK_PRAGMA_ID
                      return token
                    end
                  end
                end
              end
            end
          end
        end
      end
  -- new lines
    elseif (lookahead == '\n') then
    -- We does not consider linemark declarations to line counts.
      if not linemarkDeclared then
        lineCount()
      else
        linemarkDeclared = false
      end
      lookahead = getchar(stridl)
  -- blank characters
    elseif is_blank(lookahead) then
      lookahead = getchar(stridl)
  -- comments
    elseif (lookahead == '/') then
      lookahead = getchar(stridl)
      if (lookahead == '/') then
        while (lookahead ~= '\n') do
          lookahead = getchar(stridl)
        end
        lineCount()
        lookahead = getchar(stridl)
      elseif (lookahead == '*') then
        local first_line = line
        lookahead = getchar(stridl)
        while true do
          if (lookahead == '\n') then
            lookahead = getchar(stridl)
            lineCount()
          elseif not lookahead then
          -- where begin nonterminated comment ?
            line = first_line
            error_lex('nonterminated comment')
          elseif (lookahead == '*') then
             lookahead = getchar(stridl)
             if (lookahead == '/') then
               break
             end
          else
             lookahead = getchar(stridl)
          end
        end
        lookahead = getchar(stridl)
      else
        tokenvalue = '/'
        token = tokenvalue
        return token
      end
  -- floating-point literals
  -- fixed-point literals
    elseif (lookahead == '.') then
      tokenvalue = '.'
      lookahead = getchar(stridl)
      if is_digit(lookahead) then
        tokenvalue = tokenvalue..lookahead
        lookahead = getchar(stridl)
        while is_digit(lookahead) do
          tokenvalue = tokenvalue..lookahead
          lookahead  = getchar()
        end
        if (lookahead == 'e' or lookahead == 'E') then
          tokenvalue = tokenvalue..lookahead
          lookahead = getchar(stridl)
          if (lookahead == '-') then
            tokenvalue = tokenvalue..lookahead
            lookahead = getchar(stridl)
            if not is_digit(lookahead) then
              error_lex('malformed number'..is_near_value(tokenvalue))
            else
              tokenvalue = tokenvalue..lookahead
              lookahead = getchar(stridl)
              while is_digit(lookahead) do
                tokenvalue = tokenvalue..lookahead
                lookahead = getchar(stridl)
              end
            end
          elseif is_digit(lookahead) then
            tokenvalue = tokenvalue..lookahead
            lookahead = getchar(stridl)
            while is_digit(lookahead) do
              tokenvalue = tokenvalue..lookahead
              lookahead = getchar(stridl)
            end
          else
            error_lex('malformed number'..is_near_value(tokenvalue))
          end
          tokenvalue = tonumber(tokenvalue ,10)
          token = tab_tokens.TK_FLOAT_LITERAL
          return token
        elseif (lookahead == 'd' or lookahead == 'D') then
          tokenvalue = tonumber(tokenvalue ,10)
          lookahead = getchar(stridl)
          token = tab_tokens.TK_FIXED_LITERAL
          return token
        else
          tokenvalue = tonumber(tokenvalue ,10)
          token = tab_tokens.TK_FLOAT_LITERAL
          return token
        end
      else
        tokenvalue = '.'
        token = tokenvalue
        return token
      end
  -- integer literal (decimal)
  -- integer literal (hexa)
  -- integer literal (octal)
  -- floating-point literals
  -- fixed-point literals
    elseif (lookahead == '0') then
      tokenvalue = '0'
      lookahead = getchar(stridl)
      if (lookahead == 'x' or lookahead == 'X') then
        tokenvalue = tokenvalue..lookahead
        lookahead = getchar(stridl)
        while is_hex_digit(lookahead) do
          tokenvalue = tokenvalue..lookahead
          lookahead = getchar(stridl)
        end
        tokenvalue = tonumber(tokenvalue ,10)
        token = tab_tokens.TK_INTEGER_LITERAL
        return token
      end
      while is_digit(lookahead) do
        tokenvalue = tokenvalue..lookahead
        lookahead = getchar(stridl)
      end
      if (lookahead == '.' or lookahead == 'e' or lookahead == 'E' or
          lookahead == 'd' or lookahead == 'd')
      then
        if (lookahead == '.') then
          tokenvalue = tokenvalue..'.'
          lookahead = getchar(stridl)
          if is_digit(lookahead) then
            tokenvalue = tokenvalue..lookahead
            lookahead = getchar(stridl)
            while is_digit(lookahead) do
              tokenvalue = tokenvalue..lookahead
              lookahead = getchar(stridl)
            end
          end
        end
        if (lookahead == 'e' or lookahead == 'E') then
          tokenvalue = tokenvalue..lookahead
          lookahead = getchar(stridl)
          if (lookahead == '-') then
            tokenvalue = tokenvalue..lookahead
            lookahead = getchar(stridl)
            if not is_digit(lookahead) then
              error_lex('malformed number near'..is_near_value(tokenvalue))
            else
              tokenvalue = tokenvalue..lookahead
              lookahead = getchar(stridl)
              while is_digit(lookahead) do
                tokenvalue = tokenvalue..lookahead
                lookahead = getchar(stridl)
              end
            end
          elseif is_digit(lookahead) then
            tokenvalue = tokenvalue..lookahead
            lookahead = getchar(stridl)
            while is_digit(lookahead) do
              tokenvalue = tokenvalue..lookahead
              lookahead = getchar(stridl)
            end
          else
            error_lex('malformed number near'..is_near_value(tokenvalue))
          end
          tokenvalue = tonumber(tokenvalue ,10)
          token = tab_tokens.TK_FLOAT_LITERAL
          return token
        elseif (lookahead == 'd' or lookahead == 'D') then
          tokenvalue = tonumber(tokenvalue ,10)
          lookahead = getchar(stridl)
          token = tab_tokens.TK_FIXED_LITERAL
          return token
        else
          tokenvalue = tonumber(tokenvalue ,10)
          token = tab_tokens.TK_FLOAT_LITERAL
          return token
        end
      else
        if not (string.find(tokenvalue ,'8') or string.find(tokenvalue ,'9')) then
          tokenvalue = tonumber(tokenvalue ,8)
        end
        token = tab_tokens.TK_INTEGER_LITERAL
        return token
      end
  -- integer literal (decimal)
  -- floating-point literals
  -- fixed-point literals
    elseif is_digit(lookahead) then
      tokenvalue = lookahead
      lookahead = getchar(stridl)
      while is_digit(lookahead) do
        tokenvalue = tokenvalue..lookahead
        lookahead = getchar(stridl)
      end
      if (lookahead == '.' or lookahead == 'e' or lookahead == 'E' or
          lookahead == 'd' or lookahead == 'd')
      then
        if (lookahead == '.') then
          tokenvalue = tokenvalue..'.'
          lookahead = getchar(stridl)
          if is_digit(lookahead) then
            tokenvalue = tokenvalue..lookahead
            lookahead = getchar(stridl)
            while is_digit(lookahead) do
              tokenvalue = tokenvalue..lookahead
              lookahead = getchar(stridl)
            end
          end
        end
        if (lookahead == 'e' or lookahead == 'E') then
          tokenvalue = tokenvalue..lookahead
          lookahead = getchar( stridl )
          if (lookahead == '-') then
            tokenvalue = tokenvalue..lookahead
            lookahead = getchar(stridl)
            if not is_digit(lookahead) then
              error_lex('malformed number near'..is_near_value(tokenvalue))
            else
              tokenvalue = tokenvalue..lookahead
              lookahead = getchar(stridl)
              while is_digit(lookahead) do
                tokenvalue = tokenvalue..lookahead
                lookahead = getchar(stridl)
              end
            end
          elseif is_digit(lookahead) then
            tokenvalue = tokenvalue..lookahead
            lookahead = getchar(stridl)
            while is_digit(lookahead) do
              tokenvalue = tokenvalue..lookahead
              lookahead = getchar(stridl)
            end
          else
            error_lex('malformed number near'..is_near_value(tokenvalue))
          end
          tokenvalue = tonumber(tokenvalue ,10)
          token = tab_tokens.TK_FLOAT_LITERAL
          return token
        elseif (lookahead == 'd' or lookahead == 'D') then
          tokenvalue = tonumber(tokenvalue ,10)
          lookahead = getchar(stridl)
          token = tab_tokens.TK_FIXED_LITERAL
          return token
        else
          tokenvalue = tonumber(tokenvalue ,10)
          token = tab_tokens.TK_FLOAT_LITERAL
          return token
        end
      else
        token = tab_tokens.TK_INTEGER_LITERAL
        return token
      end
  -- char literal
  -- "The value of a null is 0" ????
    elseif (lookahead == '\'') then
      tokenvalue = ''
      lookahead = getchar(stridl)
      if (lookahead == '\\') then
        lookahead = getchar(stridl)
        if (lookahead == 'n') then
          tokenvalue = tokenvalue..'\n'
          lookahead = getchar(stridl)
        elseif (lookahead == 't') then
          tokenvalue = tokenvalue..'\t'
          lookahead = getchar(stridl)
        elseif (lookahead == 'v') then
          tokenvalue = tokenvalue..'\v'
          lookahead = getchar(stridl)
        elseif (lookahead == 'b') then
          tokenvalue = tokenvalue..'\b'
          lookahead = getchar(stridl)
        elseif (lookahead == 'r') then
          tokenvalue = tokenvalue..'\r'
          lookahead = getchar(stridl)
        elseif (lookahead == 'f') then
          tokenvalue = tokenvalue..'\f'
          lookahead = getchar(stridl)
        elseif (lookahead == 'a') then
          tokenvalue = tokenvalue..'\a'
          lookahead = getchar(stridl)
        elseif (lookahead == '\\') then
          tokenvalue = tokenvalue..'\\'
          lookahead = getchar(stridl)
        elseif (lookahead == '?') then
          tokenvalue = tokenvalue..'?'
          lookahead = getchar(stridl)
        elseif (lookahead == '\'') then
          tokenvalue = tokenvalue..'\''
          lookahead = getchar(stridl)
        elseif (lookahead == '"') then
          tokenvalue = tokenvalue..'"'
          lookahead = getchar(stridl)
        elseif is_octal_digit(lookahead) then
          local num_digits = 1
          local tokenvalue_tmp = lookahead
          lookahead = getchar(stridl)
          while is_octal_digit(lookahead) do
            tokenvalue_tmp = tokenvalue_tmp..lookahead
            lookahead = getchar(stridl)
            num_digits = num_digits + 1
            if (num_digits == 3) then
              lookahed = getchar(stridl)
              break
            end
          end
          tokenvalue = tokenvalue..string.char(tonumber(tokenvalue_tmp ,8))
        elseif (lookahead == 'x') then
          local tokenvalue_tmp = '0x'
          lookahead = getchar(stridl)
          if is_hex_digit(lookahead) then
            tokenvalue_tmp = tokenvalue_tmp..lookahead
            lookahead = getchar(stridl)
            if is_hex_digit(lookahead) then
              tokenvalue_tmp = tokenvalue_tmp..lookahead
              lookahead = getchar(stridl)
            end
          end
          tokenvalue = tokenvalue..string.char(tonumber(tokenvalue_tmp ,10))
        elseif (lookahead == 'u') then
          error_lex('it doest not permited unicode characters in char type')
        else
        -- When occur an unknown escape sequence, then we apply a common
        -- behavior that is to return a proper character. Ex.: '\e' -> e
          tokenvalue = tokenvalue..lookahead
          lookahead = getchar(stridl)
        end
      elseif (lookahead ~= '\'') then
        tokenvalue = lookahead
        lookahead = getchar(stridl)
      end
      if (lookahead ~= '\'') then
        if not lookahead then
          error_lex('"\'" expected near \'<eof>\'')
        else
          error_lex('"\'" expected'..is_near_value(tokenvalue))
        end
      end
      lookahead = getchar(stridl)
      token = tab_tokens.TK_CHAR_LITERAL
      return token
  -- string literal
    elseif (lookahead == '"') then
      lookahead = getchar(stridl)
      tokenvalue = ''
      while true do
        if (lookahead == '\\') then
          lookahead = getchar(stridl)
          if (lookahead == 'n') then
            tokenvalue = tokenvalue..'\n'
            lookahead = getchar(stridl)
          elseif (lookahead == 't') then
            tokenvalue = tokenvalue..'\t'
            lookahead = getchar(stridl)
          elseif (lookahead == 'v') then
            tokenvalue = tokenvalue..'\v'
            lookahead = getchar(stridl)
          elseif (lookahead == 'b') then
            tokenvalue = tokenvalue..'\b'
            lookahead = getchar(stridl)
          elseif (lookahead == 'r') then
            tokenvalue = tokenvalue..'\r'
            lookahead = getchar(stridl)
          elseif (lookahead == 'f') then
            tokenvalue = tokenvalue..'\f'
            lookahead = getchar(stridl)
          elseif (lookahead == 'a') then
            tokenvalue = tokenvalue..'\a'
            lookahead = getchar(stridl)
          elseif (lookahead == '\\') then
            tokenvalue = tokenvalue..'\\'
            lookahead = getchar(stridl)
          elseif (lookahead == '?') then
            tokenvalue = tokenvalue..'?'
            lookahead = getchar( stridl )
          elseif (lookahead == '\'') then
            tokenvalue = tokenvalue..'\''
            lookahead = getchar(stridl)
          elseif (lookahead == '"') then
            tokenvalue = tokenvalue..'"'
            lookahead = getchar(stridl)
          elseif is_octal_digit(lookahead) then
            local num_digits = 1
            local tokenvalue_tmp = lookahead
            lookahead = getchar(stridl)
            while is_octal_digit(lookahead) do
              tokenvalue_tmp = tokenvalue_tmp..lookahead
              lookahead = getchar(stridl)
              num_digits = num_digits + 1
              if (num_digits == 3) then
                lookahed = getchar(stridl)
                break
              end
            end
            tokenvalue = tokenvalue..string.char(tonumber(tokenvalue_tmp ,8))
          elseif (lookahead == 'x') then
            local tokenvalue_tmp = '0x'
            lookahead = getchar(stridl)
            if is_hex_digit(lookahead) then
              tokenvalue_tmp = tokenvalue_tmp..lookahead
              lookahead = getchar(stridl)
              if is_hex_digit(lookahead) then
                tokenvalue_tmp = tokenvalue_tmp..lookahead
                lookahead = getchar(stridl)
              end
            end
            tokenvalue = tokenvalue..string.char(tonumber(tokenvalue_tmp , 10))
          elseif (lookahead == 'u') then
            error_lex('it doest not permited unicode characters in char type')
          else
          -- When occur an unknown escape sequence, then we apply a common
          -- behavior that is to return a proper character. Ex.: '\e' -> e
            tokenvalue = tokenvalue..lookahead
            lookahead = getchar(stridl)
          end
        elseif (lookahead == '"') then
          break
        elseif not lookahead then
          error_lex('nonterminated string')
        else
          tokenvalue = tokenvalue..lookahead
          lookahead = getchar(stridl)
        end
      end
      lookahead = getchar(stridl)
      token = tab_tokens.TK_STRING_LITERAL
      return token
  -- identifiers
  -- keywords
    elseif is_alpha(lookahead) or (lookahead == '_') then
      local lexbuf = lookahead
      lookahead = getchar(stridl)
      while is_alpha(lookahead) or (lookahead == '_') or is_digit(lookahead) do
        lexbuf = lexbuf..lookahead
        lookahead = getchar(stridl)
      end
      if (string.sub(lexbuf ,1 ,1) ~= '_') then
        tokenvalue, tk = search_symbols(lexbuf , tab_keywords)
        if tk == "collide" then
          error_lex("'"..lexbuf.."' collides with keyword '"..tokenvalue.."'")
        elseif tk then
          token = tk
          return token
        end
      else
        lexbuf = string.sub(lexbuf ,2)
      end
      tokenvalue, token = search_symbols_wocollide(lexbuf ,tab_symbols)
--      if token == "collide" then
--        error_lex( "'"..lexbuf.."' and '"..tokenvalue.."' collide" )
      if not token then
        insert_symbols(lexbuf)
        tokenvalue = lexbuf
      end
      token = tab_tokens.TK_ID
      return token
  -- operators and other characters
    else
      tokenvalue = lookahead
      lookahead = getchar(stridl)
      token = tokenvalue
      return token
    end
  end
end