(#299) Introduce tokenizer

master
rexim 2018-08-27 03:36:23 +07:00
parent 42ccac9242
commit 349779a8c8
3 changed files with 123 additions and 0 deletions

View File

@ -45,6 +45,7 @@ set(SOURCE_FILES
src/math/triangle.c
src/script/expr.c
src/script/parser.c
src/script/tokenizer.c
src/sdl/renderer.c
src/system/error.c
src/system/lt.c
@ -82,6 +83,7 @@ set(HEADER_FILES
src/math/triangle.h
src/script/expr.h
src/script/parser.h
src/script/tokenizer.h
src/sdl/renderer.h
src/system/error.h
src/system/lt.h
@ -96,6 +98,8 @@ add_executable(script_test
src/script/expr.c
src/script/parser.h
src/script/parser.c
src/script/tokenizer.h
src/script/tokenizer.c
)
target_link_libraries(nothing ${SDL2_LIBRARY} ${SDL2_MIXER_LIBRARY})

97
src/script/tokenizer.c Normal file
View File

@ -0,0 +1,97 @@
#include <stdbool.h>
#include <assert.h>
#include <ctype.h>
#include <stdlib.h>
#include "./tokenizer.h"
/**
 * Tells whether x may be part of a symbol token.
 *
 * Every character is accepted except whitespace (as classified by
 * isspace()) and the delimiters ( ) " ' ;
 *
 * @param x character to classify
 * @return true if x is allowed inside a symbol, false otherwise
 */
static bool is_symbol_char(char x)
{
    static const char forbidden_symbol_chars[] = {
        '(', ')', '"', '\'', ';'
    };
    static const size_t n =
        sizeof(forbidden_symbol_chars) / sizeof(forbidden_symbol_chars[0]);

    /* <ctype.h> classifiers need a value representable as unsigned char
       (or EOF). Plain char may be signed, so convert before the call. */
    if (isspace((unsigned char) x)) {
        return false;
    }

    for (size_t i = 0; i < n; ++i) {
        if (x == forbidden_symbol_chars[i]) {
            return false;
        }
    }

    return true;
}
/**
 * Advances past leading whitespace.
 *
 * @param str NUL-terminated string; must not be NULL
 * @return pointer to the first non-whitespace character of str, or to
 *         its terminating NUL if there is none
 */
static const char *skip_whitespace(const char *str)
{
    assert(str);

    /* Convert to unsigned char: passing a negative plain char to
       isspace() is undefined behaviour. */
    while (*str != 0 && isspace((unsigned char) *str)) {
        str++;
    }

    return str;
}
/**
 * Finds the next double-quote character.
 *
 * @param str NUL-terminated string; must not be NULL
 * @return pointer to the first '"' in str, or to the terminating NUL
 *         if str contains no double quote
 */
static const char *next_quote(const char *str)
{
    assert(str);

    const char *cursor = str;
    for (;;) {
        if (*cursor == '\0' || *cursor == '"') {
            return cursor;
        }
        ++cursor;
    }
}
/**
 * Advances to the end of the current line.
 *
 * @param str NUL-terminated string; must not be NULL
 * @return pointer to the first '\n' in str, or to the terminating NUL
 *         if str contains no newline
 */
static const char *skip_until_newline(const char *str)
{
    assert(str);

    const char *cursor = str;
    for (; *cursor != '\0'; ++cursor) {
        if (*cursor == '\n') {
            break;
        }
    }

    return cursor;
}
/**
 * Advances past a run of symbol characters.
 *
 * @param str NUL-terminated string; must not be NULL
 * @return pointer to the first character of str for which
 *         is_symbol_char() is false, or to the terminating NUL if every
 *         character is a symbol character
 */
static const char *next_non_symbol(const char *str)
{
    assert(str);

    const char *cursor = str;
    for (; *cursor != '\0'; ++cursor) {
        if (!is_symbol_char(*cursor)) {
            break;
        }
    }

    return cursor;
}
/**
 * Extracts the first token found in str.
 *
 * Leading whitespace and ';' comments (which run to the end of the line)
 * are skipped. The token is then one of:
 *   - a single '(' or ')';
 *   - a string starting at '"' and running through the closing '"', or to
 *     the end of the input when the string is unterminated;
 *   - otherwise, the first character plus the run of symbol characters
 *     that follows it.
 *
 * @param str NUL-terminated input, may be NULL
 * @return token delimited by [begin, end) pointing into str; both
 *         pointers are NULL when str is NULL or holds no token
 */
struct Token next_token(const char *str)
{
    if (!str) {
        return token(NULL, NULL);
    }

    str = skip_whitespace(str);

    while (*str == ';') {
        str = skip_until_newline(str + 1);
        str = skip_whitespace(str);
    }

    /* Check for end of input only after comments are skipped: input that
       ends inside or right after a comment must not reach the switch
       below, whose default case reads from str + 1. */
    if (*str == 0) {
        return token(NULL, NULL);
    }

    switch (*str) {
    case '(':
    case ')':
        return token(str, str + 1);

    case '"': {
        const char *str_end = next_quote(str + 1);
        /* Include the closing quote when there is one. */
        return token(str, *str_end == 0 ? str_end : str_end + 1);
    }

    default:
        return token(str, next_non_symbol(str + 1));
    }
}

22
src/script/tokenizer.h Normal file
View File

@ -0,0 +1,22 @@
#ifndef TOKENIZER_H_
#define TOKENIZER_H_
/**
 * A token expressed as a half-open character range [begin, end).
 * Both pointers are NULL when there is no token.
 */
struct Token
{
    const char *begin;  /* first character of the token */
    const char *end;    /* one past the last character of the token */
};

/**
 * Builds a Token from its two boundaries.
 *
 * Declared static inline: a plain `inline` definition in a header does
 * not provide an external definition in C99/C11, so any call the compiler
 * chooses not to inline would fail at link time.
 *
 * @param begin first character of the token (or NULL)
 * @param end   one past the last character of the token (or NULL)
 * @return Token holding begin and end unchanged
 */
static inline struct Token token(const char *begin, const char *end)
{
    return (struct Token) {
        .begin = begin,
        .end = end
    };
}
/**
 * Extracts the first token found in str, skipping leading whitespace and
 * ';' line comments.
 *
 * @param str NUL-terminated input, may be NULL
 * @return token whose begin/end point into str; both are NULL when str
 *         is NULL or contains only whitespace
 */
struct Token next_token(const char *str);
#endif // TOKENIZER_H_