/*
 * Copyright (c) 2015 Andrew Kelley
 *
 * This file is part of zig, which is MIT licensed.
 * See http://opensource.org/licenses/MIT
 */

#include "util.hpp"
#include "buffer.hpp"
#include "list.hpp"

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>

/*
 * Reads the entire contents of `f` into a newly allocated Buf.
 *
 * The size is taken from fstat(), so the whole read is sized up front.
 * Panics if the file cannot be stat'ed, is larger than INT_MAX bytes, or
 * yields fewer bytes than fstat() reported.
 *
 * NOTE(review): for streams that are not regular files (e.g. a pipe on
 * stdin) st_size may not describe the available data -- confirm whether
 * "-" as input is expected to work.
 */
static Buf *fetch_file(FILE *f) {
    int fd = fileno(f);
    struct stat st;
    if (fstat(fd, &st))
        zig_panic("unable to stat file: %s", strerror(errno));

    off_t big_size = st.st_size;
    if (big_size > INT_MAX)
        zig_panic("file too big");
    int size = (int)big_size;

    Buf *buf = buf_alloc_fixed(size);
    size_t amt_read = fread(buf_ptr(buf), 1, buf_len(buf), f);
    if (amt_read != (size_t)buf_len(buf))
        zig_panic("error reading: %s", strerror(errno));

    return buf;
}

/* Prints the command-line synopsis to stderr and returns the exit code 1. */
static int usage(const char *arg0) {
    fprintf(stderr, "Usage: %s in-grammar.txt out-parser.c\n", arg0);
    return 1;
}

/* A named terminal of the grammar; `id` is its index in Gen::tokens. */
struct Token {
    Buf name;
    int id;
};

struct RuleNode;

struct RuleTuple {
    ZigList<RuleNode *> children;
};

struct RuleMany {
    RuleNode *child;
};

struct RuleOption {
    // NOTE(review): element type is not exercised anywhere in this file;
    // assumed to match the other rule lists -- confirm.
    ZigList<RuleNode *> child;
};

struct RuleOr {
    ZigList<RuleNode *> children;
};

struct RuleToken {
    Token *token;
};

struct RuleBlock {
    Buf *body;
};

struct RuleList {
    RuleNode *rule;
    RuleToken *separator;
};

struct RuleSubRule {
    RuleNode *child;
};

enum RuleNodeType {
    RuleNodeTypeTuple,
    RuleNodeTypeMany,
    RuleNodeTypeList,
    RuleNodeTypeOption,
    RuleNodeTypeOr,
    RuleNodeTypeToken,
    RuleNodeTypeSubRule,
};

/* One node of the grammar tree; `type` selects the active union member. */
struct RuleNode {
    RuleNodeType type;
    union {
        RuleTuple tuple;
        RuleMany many;
        RuleList list;
        RuleOption option;
        RuleOr _or;
        RuleToken token;
        RuleSubRule sub_rule;
    };
};

enum ParserStateType {
    ParserStateTypeError,
    ParserStateTypeOk,
};

struct ParserStateError {
    Buf *msg;
};

struct ParserState {
    ParserStateType type;
    // One for each token ID.
    ParserState **transition;
    // Position of this state in Gen::transition_table.
    int index;
    union {
        ParserStateError error;
    };
};

/* Generator context: the state currently being extended plus all tables. */
struct Gen {
    ParserState *cur_state;
    ZigList<ParserState *> transition_table;
    ZigList<Token *> tokens;
    RuleNode *root;
};

/*
 * Allocates a state of the given type, appends it to g->transition_table
 * and returns it. The state's index is its position in that table.
 *
 * The transition array is sized from g->tokens.length at the moment of the
 * call, so every token must be registered before the first state is created.
 */
static ParserState *create_state(Gen *g, ParserStateType type) {
    ParserState *state = allocate<ParserState>(1);
    state->type = type;
    state->index = g->transition_table.length;
    state->transition = allocate<ParserState *>(g->tokens.length);
    g->transition_table.append(state);
    return state;
}

/* Points every token transition of `source` at `dest`. */
static void fill_state_with_transition(Gen *g, ParserState *source, ParserState *dest) {
    for (int i = 0; i < g->tokens.length; i += 1) {
        source->transition[i] = dest;
    }
}

/*
 * Extends the state machine, starting at g->cur_state, so that it accepts
 * the rule described by `node`. Only token and tuple nodes are implemented;
 * every other node type panics with "TODO".
 */
static void gen(Gen *g, RuleNode *node) {
    switch (node->type) {
        case RuleNodeTypeToken:
            {
                ParserState *ok_state = create_state(g, ParserStateTypeOk);
                ParserState *err_state = create_state(g, ParserStateTypeError);

                err_state->error.msg = buf_sprintf("expected token '%s'", buf_ptr(&node->token.token->name));

                // Default every token to the error state, then override the
                // single expected token so that it advances to ok_state.
                fill_state_with_transition(g, g->cur_state, err_state);
                g->cur_state->transition[node->token.token->id] = ok_state;
                g->cur_state = ok_state;
            }
            break;
        case RuleNodeTypeTuple:
            {
                // A tuple is its children matched one after another; each
                // child continues from wherever the previous one left off.
                for (int i = 0; i < node->tuple.children.length; i += 1) {
                    RuleNode *child = node->tuple.children.at(i);
                    gen(g, child);
                }
            }
            break;
        case RuleNodeTypeMany:
            zig_panic("TODO");
            break;
        case RuleNodeTypeList:
            zig_panic("TODO");
            break;
        case RuleNodeTypeOption:
            zig_panic("TODO");
            break;
        case RuleNodeTypeOr:
            zig_panic("TODO");
            break;
        case RuleNodeTypeSubRule:
            zig_panic("TODO");
            break;
    }
}

/* Linear search of g->tokens by name; returns nullptr when there is no match. */
static Token *find_token_by_name(Gen *g, Buf *name) {
    for (int i = 0; i < g->tokens.length; i += 1) {
        Token *token = g->tokens.at(i);
        if (buf_eql_buf(name, &token->name))
            return token;
    }
    return nullptr;
}

/*
 * Returns the token called `name`, registering it first if it is unknown.
 * A new token copies the name and takes the next free id (the current
 * length of g->tokens).
 */
static Token *find_or_create_token(Gen *g, Buf *name) {
    Token *token = find_token_by_name(g, name);
    if (!token) {
        token = allocate<Token>(1);
        token->id = g->tokens.length;
        buf_init_from_mem(&token->name, buf_ptr(name), buf_len(name));
        g->tokens.append(token);
    }
    return token;
}

/*
 * Entry point: parsergen IN OUT
 *
 * Opens IN and OUT ("-" selects stdin / stdout), builds the state machine
 * for the grammar and writes the generated parser source to OUT. The grammar
 * is currently hard-coded; the contents of IN are read but not yet used.
 *
 * Returns 1 after printing usage when fewer than two arguments are given,
 * 0 on success. Any I/O failure panics.
 */
int main(int argc, char **argv) {
    // Check argc before touching argv[1] / argv[2]: only argv[0..argc] is
    // guaranteed to exist, so argv[2] is out of bounds when argc == 1.
    if (argc < 3)
        return usage((argc > 0 && argv[0]) ? argv[0] : "parsergen");

    const char *in_filename = argv[1];
    const char *out_filename = argv[2];

    FILE *in_f;
    if (strcmp(in_filename, "-") == 0) {
        in_f = stdin;
    } else {
        in_f = fopen(in_filename, "rb");
    }

    FILE *out_f;
    if (strcmp(out_filename, "-") == 0) {
        out_f = stdout;
    } else {
        out_f = fopen(out_filename, "wb");
    }

    if (!in_f || !out_f)
        zig_panic("unable to open file(s)");

    Buf *in_buf = fetch_file(in_f);

    ZigList<RuleNode *> rules = {0};
    Gen g = {0};

    // TODO: initialize rules from the grammar file instead of hard-coding them.
    {
        Token *star_token = find_or_create_token(&g, buf_create_from_str("Star"));
        Token *lparen_token = find_or_create_token(&g, buf_create_from_str("LParen"));
        Token *eof_token = find_or_create_token(&g, buf_create_from_str("Eof"));

        RuleNode *root = allocate<RuleNode>(1);
        root->type = RuleNodeTypeTuple;

        RuleNode *star_node = allocate<RuleNode>(1);
        star_node->type = RuleNodeTypeToken;
        star_node->token.token = star_token;
        root->tuple.children.append(star_node);

        RuleNode *lparen_node = allocate<RuleNode>(1);
        lparen_node->type = RuleNodeTypeToken;
        lparen_node->token.token = lparen_token;
        root->tuple.children.append(lparen_node);

        RuleNode *eof_node = allocate<RuleNode>(1);
        eof_node->type = RuleNodeTypeToken;
        eof_node->token.token = eof_token;
        root->tuple.children.append(eof_node);

        rules.append(root);
    }

    // All tokens exist by now, which create_state() relies on when it sizes
    // each state's transition array.
    g.root = rules.at(0);
    g.cur_state = create_state(&g, ParserStateTypeOk);
    gen(&g, g.root);

    (void)in_buf;

    fprintf(out_f, "/* This file is auto-generated by parsergen.cpp */\n");
    fprintf(out_f, "#include \"src/parser.hpp\"\n");
    // The generated parser calls fprintf(stderr, ...), so it needs <stdio.h>.
    fprintf(out_f, "#include <stdio.h>\n");

    fprintf(out_f, "\n");
    fprintf(out_f, "/*\n");
    fprintf(out_f, "enum TokenId {\n");
    for (int i = 0; i < g.tokens.length; i += 1) {
        Token *token = g.tokens.at(i);
        fprintf(out_f, " TokenId%s = %d,\n", buf_ptr(&token->name), token->id);
    }
    fprintf(out_f, "};\n");
    fprintf(out_f, "*/\n");
    for (int i = 0; i < g.tokens.length; i += 1) {
        Token *token = g.tokens.at(i);
        fprintf(out_f, "static_assert(TokenId%s == %d, \"wrong token id\");\n",
                buf_ptr(&token->name), token->id);
    }

    // The generated code takes the address of tokens->at(i) as a Token *,
    // so the list's element type is Token.
    fprintf(out_f, "AstNode * ast_parse(Buf *buf, ZigList<Token> *tokens) {\n");

    // Transition table: one row per state, one column per token id,
    // -1 where a state has no transition for that token.
    fprintf(out_f, " static const int transition[%d][%d] = {\n", g.transition_table.length, g.tokens.length);
    for (int state_index = 0; state_index < g.transition_table.length; state_index += 1) {
        ParserState *state = g.transition_table.at(state_index);
        fprintf(out_f, " {\n");

        for (int token_id = 0; token_id < g.tokens.length; token_id += 1) {
            ParserState *dest = state->transition[token_id];
            fprintf(out_f, " %d,\n", dest ? dest->index : -1);
        }

        fprintf(out_f, " },\n");
    }
    fprintf(out_f, " };\n");

    fprintf(out_f, " int state = 0;\n");
    fprintf(out_f, " AstNode *root = nullptr;\n");

    fprintf(out_f, " for (int i = 0; i < tokens->length; i += 1) {\n");
    fprintf(out_f, " Token *token = &tokens->at(i);\n");
    fprintf(out_f, " switch (state) {\n");

    for (int i = 0; i < g.transition_table.length; i += 1) {
        ParserState *state = g.transition_table.at(i);
        fprintf(out_f, " case %d:\n", i);
        fprintf(out_f, " fprintf(stderr, \"state = %%d\\n\", state);\n");
        switch (state->type) {
            case ParserStateTypeError:
                fprintf(out_f, " ast_error(token, \"%s\");\n", buf_ptr(state->error.msg));
                break;
            case ParserStateTypeOk:
                fprintf(out_f, " assert(transition[%d][token->id] >= 0);\n", state->index);
                fprintf(out_f, " assert(transition[%d][token->id] < %d);\n",
                        state->index, g.transition_table.length);
                fprintf(out_f, " state = transition[%d][token->id];\n", state->index);
                break;
        }
        fprintf(out_f, " break;\n");
    }
    fprintf(out_f, " default:\n");
    fprintf(out_f, " zig_panic(\"unreachable\");\n");
    fprintf(out_f, " }\n");
    fprintf(out_f, " }\n");
    fprintf(out_f, " return root;\n");
    fprintf(out_f, "}\n");

    // Surface write failures instead of silently leaving a truncated parser
    // behind: buffered data is only committed on flush/close.
    if (ferror(out_f))
        zig_panic("error writing output");
    if (out_f == stdout) {
        if (fflush(out_f) != 0)
            zig_panic("error writing output: %s", strerror(errno));
    } else if (fclose(out_f) != 0) {
        zig_panic("error writing output: %s", strerror(errno));
    }

    if (in_f != stdin)
        fclose(in_f);

    return 0;
}