progress toward more complex parser gen

2015-11-04 17:15:46 -07:00 · 2015-11-04 17:15:46 -07:00 · 174baa49bd
parent c36cd9d313
commit 174baa49bd
9 changed files with 151 additions and 101 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -21,14 +21,15 @@ include_directories(
 )

 set(GRAMMAR_TXT "${CMAKE_BINARY_DIR}/simple.txt")
-set(PARSER_CPP "${CMAKE_BINARY_DIR}/parser.cpp")
+set(PARSER_GENERATED_CPP "${CMAKE_BINARY_DIR}/parser_generated.cpp")

 set(ZIG_SOURCES
    "${CMAKE_SOURCE_DIR}/src/main.cpp"
    "${CMAKE_SOURCE_DIR}/src/util.cpp"
    "${CMAKE_SOURCE_DIR}/src/buffer.cpp"
    "${CMAKE_SOURCE_DIR}/src/tokenizer.cpp"
-    ${PARSER_CPP}
+    "${CMAKE_SOURCE_DIR}/src/parser.cpp"
+    ${PARSER_GENERATED_CPP}
 )

 set(PARSERGEN_SOURCES
@ -68,8 +69,8 @@ set_target_properties(parsergen PROPERTIES


 add_custom_command(
-    OUTPUT ${PARSER_CPP}
-    COMMAND parsergen ARGS ${GRAMMAR_TXT} ${PARSER_CPP}
+    OUTPUT ${PARSER_GENERATED_CPP}
+    COMMAND parsergen ARGS ${GRAMMAR_TXT} ${PARSER_GENERATED_CPP}
    DEPENDS ${GRAMMAR_TXT} ${PARSERGEN_SOURCES}
    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
 )
--- a/README.md
+++ b/README.md
@ -19,7 +19,7 @@ readable, safe, optimal, and concise code to solve any computing problem.
 * Eliminate the need for C headers (when using zig internally).
 * Ability to declare dependencies as Git URLs with commit locking (can
   provide a tag or SHA-1).
- * Rust-style enums.
+ * Tagged union enum type.
 * Opinionated when it makes life easier.
   - Tab character in source code is a compile error.
   - Whitespace at the end of line is a compile error.
@ -32,23 +32,29 @@ readable, safe, optimal, and concise code to solve any computing problem.
 * Hello, world.
   - Build AST
   - Code Gen
+   - Produce .o file.
+ * Produce executable file instead of .o file.
+ * Add debugging symbols.
+ * Debug/Release mode.
 * C style comments.
 * Unit tests.
 * Simple .so library
 * How should the Widget use case be solved? In Genesis I'm using C++ and inheritance.

-## Grammar
+### Primitive Numeric Types:

-```
-Root : FnDecl*
-FnDecl : TokenFn TokenSymbol TokenLParen list(ParamDecl, TokenComma, 0) TokenRParen (TokenArrow Type)? Block
-ParamDecl : TokenSymbol TokenColon Type
-Type : TokenSymbol | PointerType
-PointerType : TokenStar (TokenConst | TokenMut) Type
-Block : TokenLBrace Statement* Expression? TokenRBrace
-Statement : ExpressionStatement | ReturnStatement
-ExpressionStatement : Expression TokenSemicolon
-ReturnStatement : TokenReturn Expression TokenSemicolon
-Expression : TokenNumber | TokenString | FnCall
-FnCall : TokenSymbol TokenLParen list(Expression, TokenComma, 0) TokenRParen
-```
+zig    | C equivalent | Description
+-------|--------------|-------------------------------
+    i8 |       int8_t |    signed 8-bit integer
+    u8 |      uint8_t |  unsigned 8-bit integer
+   i16 |      int16_t |   signed 16-bit integer
+   u16 |     uint16_t | unsigned 16-bit integer
+   i32 |      int32_t |   signed 32-bit integer
+   u32 |     uint32_t | unsigned 32-bit integer
+   i64 |      int64_t |   signed 64-bit integer
+   u64 |     uint64_t | unsigned 64-bit integer
+   f32 |        float |  32-bit IEEE 754 floating point
+   f64 |       double |  64-bit IEEE 754 floating point
+  f128 |  long double | 128-bit IEEE 754 floating point
+ isize |      ssize_t |   signed pointer sized integer
+ usize |       size_t | unsigned pointer sized integer
--- a/src/main.cpp
+++ b/src/main.cpp
@ -15,7 +15,6 @@
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
-#include <stdarg.h>
 #include <limits.h>
 #include <stdint.h>
 #include <errno.h>
@ -50,82 +49,6 @@ static Buf *fetch_file(FILE *f) {
    return buf;
 }

-void ast_error(Token *token, const char *format, ...) {
-    int line = token->start_line + 1;
-    int column = token->start_column + 1;
-
-    va_list ap;
-    va_start(ap, format);
-    fprintf(stderr, "Error: Line %d, column %d: ", line, column);
-    vfprintf(stderr, format, ap);
-    fprintf(stderr, "\n");
-    va_end(ap);
-    exit(EXIT_FAILURE);
-}
-
-static const char *node_type_str(NodeType node_type) {
-    switch (node_type) {
-        case NodeTypeRoot:
-            return "Root";
-        case NodeTypeFnDecl:
-            return "FnDecl";
-        case NodeTypeParamDecl:
-            return "ParamDecl";
-        case NodeTypeType:
-            return "Type";
-        case NodeTypePointerType:
-            return "PointerType";
-        case NodeTypeBlock:
-            return "Block";
-        case NodeTypeStatement:
-            return "Statement";
-        case NodeTypeExpressionStatement:
-            return "ExpressionStatement";
-        case NodeTypeReturnStatement:
-            return "ReturnStatement";
-        case NodeTypeExpression:
-            return "Expression";
-        case NodeTypeFnCall:
-            return "FnCall";
-    }
-    zig_panic("unreachable");
-}
-
-static void ast_print(AstNode *node, int indent) {
-    for (int i = 0; i < indent; i += 1) {
-        fprintf(stderr, " ");
-    }
-
-    switch (node->type) {
-        case NodeTypeRoot:
-            fprintf(stderr, "%s\n", node_type_str(node->type));
-            for (int i = 0; i < node->data.root.fn_decls.length; i += 1) {
-                AstNode *child = node->data.root.fn_decls.at(i);
-                ast_print(child, indent + 2);
-            }
-            break;
-        case NodeTypeFnDecl:
-            {
-                Buf *name_buf = &node->data.fn_decl.name;
-                fprintf(stderr, "%s '%s'\n", node_type_str(node->type), buf_ptr(name_buf));
-
-                for (int i = 0; i < node->data.fn_decl.params.length; i += 1) {
-                    AstNode *child = node->data.fn_decl.params.at(i);
-                    ast_print(child, indent + 2);
-                }
-
-                ast_print(node->data.fn_decl.return_type, indent + 2);
-
-                ast_print(node->data.fn_decl.body, indent + 2);
-
-                break;
-            }
-        default:
-            fprintf(stderr, "%s\n", node_type_str(node->type));
-            break;
-    }
-}
-
 char cur_dir[1024];

 int main(int argc, char **argv) {
--- a/src/parser.cpp
+++ b/src/parser.cpp
@ -0,0 +1,80 @@
+#include "parser.hpp"
+
+#include <stdarg.h>
+#include <stdio.h>
+/*
+ * Report a fatal error located at `token` and terminate the process.
+ *
+ * Writes "Error: Line <line>, column <column>: " to stderr, followed by the
+ * message built from the printf-style `format` and its variadic arguments,
+ * then a newline. It finishes with exit(EXIT_FAILURE), so control never
+ * returns to the caller.
+ *
+ * NOTE(review): exit() and EXIT_FAILURE come from <stdlib.h>, which this file
+ * does not include directly; presumably parser.hpp pulls it in -- confirm.
+ */
+void ast_error(Token *token, const char *format, ...) {
+    int line = token->start_line + 1; // +1 for display; presumably stored zero-based -- TODO confirm
+    int column = token->start_column + 1; // same adjustment as for the line number
+
+    va_list ap;
+    va_start(ap, format);
+    fprintf(stderr, "Error: Line %d, column %d: ", line, column); // location prefix
+    vfprintf(stderr, format, ap); // caller-supplied message
+    fprintf(stderr, "\n");
+    va_end(ap);
+    exit(EXIT_FAILURE); // errors reported here are always fatal
+}
+/*
+ * Return the printable name of a NodeType enumerator (for example
+ * NodeTypeFnDecl yields "FnDecl").
+ *
+ * Every case returns a string literal. The switch has no default label; a
+ * value that matches none of the cases falls out of the switch and reaches
+ * zig_panic("unreachable").
+ *
+ * NOTE(review): there is no return after zig_panic, so this relies on
+ * zig_panic not returning -- presumably it is declared noreturn; confirm.
+ */
+const char *node_type_str(NodeType node_type) {
+    switch (node_type) {
+        case NodeTypeRoot:
+            return "Root";
+        case NodeTypeFnDecl:
+            return "FnDecl";
+        case NodeTypeParamDecl:
+            return "ParamDecl";
+        case NodeTypeType:
+            return "Type";
+        case NodeTypePointerType:
+            return "PointerType";
+        case NodeTypeBlock:
+            return "Block";
+        case NodeTypeStatement:
+            return "Statement";
+        case NodeTypeExpressionStatement:
+            return "ExpressionStatement";
+        case NodeTypeReturnStatement:
+            return "ReturnStatement";
+        case NodeTypeExpression:
+            return "Expression";
+        case NodeTypeFnCall:
+            return "FnCall";
+    }
+    zig_panic("unreachable"); // reached only when no case above matched
+}
+/*
+ * Dump the AST rooted at `node` to stderr, one node per line.
+ *
+ * Each line starts with `indent` spaces followed by the node's type name as
+ * given by node_type_str(). Children are printed recursively with the indent
+ * increased by 2:
+ *   - Root:   every entry of data.root.fn_decls.
+ *   - FnDecl: the line also shows the function name in single quotes, then
+ *             every parameter, the return type and the body are printed.
+ *   - Any other node type: only the type name is printed; its children are
+ *             not visited.
+ *
+ * NOTE(review): return_type and body are passed to ast_print unconditionally
+ * and ast_print dereferences its argument; if the parser can leave either
+ * pointer null (e.g. a function without a return type), this would crash --
+ * confirm against the parser.
+ */
+void ast_print(AstNode *node, int indent) {
+    for (int i = 0; i < indent; i += 1) { // emit the leading indentation
+        fprintf(stderr, " ");
+    }
+
+    switch (node->type) {
+        case NodeTypeRoot:
+            fprintf(stderr, "%s\n", node_type_str(node->type));
+            for (int i = 0; i < node->data.root.fn_decls.length; i += 1) { // each top-level function declaration
+                AstNode *child = node->data.root.fn_decls.at(i);
+                ast_print(child, indent + 2);
+            }
+            break;
+        case NodeTypeFnDecl:
+            {
+                Buf *name_buf = &node->data.fn_decl.name;
+                fprintf(stderr, "%s '%s'\n", node_type_str(node->type), buf_ptr(name_buf)); // type name plus quoted function name
+
+                for (int i = 0; i < node->data.fn_decl.params.length; i += 1) { // parameters first
+                    AstNode *child = node->data.fn_decl.params.at(i);
+                    ast_print(child, indent + 2);
+                }
+
+                ast_print(node->data.fn_decl.return_type, indent + 2); // then the return type
+
+                ast_print(node->data.fn_decl.body, indent + 2); // and finally the body
+
+                break;
+            }
+        default:
+            fprintf(stderr, "%s\n", node_type_str(node->type)); // other node types: name only, no recursion
+            break;
+    }
+}
--- a/src/parser.hpp
+++ b/src/parser.hpp
@ -82,6 +82,11 @@ struct AstNode {
 __attribute__ ((format (printf, 2, 3)))
 void ast_error(Token *token, const char *format, ...);

+// Defined in the generated parser source (parser_generated.cpp in the build directory), which the parsergen tool built from src/parsergen.cpp writes from the grammar file.
 AstNode * ast_parse(Buf *buf, ZigList<Token> *tokens);

+const char *node_type_str(NodeType node_type);
+
+void ast_print(AstNode *node, int indent);
+
 #endif
--- a/src/parsergen.cpp
+++ b/src/parsergen.cpp
@ -190,12 +190,17 @@ struct RuleNode {
 enum ParserStateType {
    ParserStateTypeError,
    ParserStateTypeOk,
+    ParserStateTypeCapture,
 };

 struct ParserStateError {
    Buf *msg;
 };

+struct ParserStateCapture { // payload used when a ParserState has type ParserStateTypeCapture
+    Buf *body; // gen() points this at the tuple rule node's body (&node->tuple.body)
+};
+
 struct ParserState {
    ParserStateType type;
    // One for each token ID.
@ -203,6 +208,7 @@ struct ParserState {
    int index;
    union {
        ParserStateError error;
+        ParserStateCapture capture;
    };
 };

@ -278,6 +284,8 @@ static void gen(Gen *g, RuleNode *node) {
                    RuleNode *child = node->tuple.children.at(i);
                    gen(g, child);
                }
+                g->cur_state->type = ParserStateTypeCapture;
+                g->cur_state->capture.body = &node->tuple.body;
            }
            break;
        case RuleNodeTypeMany:
@ -598,7 +606,8 @@ int main(int argc, char **argv) {
    g.cur_state = create_state(&g, ParserStateTypeOk);
    gen(&g, g.root);

-    fprintf(out_f, "/* This file is auto-generated by parsergen.cpp */\n");
+    fprintf(out_f, "/* This file is generated by parsergen.cpp */\n");
+    fprintf(out_f, "\n");
    fprintf(out_f, "#include \"src/parser.hpp\"\n");
    fprintf(out_f, "#include <stdio.h>\n");

@ -616,6 +625,17 @@ int main(int argc, char **argv) {
        fprintf(out_f, "static_assert(TokenId%s == %d, \"wrong token id\");\n",
                buf_ptr(&token->name), token->id);
    }
+    fprintf(out_f, "\n");
+
+    /* TODO
+    fprintf(out_f, "struct ParserGenNode{\n");
+    fprintf(out_f, "    union {\n");
+    fprintf(out_f, "        [%d];\n", biggest_tuple_len);
+    fprintf(out_f, "        Token *token;\n");
+    fprintf(out_f, "    };\n");
+    fprintf(out_f, "};\n");
+    fprintf(out_f, "\n");
+    */

    fprintf(out_f, "AstNode * ast_parse(Buf *buf, ZigList<Token> *tokens) {\n");

@ -644,7 +664,6 @@ int main(int argc, char **argv) {
    for (int i = 0; i < g.transition_table.length; i += 1) {
        ParserState *state = g.transition_table.at(i);
        fprintf(out_f, "            case %d:\n", i);
-        fprintf(out_f, "                fprintf(stderr, \"state = %%d\\n\", state);\n");
        switch (state->type) {
            case ParserStateTypeError:
                fprintf(out_f, "                ast_error(token, \"%s\");\n", buf_ptr(state->error.msg));
@ -655,6 +674,10 @@ int main(int argc, char **argv) {
                        state->index, g.transition_table.length);
                fprintf(out_f, "                state = transition[%d][token->id];\n", state->index);
                break;
+            case ParserStateTypeCapture:
+                // TODO fprintf(out_f, "                %s\n", buf_ptr(state->capture.body));
+                fprintf(out_f, "                state = transition[%d][token->id];\n", state->index);
+                break;
        }
        fprintf(out_f, "                break;\n");
    }
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@ -1,3 +1,10 @@
+/*
+ * Copyright (c) 2015 Andrew Kelley
+ *
+ * This file is part of zig, which is MIT licensed.
+ * See http://opensource.org/licenses/MIT
+ */
+
 #include "tokenizer.hpp"
 #include "util.hpp"

--- a/src/tokenizer.hpp
+++ b/src/tokenizer.hpp
@ -1,3 +1,10 @@
+/*
+ * Copyright (c) 2015 Andrew Kelley
+ *
+ * This file is part of zig, which is MIT licensed.
+ * See http://opensource.org/licenses/MIT
+ */
+
 #ifndef ZIG_TOKENIZER_HPP
 #define ZIG_TOKENIZER_HPP

--- a/test/hello.zig
+++ b/test/hello.zig
@ -1,6 +1,4 @@
-
-
-fn main(argc: isize, argv: *mut u8) -> isize {
+fn main(argc: i32, argv: *mut u8) -> i32 {
    puts("Hello, world!\n");
    return 0;
 }