semantic analysis checks for multiple definitions of functions
parent
a22bc8d20a
commit
3b4a2afb65
|
@ -29,6 +29,7 @@ set(ZIG_SOURCES
|
|||
"${CMAKE_SOURCE_DIR}/src/parser.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/src/tokenizer.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/src/util.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/src/codegen.cpp"
|
||||
)
|
||||
|
||||
set(CONFIGURE_OUT_FILE "${CMAKE_BINARY_DIR}/config.h")
|
||||
|
|
|
@ -149,5 +149,14 @@ static inline Buf *buf_dirname(Buf *buf) {
|
|||
return buf_create_from_mem("", 0);
|
||||
}
|
||||
|
||||
// FNV-1a 32-bit hash of the buffer's bytes (offset basis 2166136261,
// prime 16777619). Suitable as the HashFunction parameter of HashMap.
static inline uint32_t buf_hash(Buf *buf) {
    // Unsigned suffixes: 2166136261 does not fit in a signed int, so the
    // bare literal would otherwise be a (signed) long narrowed on assignment.
    uint32_t h = 2166136261u;
    // Hoist the length: the buffer is not modified inside the loop.
    int len = buf_len(buf);
    for (int i = 0; i < len; i += 1) {
        // xor-then-multiply ordering is what distinguishes FNV-1a from FNV-1.
        h = h ^ ((uint8_t)buf->list.at(i));
        h = h * 16777619u;
    }
    return h;
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,87 @@
|
|||
#include "codegen.hpp"
|
||||
#include "hash_map.hpp"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
// All state for semantic analysis and (eventually) code generation of one
// compilation unit. Created by create_codegen(); errors accumulate in-place.
struct CodeGen {
    // Root of the AST produced by ast_parse(); not owned exclusively here.
    AstNode *root;
    // Function name -> declaring NodeTypeFnDecl node, used to detect
    // redefinitions during analyze_node().
    HashMap<Buf *, AstNode *, buf_hash, buf_eql_buf> fn_decls;
    // Diagnostics collected so far; exposed via codegen_error_messages().
    ZigList<ErrorMsg> errors;
};
|
||||
|
||||
// Allocates a fresh CodeGen for the given AST root and prepares the
// function-declaration table. Caller keeps ownership of the returned pointer.
CodeGen *create_codegen(AstNode *root) {
    CodeGen *codegen = allocate<CodeGen>(1);
    codegen->root = root;
    // 32 buckets to start with; HashMap grows itself as declarations are added.
    codegen->fn_decls.init(32);
    return codegen;
}
|
||||
|
||||
// Appends one diagnostic anchored at `node`'s source position.
// Takes ownership of `msg` in the sense that it is stored, not copied.
static void add_node_error(CodeGen *g, AstNode *node, Buf *msg) {
    g->errors.add_one();
    ErrorMsg *err = &g->errors.last();
    err->msg = msg;
    err->line_start = node->line;
    err->column_start = node->column;
    // End of the range is not tracked yet; -1 marks it as unset.
    err->line_end = -1;
    err->column_end = -1;
}
|
||||
|
||||
// Recursively analyzes one AST node (pass 1). Currently the only check is
// duplicate function definitions; most node kinds are accepted unchecked.
static void analyze_node(CodeGen *g, AstNode *node) {
    switch (node->type) {
        case NodeTypeRoot:
            // Top level: analyze every function declaration in order.
            for (int i = 0; i < node->data.root.fn_decls.length; i += 1) {
                AstNode *child = node->data.root.fn_decls.at(i);
                analyze_node(g, child);
            }
            break;
        case NodeTypeFnDecl:
            {
                // First declaration of a name wins; a second one is reported
                // and its params/return type/body are deliberately NOT
                // analyzed (avoids cascading errors from the duplicate).
                auto entry = g->fn_decls.maybe_get(&node->data.fn_decl.name);
                if (entry) {
                    add_node_error(g, node,
                            buf_sprintf("redefinition of '%s'", buf_ptr(&node->data.fn_decl.name)));
                } else {
                    g->fn_decls.put(&node->data.fn_decl.name, node);
                    for (int i = 0; i < node->data.fn_decl.params.length; i += 1) {
                        AstNode *child = node->data.fn_decl.params.at(i);
                        analyze_node(g, child);
                    }
                    analyze_node(g, node->data.fn_decl.return_type);
                    analyze_node(g, node->data.fn_decl.body);
                }
                break;
            }
        case NodeTypeParamDecl:
            // Only the parameter's type is interesting at this stage.
            analyze_node(g, node->data.param_decl.type);
            break;
        // The remaining node kinds have no semantic checks yet; each case is
        // listed explicitly so the compiler can flag unhandled enum values.
        case NodeTypeType:
            break;
        case NodeTypePointerType:
            break;
        case NodeTypeBlock:
            break;
        case NodeTypeStatement:
            break;
        case NodeTypeExpressionStatement:
            break;
        case NodeTypeReturnStatement:
            break;
        case NodeTypeExpression:
            break;
        case NodeTypeFnCall:
            break;
    }
}
|
||||
|
||||
// Runs the first (and currently only) semantic pass over the whole AST.
// Any problems found are appended to g's error list rather than reported
// immediately; query them afterwards with codegen_error_messages().
void semantic_analyze(CodeGen *g) {
    analyze_node(g, g->root);
}
|
||||
|
||||
// Pass 2: emit output for the analyzed AST. Not implemented yet — the
// driver calls this after semantic_analyze() succeeds, so the empty body
// keeps the pipeline wired up.
void code_gen(CodeGen *g) {

}
|
||||
|
||||
// Returns the diagnostics accumulated by semantic_analyze(). The list is
// owned by `g`; the caller must not free it and should treat it read-only.
ZigList<ErrorMsg> *codegen_error_messages(CodeGen *g) {
    ZigList<ErrorMsg> *errors = &g->errors;
    return errors;
}
|
|
@ -0,0 +1,25 @@
|
|||
#ifndef ZIG_CODEGEN_HPP
#define ZIG_CODEGEN_HPP

#include "parser.hpp"

// Opaque analysis/code-generation context; defined in codegen.cpp.
struct CodeGen;

// One diagnostic message with its source range.
struct ErrorMsg {
    int line_start;
    int column_start;
    // NOTE(review): line_end/column_end appear to be set to -1 when the end
    // of the range is unknown — confirm against codegen.cpp before relying
    // on them.
    int line_end;
    int column_end;
    // Message text; lifetime is managed by the CodeGen that produced it.
    Buf *msg;
};


// Builds a CodeGen for the given AST root.
CodeGen *create_codegen(AstNode *root);

// Runs semantic analysis; errors are collected, not printed.
void semantic_analyze(CodeGen *g);

// Emits output for an analyzed tree.
void code_gen(CodeGen *g);

// Errors collected so far; owned by `g`.
ZigList<ErrorMsg> *codegen_error_messages(CodeGen *g);

#endif
|
|
@ -0,0 +1,220 @@
|
|||
#ifndef ZIG_HASH_MAP_HPP
#define ZIG_HASH_MAP_HPP

#include "util.hpp"

#include <stdint.h>

// Open-addressing hash table using robin-hood insertion and backward-shift
// deletion. HashFunction and EqualFn are supplied as template parameters so
// lookups compile down to direct calls. Keys and values are stored by value.
// Not thread-safe; concurrent modification during iteration is detected (as
// a panic) via a modification counter, not prevented.
template<typename K, typename V, uint32_t (*HashFunction)(K key), bool (*EqualFn)(K a, K b)>
class HashMap {
public:
    // Must be called before first use; `capacity` is the initial bucket count.
    void init(int capacity) {
        init_capacity(capacity);
    }
    // Frees the bucket array. Does not destroy keys/values beyond that.
    void deinit(void) {
        free(_entries);
    }

    struct Entry {
        bool used;
        // probe distance from the slot the key hashes to; drives robin hood.
        int distance_from_start_index;
        K key;
        V value;
    };

    // Removes all entries but keeps the current capacity.
    void clear() {
        for (int i = 0; i < _capacity; i += 1) {
            _entries[i].used = false;
        }
        _size = 0;
        _max_distance_from_start_index = 0;
        _modification_count += 1;
    }

    int size() const {
        return _size;
    }

    // Inserts or overwrites `key`. May grow (and rehash) the table.
    void put(const K &key, const V &value) {
        _modification_count += 1;
        internal_put(key, value);

        // if we get too full (80%), double the capacity
        if (_size * 5 >= _capacity * 4) {
            Entry *old_entries = _entries;
            int old_capacity = _capacity;
            init_capacity(_capacity * 2);
            // dump all of the old elements into the new table
            for (int i = 0; i < old_capacity; i += 1) {
                Entry *old_entry = &old_entries[i];
                if (old_entry->used)
                    internal_put(old_entry->key, old_entry->value);
            }
            free(old_entries);
        }
    }

    // Panics if the key is absent; use maybe_get for a checked lookup.
    const V &get(const K &key) const {
        Entry *entry = internal_get(key);
        if (!entry)
            zig_panic("key not found");
        return entry->value;
    }

    // Returns NULL when the key is absent. The Entry pointer is invalidated
    // by any subsequent put/remove/clear.
    Entry *maybe_get(const K &key) const {
        return internal_get(key);
    }

    // Removes `key`, shifting later same-cluster entries back one slot so no
    // tombstones are needed. Panics if the key is absent.
    void remove(const K &key) {
        _modification_count += 1;
        int start_index = key_to_index(key);
        for (int roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) {
            int index = (start_index + roll_over) % _capacity;
            Entry *entry = &_entries[index];

            // An unused slot inside the probe window means the key was never
            // inserted (robin hood keeps clusters contiguous).
            if (!entry->used)
                zig_panic("key not found");

            if (!EqualFn(entry->key, key))
                continue;

            // Found it: backward-shift the rest of the cluster into the gap.
            for (; roll_over < _capacity; roll_over += 1) {
                int next_index = (start_index + roll_over + 1) % _capacity;
                Entry *next_entry = &_entries[next_index];
                if (!next_entry->used || next_entry->distance_from_start_index == 0) {
                    entry->used = false;
                    _size -= 1;
                    return;
                }
                *entry = *next_entry;
                entry->distance_from_start_index -= 1;
                entry = next_entry;
            }
            zig_panic("shifting everything in the table");
        }
        zig_panic("key not found");
    }

    class Iterator {
    public:
        // Returns the next used entry, or NULL once all entries have been
        // returned. Panics if the map was modified since the iterator was made.
        Entry *next() {
            if (_initial_modification_count != _table->_modification_count)
                zig_panic("concurrent modification");
            if (_count >= _table->size())
                return NULL;
            for (; _index < _table->_capacity; _index += 1) {
                Entry *entry = &_table->_entries[_index];
                if (entry->used) {
                    _index += 1;
                    _count += 1;
                    return entry;
                }
            }
            zig_panic("no next item");
        }
    private:
        const HashMap * _table;
        // how many items have we returned
        int _count = 0;
        // iterator through the entry array
        int _index = 0;
        // used to detect concurrent modification
        // (renamed from the misspelled `_inital_modification_count`)
        uint32_t _initial_modification_count;
        Iterator(const HashMap * table) :
            _table(table), _initial_modification_count(table->_modification_count) {
        }
        friend HashMap;
    };

    // you must not modify the underlying HashMap while this iterator is still in use
    Iterator entry_iterator() const {
        return Iterator(this);
    }

private:

    Entry *_entries;
    int _capacity;
    int _size;
    int _max_distance_from_start_index;
    // this is used to detect bugs where a hashtable is edited while an iterator is running.
    uint32_t _modification_count = 0;

    // (Re)allocates the bucket array; does not free the previous one — put()
    // handles migrating and freeing old buckets during growth.
    void init_capacity(int capacity) {
        _capacity = capacity;
        _entries = allocate<Entry>(_capacity);
        _size = 0;
        _max_distance_from_start_index = 0;
        for (int i = 0; i < _capacity; i += 1) {
            _entries[i].used = false;
        }
    }

    // Core robin-hood insert; takes key/value by value because they may be
    // swapped with a displaced resident entry mid-probe.
    void internal_put(K key, V value) {
        int start_index = key_to_index(key);
        for (int roll_over = 0, distance_from_start_index = 0;
                roll_over < _capacity; roll_over += 1, distance_from_start_index += 1)
        {
            int index = (start_index + roll_over) % _capacity;
            Entry *entry = &_entries[index];

            if (entry->used && !EqualFn(entry->key, key)) {
                if (entry->distance_from_start_index < distance_from_start_index) {
                    // robin hood to the rescue
                    Entry tmp = *entry;
                    if (distance_from_start_index > _max_distance_from_start_index)
                        _max_distance_from_start_index = distance_from_start_index;
                    *entry = {
                        true,
                        distance_from_start_index,
                        key,
                        value,
                    };
                    // Keep probing to re-home the evicted entry.
                    key = tmp.key;
                    value = tmp.value;
                    distance_from_start_index = tmp.distance_from_start_index;
                }
                continue;
            }

            if (!entry->used) {
                // adding an entry. otherwise overwriting old value with
                // same key
                _size += 1;
            }

            if (distance_from_start_index > _max_distance_from_start_index)
                _max_distance_from_start_index = distance_from_start_index;
            *entry = {
                true,
                distance_from_start_index,
                key,
                value,
            };
            return;
        }
        zig_panic("put into a full HashMap");
    }


    // Probes at most _max_distance_from_start_index+1 slots; an unused slot
    // terminates the search early (no tombstones exist).
    Entry *internal_get(const K &key) const {
        int start_index = key_to_index(key);
        for (int roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) {
            int index = (start_index + roll_over) % _capacity;
            Entry *entry = &_entries[index];

            if (!entry->used)
                return NULL;

            if (EqualFn(entry->key, key))
                return entry;
        }
        return NULL;
    }

    int key_to_index(const K &key) const {
        return (int)(HashFunction(key) % ((uint32_t)_capacity));
    }
};

#endif
|
30
src/main.cpp
30
src/main.cpp
|
@ -12,6 +12,7 @@
|
|||
#include "parser.hpp"
|
||||
#include "tokenizer.hpp"
|
||||
#include "error.hpp"
|
||||
#include "codegen.hpp"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
@ -75,24 +76,41 @@ static int build(const char *arg0, const char *in_file, const char *out_file, Zi
|
|||
cur_dir_path = buf_dirname(buf_create_from_str(in_file));
|
||||
}
|
||||
|
||||
Buf *in_data = fetch_file(in_f);
|
||||
|
||||
fprintf(stderr, "Original source:\n");
|
||||
fprintf(stderr, "----------------\n");
|
||||
Buf *in_data = fetch_file(in_f);
|
||||
fprintf(stderr, "%s\n", buf_ptr(in_data));
|
||||
|
||||
ZigList<Token> *tokens = tokenize(in_data, cur_dir_path);
|
||||
|
||||
fprintf(stderr, "\nTokens:\n");
|
||||
fprintf(stderr, "---------\n");
|
||||
ZigList<Token> *tokens = tokenize(in_data, cur_dir_path);
|
||||
print_tokens(in_data, tokens);
|
||||
|
||||
AstNode *root = ast_parse(in_data, tokens);
|
||||
assert(root);
|
||||
fprintf(stderr, "\nAST:\n");
|
||||
fprintf(stderr, "------\n");
|
||||
AstNode *root = ast_parse(in_data, tokens);
|
||||
assert(root);
|
||||
ast_print(root, 0);
|
||||
|
||||
fprintf(stderr, "\nSemantic Analysis:\n");
|
||||
fprintf(stderr, "--------------------\n");
|
||||
CodeGen *codegen = create_codegen(root);
|
||||
semantic_analyze(codegen);
|
||||
ZigList<ErrorMsg> *errors = codegen_error_messages(codegen);
|
||||
if (errors->length == 0) {
|
||||
fprintf(stderr, "OK\n");
|
||||
} else {
|
||||
for (int i = 0; i < errors->length; i += 1) {
|
||||
ErrorMsg *err = &errors->at(i);
|
||||
fprintf(stderr, "Error: Line %d, column %d: %s\n", err->line_start, err->column_start,
|
||||
buf_ptr(err->msg));
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
fprintf(stderr, "\nCode Generation:\n");
|
||||
fprintf(stderr, "------------------\n");
|
||||
code_gen(codegen);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -133,9 +133,11 @@ struct ParseContext {
|
|||
ZigList<Token> *tokens;
|
||||
};
|
||||
|
||||
// Allocates a new AST node of the given kind, recording the source position
// of the token that begins it (used later for error reporting).
// Note: a stale copy of the pre-change signature (without `first_token`)
// was left interleaved here; this is the single coherent definition.
static AstNode *ast_create_node(NodeType type, Token *first_token) {
    AstNode *node = allocate<AstNode>(1);
    node->type = type;
    node->line = first_token->start_line;
    node->column = first_token->start_column;
    return node;
}
|
||||
|
||||
|
@ -163,11 +165,11 @@ Type : token(Symbol) | PointerType;
|
|||
PointerType : token(Star) token(Const) Type | token(Star) token(Mut) Type;
|
||||
*/
|
||||
static AstNode *ast_parse_type(ParseContext *pc, int token_index, int *new_token_index) {
|
||||
AstNode *node = ast_create_node(NodeTypeType);
|
||||
|
||||
Token *token = &pc->tokens->at(token_index);
|
||||
token_index += 1;
|
||||
|
||||
AstNode *node = ast_create_node(NodeTypeType, token);
|
||||
|
||||
if (token->id == TokenIdSymbol) {
|
||||
node->data.type.type = AstNodeTypeTypePrimitive;
|
||||
ast_buf_from_token(pc, token, &node->data.type.primitive_name);
|
||||
|
@ -197,12 +199,13 @@ ParamDecl<node> : token(Symbol) token(Colon) Type {
|
|||
};
|
||||
*/
|
||||
static AstNode *ast_parse_param_decl(ParseContext *pc, int token_index, int *new_token_index) {
|
||||
AstNode *node = ast_create_node(NodeTypeParamDecl);
|
||||
|
||||
Token *param_name = &pc->tokens->at(token_index);
|
||||
token_index += 1;
|
||||
ast_expect_token(pc, param_name, TokenIdSymbol);
|
||||
|
||||
AstNode *node = ast_create_node(NodeTypeParamDecl, param_name);
|
||||
|
||||
|
||||
ast_buf_from_token(pc, param_name, &node->data.param_decl.name);
|
||||
|
||||
Token *colon = &pc->tokens->at(token_index);
|
||||
|
@ -280,12 +283,13 @@ static void ast_parse_fn_call_param_list(ParseContext *pc, int token_index, int
|
|||
FnCall : token(Symbol) token(LParen) list(Expression, token(Comma)) token(RParen) ;
|
||||
*/
|
||||
static AstNode *ast_parse_fn_call(ParseContext *pc, int token_index, int *new_token_index) {
|
||||
AstNode *node = ast_create_node(NodeTypeFnCall);
|
||||
|
||||
Token *fn_name = &pc->tokens->at(token_index);
|
||||
token_index += 1;
|
||||
ast_expect_token(pc, fn_name, TokenIdSymbol);
|
||||
|
||||
AstNode *node = ast_create_node(NodeTypeFnCall, fn_name);
|
||||
|
||||
|
||||
ast_buf_from_token(pc, fn_name, &node->data.fn_call.name);
|
||||
|
||||
ast_parse_fn_call_param_list(pc, token_index, &token_index, &node->data.fn_call.params);
|
||||
|
@ -295,9 +299,8 @@ static AstNode *ast_parse_fn_call(ParseContext *pc, int token_index, int *new_to
|
|||
}
|
||||
|
||||
static AstNode *ast_parse_expression(ParseContext *pc, int token_index, int *new_token_index) {
|
||||
AstNode *node = ast_create_node(NodeTypeExpression);
|
||||
|
||||
Token *token = &pc->tokens->at(token_index);
|
||||
AstNode *node = ast_create_node(NodeTypeExpression, token);
|
||||
if (token->id == TokenIdSymbol) {
|
||||
node->data.expression.type = AstNodeExpressionTypeFnCall;
|
||||
node->data.expression.data.fn_call = ast_parse_fn_call(pc, token_index, &token_index);
|
||||
|
@ -329,9 +332,9 @@ Expression : token(Number) | token(String) | FnCall ;
|
|||
FnCall : token(Symbol) token(LParen) list(Expression, token(Comma)) token(RParen) ;
|
||||
*/
|
||||
static AstNode *ast_parse_statement(ParseContext *pc, int token_index, int *new_token_index) {
|
||||
AstNode *node = ast_create_node(NodeTypeStatement);
|
||||
|
||||
Token *token = &pc->tokens->at(token_index);
|
||||
AstNode *node = ast_create_node(NodeTypeStatement, token);
|
||||
|
||||
if (token->id == TokenIdKeywordReturn) {
|
||||
token_index += 1;
|
||||
node->data.statement.type = AstNodeStatementTypeReturn;
|
||||
|
@ -362,12 +365,13 @@ static AstNode *ast_parse_statement(ParseContext *pc, int token_index, int *new_
|
|||
Block : token(LBrace) many(Statement) token(RBrace);
|
||||
*/
|
||||
static AstNode *ast_parse_block(ParseContext *pc, int token_index, int *new_token_index) {
|
||||
AstNode *node = ast_create_node(NodeTypeBlock);
|
||||
|
||||
Token *l_brace = &pc->tokens->at(token_index);
|
||||
token_index += 1;
|
||||
ast_expect_token(pc, l_brace, TokenIdLBrace);
|
||||
|
||||
AstNode *node = ast_create_node(NodeTypeBlock, l_brace);
|
||||
|
||||
|
||||
for (;;) {
|
||||
Token *token = &pc->tokens->at(token_index);
|
||||
if (token->id == TokenIdRBrace) {
|
||||
|
@ -386,12 +390,13 @@ static AstNode *ast_parse_block(ParseContext *pc, int token_index, int *new_toke
|
|||
FnDecl : token(Fn) token(Symbol) ParamDeclList option(token(Arrow) Type) Block;
|
||||
*/
|
||||
static AstNode *ast_parse_fn_decl(ParseContext *pc, int token_index, int *new_token_index) {
|
||||
AstNode *node = ast_create_node(NodeTypeFnDecl);
|
||||
|
||||
Token *fn_token = &pc->tokens->at(token_index);
|
||||
token_index += 1;
|
||||
ast_expect_token(pc, fn_token, TokenIdKeywordFn);
|
||||
|
||||
AstNode *node = ast_create_node(NodeTypeFnDecl, fn_token);
|
||||
|
||||
|
||||
Token *fn_name = &pc->tokens->at(token_index);
|
||||
token_index += 1;
|
||||
ast_expect_token(pc, fn_name, TokenIdSymbol);
|
||||
|
@ -437,7 +442,7 @@ static void ast_parse_fn_decl_list(ParseContext *pc, int token_index, ZigList<As
|
|||
AstNode *ast_parse(Buf *buf, ZigList<Token> *tokens) {
|
||||
ParseContext pc = {0};
|
||||
pc.buf = buf;
|
||||
pc.root = ast_create_node(NodeTypeRoot);
|
||||
pc.root = ast_create_node(NodeTypeRoot, &tokens->at(0));
|
||||
pc.tokens = tokens;
|
||||
|
||||
int new_token_index;
|
||||
|
|
|
@ -97,6 +97,8 @@ struct AstNodeFnCall {
|
|||
struct AstNode {
|
||||
enum NodeType type;
|
||||
AstNode *parent;
|
||||
int line;
|
||||
int column;
|
||||
union {
|
||||
AstNodeRoot root;
|
||||
AstNodeFnDecl fn_decl;
|
||||
|
|
Loading…
Reference in New Issue