factor analysis code out of codegen

master
Josh Wolfe 2015-11-30 09:14:58 -07:00
parent 020f854f6f
commit 9e0ff6faa2
9 changed files with 643 additions and 595 deletions

View File

@ -22,6 +22,7 @@ include_directories(
)
set(ZIG_SOURCES
"${CMAKE_SOURCE_DIR}/src/analyze.cpp"
"${CMAKE_SOURCE_DIR}/src/buffer.cpp"
"${CMAKE_SOURCE_DIR}/src/error.cpp"
"${CMAKE_SOURCE_DIR}/src/main.cpp"

498
src/analyze.cpp Normal file
View File

@ -0,0 +1,498 @@
/*
* Copyright (c) 2015 Andrew Kelley
*
* This file is part of zig, which is MIT licensed.
* See http://opensource.org/licenses/MIT
*/
#include "analyze.hpp"
#include "semantic_info.hpp"
#include "error.hpp"
#include "zig_llvm.hpp"
// Appends a new error to g->errors, positioned at the line/column recorded on
// `node` and carrying `msg`. The end of the source range is not tracked here,
// so both end fields are set to -1.
static void add_node_error(CodeGen *g, AstNode *node, Buf *msg) {
    g->errors.add_one();
    ErrorMsg *new_error = &g->errors.last();

    new_error->msg = msg;

    new_error->line_start = node->line;
    new_error->line_end = -1;

    new_error->column_start = node->column;
    new_error->column_end = -1;
}
// Parses a version string of the exact form "MAJOR.MINOR.PATCH", where each
// component is a non-empty run of decimal digits.
//
// On success stores the three components through `major`, `minor` and `patch`
// and returns ErrorNone. On any malformed input (missing component, sign,
// whitespace, non-digit characters, trailing text, or a component that is too
// long) returns ErrorInvalidFormat and leaves the outputs untouched.
static int parse_version_string(Buf *buf, int *major, int *minor, int *patch) {
    // Nine decimal digits always fit in a 32-bit int, so capping the length
    // of a component keeps the accumulation below from overflowing.
    const int max_component_digits = 9;

    int components[3];
    const char *cursor = buf_ptr(buf);

    for (int i = 0; i < 3; i += 1) {
        int digit_count = 0;
        int value = 0;
        while (*cursor >= '0' && *cursor <= '9') {
            if (digit_count == max_component_digits)
                return ErrorInvalidFormat;
            value = value * 10 + (*cursor - '0');
            digit_count += 1;
            cursor += 1;
        }
        // An empty component ("1..2", ".1.2", "1.2.") is not a version.
        if (digit_count == 0)
            return ErrorInvalidFormat;

        // The first two components must be followed by a dot, the last one
        // by the end of the string.
        const char expected_terminator = (i < 2) ? '.' : '\0';
        if (*cursor != expected_terminator)
            return ErrorInvalidFormat;

        components[i] = value;
        if (i < 2)
            cursor += 1;
    }

    *major = components[0];
    *minor = components[1];
    *patch = components[2];
    return ErrorNone;
}
// Parses `version_buf` into g->version_major/minor/patch. If the string
// cannot be parsed, an "invalid version string" error is reported at `node`.
static void set_root_export_version(CodeGen *g, Buf *version_buf, AstNode *node) {
    int parse_result = parse_version_string(version_buf,
            &g->version_major, &g->version_minor, &g->version_patch);
    if (!parse_result)
        return;

    add_node_error(g, node,
            buf_sprintf("invalid version string"));
}
static void find_declarations(CodeGen *g, AstNode *node);
// Resolves the type described by a NodeTypeType AST node and stores the
// resulting TypeTableEntry in a freshly allocated node->codegen_node.
//
// * Primitive types are looked up by name in g->type_table; an unknown name
//   is reported and resolved to g->invalid_type_entry.
// * Pointer types first resolve their child type (recursively, through
//   find_declarations), then reuse the pointer entry cached on the child
//   (one cache slot for const, one for mut) or create and cache a new one.
//
// The node must not have been resolved before (codegen_node must be null).
static void resolve_type_and_recurse(CodeGen *g, AstNode *node) {
    assert(!node->codegen_node);
    node->codegen_node = allocate<CodeGenNode>(1);
    TypeNode *type_node = &node->codegen_node->data.type_node;
    switch (node->data.type.type) {
        case AstNodeTypeTypePrimitive:
            {
                Buf *name = &node->data.type.primitive_name;
                auto table_entry = g->type_table.maybe_get(name);
                if (table_entry) {
                    type_node->entry = table_entry->value;
                } else {
                    add_node_error(g, node,
                            buf_sprintf("invalid type name: '%s'", buf_ptr(name)));
                    type_node->entry = g->invalid_type_entry;
                }
                break;
            }
        case AstNodeTypeTypePointer:
            {
                // The child must be resolved first: its codegen_node is read below.
                find_declarations(g, node->data.type.child_type);
                TypeNode *child_type_node = &node->data.type.child_type->codegen_node->data.type_node;
                if (child_type_node->entry->id == TypeIdUnreachable) {
                    add_node_error(g, node,
                            buf_create_from_str("pointer to unreachable not allowed"));
                    // Do not build (or cache) a pointer type for the rejected
                    // child: the unreachable entry's LLVM type is void, which
                    // LLVM does not accept as a pointee. Resolve to the invalid
                    // type instead, like an unknown primitive name does.
                    type_node->entry = g->invalid_type_entry;
                    break;
                }
                TypeTableEntry **parent_pointer = node->data.type.is_const ?
                    &child_type_node->entry->pointer_const_parent :
                    &child_type_node->entry->pointer_mut_parent;
                const char *const_or_mut_str = node->data.type.is_const ? "const" : "mut";
                if (*parent_pointer) {
                    type_node->entry = *parent_pointer;
                } else {
                    TypeTableEntry *entry = allocate<TypeTableEntry>(1);
                    entry->id = TypeIdPointer;
                    entry->type_ref = LLVMPointerType(child_type_node->entry->type_ref, 0);
                    // The entry's name doubles as its key in the type table,
                    // e.g. "*const u8".
                    buf_resize(&entry->name, 0);
                    buf_appendf(&entry->name, "*%s %s", const_or_mut_str, buf_ptr(&child_type_node->entry->name));
                    entry->di_type = g->dbuilder->createPointerType(child_type_node->entry->di_type,
                            g->pointer_size_bytes * 8, g->pointer_size_bytes * 8, buf_ptr(&entry->name));
                    g->type_table.put(&entry->name, entry);
                    type_node->entry = entry;
                    *parent_pointer = entry;
                }
                break;
            }
    }
}
// First analysis pass over a declaration-level AST node. Registers functions
// in g->fn_table (and definitions in g->fn_defs), records link directives in
// g->link_table, handles root export directives, and resolves every type node
// reachable from function prototypes. Recurses into prototypes, parameters and
// types; statement/expression node kinds must never be passed in.
static void find_declarations(CodeGen *g, AstNode *node) {
switch (node->type) {
case NodeTypeExternBlock:
// Only the "link" directive is accepted on an extern block; its parameter
// is recorded in the link table.
for (int i = 0; i < node->data.extern_block.directives->length; i += 1) {
AstNode *directive_node = node->data.extern_block.directives->at(i);
Buf *name = &directive_node->data.directive.name;
Buf *param = &directive_node->data.directive.param;
if (buf_eql_str(name, "link")) {
g->link_table.put(param, true);
} else {
add_node_error(g, directive_node,
buf_sprintf("invalid directive: '%s'", buf_ptr(name)));
}
}
// Every declaration in the block becomes an extern function table entry
// using the C calling convention.
// NOTE(review): unlike the NodeTypeFnDef case, no check is made for an
// existing entry with the same name before calling put() -- confirm
// whether duplicate extern declarations should be diagnosed.
for (int fn_decl_i = 0; fn_decl_i < node->data.extern_block.fn_decls.length; fn_decl_i += 1) {
AstNode *fn_decl = node->data.extern_block.fn_decls.at(fn_decl_i);
assert(fn_decl->type == NodeTypeFnDecl);
AstNode *fn_proto = fn_decl->data.fn_decl.fn_proto;
find_declarations(g, fn_proto);
Buf *name = &fn_proto->data.fn_proto.name;
FnTableEntry *fn_table_entry = allocate<FnTableEntry>(1);
fn_table_entry->proto_node = fn_proto;
fn_table_entry->is_extern = true;
fn_table_entry->calling_convention = LLVMCCallConv;
g->fn_table.put(name, fn_table_entry);
}
break;
case NodeTypeFnDef:
{
AstNode *proto_node = node->data.fn_def.fn_proto;
assert(proto_node->type == NodeTypeFnProto);
Buf *proto_name = &proto_node->data.fn_proto.name;
auto entry = g->fn_table.maybe_get(proto_name);
if (entry) {
// A function with this name is already registered: report it and mark
// the definition so that analyze_node skips it.
add_node_error(g, node,
buf_sprintf("redefinition of '%s'", buf_ptr(proto_name)));
assert(!node->codegen_node);
node->codegen_node = allocate<CodeGenNode>(1);
node->codegen_node->data.fn_def_node.skip = true;
} else {
FnTableEntry *fn_table_entry = allocate<FnTableEntry>(1);
fn_table_entry->proto_node = proto_node;
fn_table_entry->fn_def_node = node;
// Functions that are not exported get internal linkage and the fast
// calling convention; exported ones use the C calling convention.
fn_table_entry->internal_linkage = proto_node->data.fn_proto.visib_mod != FnProtoVisibModExport;
if (fn_table_entry->internal_linkage) {
fn_table_entry->calling_convention = LLVMFastCallConv;
} else {
fn_table_entry->calling_convention = LLVMCCallConv;
}
g->fn_table.put(proto_name, fn_table_entry);
g->fn_defs.append(fn_table_entry);
find_declarations(g, proto_node);
}
break;
}
case NodeTypeFnProto:
{
// No directive is valid on a function prototype; each one is reported.
for (int i = 0; i < node->data.fn_proto.directives->length; i += 1) {
AstNode *directive_node = node->data.fn_proto.directives->at(i);
Buf *name = &directive_node->data.directive.name;
add_node_error(g, directive_node,
buf_sprintf("invalid directive: '%s'", buf_ptr(name)));
}
// Resolve the parameter types, then the return type.
for (int i = 0; i < node->data.fn_proto.params.length; i += 1) {
AstNode *child = node->data.fn_proto.params.at(i);
find_declarations(g, child);
}
find_declarations(g, node->data.fn_proto.return_type);
break;
}
// NOTE(review): this break is unreachable; the braced block above already
// ends with a break.
break;
case NodeTypeParamDecl:
find_declarations(g, node->data.param_decl.type);
break;
case NodeTypeType:
resolve_type_and_recurse(g, node);
break;
case NodeTypeDirective:
// we handled directives in the parent function
break;
case NodeTypeRootExportDecl:
// Only the "version" directive is accepted on the root export declaration.
for (int i = 0; i < node->data.root_export_decl.directives->length; i += 1) {
AstNode *directive_node = node->data.root_export_decl.directives->at(i);
Buf *name = &directive_node->data.directive.name;
Buf *param = &directive_node->data.directive.param;
if (buf_eql_str(name, "version")) {
set_root_export_version(g, param, directive_node);
} else {
add_node_error(g, directive_node,
buf_sprintf("invalid directive: '%s'", buf_ptr(name)));
}
}
break;
// The remaining node kinds are never handed to this pass by the visible
// callers; reaching them is treated as a programming error.
case NodeTypeFnDecl:
case NodeTypeReturnExpr:
case NodeTypeRoot:
case NodeTypeBlock:
case NodeTypeBinOpExpr:
case NodeTypeFnCallExpr:
case NodeTypeNumberLiteral:
case NodeTypeStringLiteral:
case NodeTypeUnreachable:
case NodeTypeSymbol:
case NodeTypeCastExpr:
case NodeTypePrefixOpExpr:
zig_unreachable();
}
}
// Walks the top-level statements of a function body and validates how the
// function returns:
// * a `return` inside a function whose return type is unreachable is an error
//   (checking stops at the first such statement);
// * any statement that follows a `return` is reported as unreachable code;
// * a function that is neither void nor unreachable must end in a `return`;
// * a void function that does not end in a `return` gets add_implicit_return
//   set on its codegen node.
// Allocates node->codegen_node; the return type must already be resolved.
static void check_fn_def_control_flow(CodeGen *g, AstNode *node) {
    assert(node->type == NodeTypeFnDef);

    AstNode *proto_node = node->data.fn_def.fn_proto;
    assert(proto_node->type == NodeTypeFnProto);

    AstNode *return_type_node = proto_node->data.fn_proto.return_type;
    assert(return_type_node->type == NodeTypeType);

    node->codegen_node = allocate<CodeGenNode>(1);
    FnDefNode *fn_def_info = &node->codegen_node->data.fn_def_node;

    assert(return_type_node->codegen_node);
    TypeTableEntry *return_type = return_type_node->codegen_node->data.type_node.entry;
    assert(return_type);
    TypeId return_type_id = return_type->id;

    AstNode *body = node->data.fn_def.body;
    assert(body->type == NodeTypeBlock);

    // TODO once we understand types, do this pass after type checking, and
    // if an expression has an unreachable value then stop looking at statements after
    // it. then we can remove the check to `unreachable` in the end of this function.
    bool seen_return = false;
    for (int idx = 0; idx < body->data.block.statements.length; idx += 1) {
        AstNode *stmt = body->data.block.statements.at(idx);

        if (stmt->type != NodeTypeReturnExpr) {
            if (seen_return) {
                add_node_error(g, stmt,
                        buf_sprintf("unreachable code"));
            }
            continue;
        }

        if (return_type_id == TypeIdUnreachable) {
            add_node_error(g, stmt,
                    buf_sprintf("return statement in function with unreachable return type"));
            return;
        }
        seen_return = true;
    }

    if (seen_return)
        return;

    // Control can fall off the end of the body.
    if (return_type_id == TypeIdVoid) {
        fn_def_info->add_implicit_return = true;
    } else if (return_type_id != TypeIdUnreachable) {
        add_node_error(g, node,
                buf_sprintf("control reaches end of non-void function"));
    }
}
// Second analysis pass: recursively validates `node` and its children and
// records diagnostics in g->errors.
//
// * Root: runs find_declarations over every top-level declaration first (so
//   all functions are known), then analyzes each declaration, then checks that
//   an output name and output type have been established.
// * Root export declaration: at most one is allowed; it supplies the output
//   name and type unless they were already set on `g`.
// * Function definitions: skipped when find_declarations flagged them;
//   otherwise the prototype, the control flow and the body are checked.
// * Function calls: the callee must exist in g->fn_table and the argument
//   count must match its prototype.
// * Cast and prefix-operator expressions are not implemented yet and panic.
static void analyze_node(CodeGen *g, AstNode *node) {
    switch (node->type) {
        case NodeTypeRoot:
            {
                // Iterate once over the top level declarations to build the function table
                for (int i = 0; i < node->data.root.top_level_decls.length; i += 1) {
                    AstNode *child = node->data.root.top_level_decls.at(i);
                    find_declarations(g, child);
                }
                for (int i = 0; i < node->data.root.top_level_decls.length; i += 1) {
                    AstNode *child = node->data.root.top_level_decls.at(i);
                    analyze_node(g, child);
                }
                if (!g->out_name) {
                    add_node_error(g, node,
                            buf_sprintf("missing export declaration and output name not provided"));
                } else if (g->out_type == OutTypeUnknown) {
                    add_node_error(g, node,
                            buf_sprintf("missing export declaration and export type not provided"));
                }
                break;
            }
        case NodeTypeRootExportDecl:
            if (g->root_export_decl) {
                add_node_error(g, node,
                        buf_sprintf("only one root export declaration allowed"));
            } else {
                g->root_export_decl = node;
                // An output name already present on `g` takes precedence over
                // the name in the declaration.
                if (!g->out_name)
                    g->out_name = &node->data.root_export_decl.name;
                Buf *out_type = &node->data.root_export_decl.type;
                // Start from "unknown" so that an unrecognized type string
                // never copies an indeterminate value into g->out_type below.
                OutType export_out_type = OutTypeUnknown;
                if (buf_eql_str(out_type, "executable")) {
                    export_out_type = OutTypeExe;
                } else if (buf_eql_str(out_type, "library")) {
                    export_out_type = OutTypeLib;
                } else if (buf_eql_str(out_type, "object")) {
                    export_out_type = OutTypeObj;
                } else {
                    add_node_error(g, node,
                            buf_sprintf("invalid export type: '%s'", buf_ptr(out_type)));
                }
                // An output type already present on `g` takes precedence as well.
                if (g->out_type == OutTypeUnknown)
                    g->out_type = export_out_type;
            }
            break;
        case NodeTypeExternBlock:
            for (int fn_decl_i = 0; fn_decl_i < node->data.extern_block.fn_decls.length; fn_decl_i += 1) {
                AstNode *fn_decl = node->data.extern_block.fn_decls.at(fn_decl_i);
                analyze_node(g, fn_decl);
            }
            break;
        case NodeTypeFnDef:
            {
                if (node->codegen_node && node->codegen_node->data.fn_def_node.skip) {
                    // we detected an error with this function definition which prevents us
                    // from further analyzing it.
                    break;
                }
                AstNode *proto_node = node->data.fn_def.fn_proto;
                assert(proto_node->type == NodeTypeFnProto);
                analyze_node(g, proto_node);
                check_fn_def_control_flow(g, node);
                analyze_node(g, node->data.fn_def.body);
                break;
            }
        case NodeTypeFnDecl:
            {
                AstNode *proto_node = node->data.fn_decl.fn_proto;
                assert(proto_node->type == NodeTypeFnProto);
                analyze_node(g, proto_node);
                break;
            }
        case NodeTypeFnProto:
            {
                for (int i = 0; i < node->data.fn_proto.params.length; i += 1) {
                    AstNode *child = node->data.fn_proto.params.at(i);
                    analyze_node(g, child);
                }
                analyze_node(g, node->data.fn_proto.return_type);
                break;
            }
        case NodeTypeParamDecl:
            analyze_node(g, node->data.param_decl.type);
            break;
        case NodeTypeType:
            // ignore; we handled types with find_declarations
            break;
        case NodeTypeBlock:
            for (int i = 0; i < node->data.block.statements.length; i += 1) {
                AstNode *child = node->data.block.statements.at(i);
                analyze_node(g, child);
            }
            break;
        case NodeTypeReturnExpr:
            // A bare `return` has no expression to analyze.
            if (node->data.return_expr.expr) {
                analyze_node(g, node->data.return_expr.expr);
            }
            break;
        case NodeTypeBinOpExpr:
            analyze_node(g, node->data.bin_op_expr.op1);
            analyze_node(g, node->data.bin_op_expr.op2);
            break;
        case NodeTypeFnCallExpr:
            {
                Buf *name = hack_get_fn_call_name(g, node->data.fn_call_expr.fn_ref_expr);
                auto entry = g->fn_table.maybe_get(name);
                if (!entry) {
                    add_node_error(g, node,
                            buf_sprintf("undefined function: '%s'", buf_ptr(name)));
                } else {
                    FnTableEntry *fn_table_entry = entry->value;
                    assert(fn_table_entry->proto_node->type == NodeTypeFnProto);
                    int expected_param_count = fn_table_entry->proto_node->data.fn_proto.params.length;
                    int actual_param_count = node->data.fn_call_expr.params.length;
                    if (expected_param_count != actual_param_count) {
                        add_node_error(g, node,
                                buf_sprintf("wrong number of arguments. Expected %d, got %d.",
                                    expected_param_count, actual_param_count));
                    }
                }
                // The arguments are analyzed even when the callee is unknown
                // or the count is wrong.
                for (int i = 0; i < node->data.fn_call_expr.params.length; i += 1) {
                    AstNode *child = node->data.fn_call_expr.params.at(i);
                    analyze_node(g, child);
                }
                break;
            }
        case NodeTypeDirective:
            // we looked at directives in the parent node
            break;
        case NodeTypeCastExpr:
            zig_panic("TODO");
            break;
        case NodeTypePrefixOpExpr:
            zig_panic("TODO");
            break;
        case NodeTypeNumberLiteral:
        case NodeTypeStringLiteral:
        case NodeTypeUnreachable:
        case NodeTypeSymbol:
            // nothing to do
            break;
    }
}
// Registers the built-in types "u8", "i32", "void" and "unreachable" in
// g->type_table, each with its LLVM type and debug-info type. The void entry
// also serves as g->invalid_type_entry, and the unreachable entry borrows its
// debug-info type, so void has to be created before unreachable.
static void add_types(CodeGen *g) {
    TypeTableEntry *u8_entry = allocate<TypeTableEntry>(1);
    u8_entry->id = TypeIdU8;
    u8_entry->type_ref = LLVMInt8Type();
    buf_init_from_str(&u8_entry->name, "u8");
    u8_entry->di_type = g->dbuilder->createBasicType(buf_ptr(&u8_entry->name), 8, 8,
            llvm::dwarf::DW_ATE_unsigned);
    g->type_table.put(&u8_entry->name, u8_entry);

    TypeTableEntry *i32_entry = allocate<TypeTableEntry>(1);
    i32_entry->id = TypeIdI32;
    i32_entry->type_ref = LLVMInt32Type();
    buf_init_from_str(&i32_entry->name, "i32");
    i32_entry->di_type = g->dbuilder->createBasicType(buf_ptr(&i32_entry->name), 32, 32,
            llvm::dwarf::DW_ATE_signed);
    g->type_table.put(&i32_entry->name, i32_entry);

    TypeTableEntry *void_entry = allocate<TypeTableEntry>(1);
    void_entry->id = TypeIdVoid;
    void_entry->type_ref = LLVMVoidType();
    buf_init_from_str(&void_entry->name, "void");
    void_entry->di_type = g->dbuilder->createBasicType(buf_ptr(&void_entry->name), 0, 0,
            llvm::dwarf::DW_ATE_unsigned);
    g->type_table.put(&void_entry->name, void_entry);
    // invalid types are void
    g->invalid_type_entry = void_entry;

    TypeTableEntry *unreachable_entry = allocate<TypeTableEntry>(1);
    unreachable_entry->id = TypeIdUnreachable;
    unreachable_entry->type_ref = LLVMVoidType();
    buf_init_from_str(&unreachable_entry->name, "unreachable");
    unreachable_entry->di_type = g->invalid_type_entry->di_type;
    g->type_table.put(&unreachable_entry->name, unreachable_entry);
}
// Entry point of semantic analysis. Initializes LLVM for the host machine,
// creates the target machine, module, IR builder and debug-info builder on
// `g`, registers the built-in types, and then analyzes the AST rooted at
// g->root. Diagnostics are accumulated in g->errors by the analysis functions.
void semantic_analyze(CodeGen *g) {
LLVMInitializeAllTargets();
LLVMInitializeAllTargetMCs();
LLVMInitializeAllAsmPrinters();
LLVMInitializeAllAsmParsers();
LLVMInitializeNativeTarget();
// Only the host's default triple is targeted here.
g->is_native_target = true;
char *native_triple = LLVMGetDefaultTargetTriple();
LLVMTargetRef target_ref;
char *err_msg = nullptr;
// A non-zero result means the lookup failed and err_msg describes why.
if (LLVMGetTargetFromTriple(native_triple, &target_ref, &err_msg)) {
zig_panic("unable to get target from triple: %s", err_msg);
}
char *native_cpu = LLVMZigGetHostCPUName();
char *native_features = LLVMZigGetNativeFeatures();
// Debug builds are not optimized; every other build type is optimized
// aggressively.
LLVMCodeGenOptLevel opt_level = (g->build_type == CodeGenBuildTypeDebug) ?
LLVMCodeGenLevelNone : LLVMCodeGenLevelAggressive;
LLVMRelocMode reloc_mode = g->is_static ? LLVMRelocStatic : LLVMRelocPIC;
g->target_machine = LLVMCreateTargetMachine(target_ref, native_triple,
native_cpu, native_features, opt_level, reloc_mode, LLVMCodeModelDefault);
g->target_data_ref = LLVMGetTargetMachineData(g->target_machine);
g->module = LLVMModuleCreateWithName("ZigModule");
// The pointer size is needed by add_types/type resolution for debug info.
g->pointer_size_bytes = LLVMPointerSize(g->target_data_ref);
g->builder = LLVMCreateBuilder();
g->dbuilder = new llvm::DIBuilder(*llvm::unwrap(g->module), true);
// The built-in types must exist before any type node is resolved.
add_types(g);
analyze_node(g, g->root);
}

15
src/analyze.hpp Normal file
View File

@ -0,0 +1,15 @@
/*
* Copyright (c) 2015 Andrew Kelley
*
* This file is part of zig, which is MIT licensed.
* See http://opensource.org/licenses/MIT
*/
#ifndef ZIG_ANALYZE_HPP
#define ZIG_ANALYZE_HPP
struct CodeGen;
void semantic_analyze(CodeGen *g);
#endif

View File

@ -45,3 +45,20 @@ void buf_appendf(Buf *buf, const char *format, ...) {
va_end(ap2);
va_end(ap);
}
// these functions are not static inline so they can be better used as template parameters
// Returns true when `buf` and `other` hold the same bytes. `buf` must have
// been initialized (its backing list must be non-empty).
bool buf_eql_buf(Buf *buf, Buf *other) {
    assert(buf->list.length);
    const char *other_bytes = buf_ptr(other);
    return buf_eql_mem(buf, other_bytes, buf_len(other));
}
// Hashes the contents of `buf` with the 32-bit FNV hash (xor each byte into
// the state, then multiply by the FNV prime). `buf` must have been
// initialized (its backing list must be non-empty).
uint32_t buf_hash(Buf *buf) {
    assert(buf->list.length);
    uint32_t hash = 2166136261;
    for (int pos = 0; pos < buf_len(buf); pos += 1) {
        hash ^= (uint8_t)buf->list.at(pos);
        hash *= 16777619;
    }
    return hash;
}

View File

@ -132,21 +132,8 @@ static inline bool buf_eql_str(Buf *buf, const char *str) {
return buf_eql_mem(buf, str, strlen(str));
}
// Returns true when `buf` and `other` hold the same bytes. `buf` must have
// been initialized (its backing list must be non-empty).
static inline bool buf_eql_buf(Buf *buf, Buf *other) {
    assert(buf->list.length);
    const char *other_bytes = buf_ptr(other);
    return buf_eql_mem(buf, other_bytes, buf_len(other));
}
// Hashes the contents of `buf` with the 32-bit FNV hash (xor each byte into
// the state, then multiply by the FNV prime). `buf` must have been
// initialized (its backing list must be non-empty).
static inline uint32_t buf_hash(Buf *buf) {
    assert(buf->list.length);
    uint32_t hash = 2166136261;
    for (int pos = 0; pos < buf_len(buf); pos += 1) {
        hash ^= (uint8_t)buf->list.at(pos);
        hash *= 16777619;
    }
    return hash;
}
bool buf_eql_buf(Buf *buf, Buf *other);
uint32_t buf_hash(Buf *buf);
static inline void buf_upcase(Buf *buf) {
for (int i = 0; i < buf_len(buf); i += 1) {

View File

@ -12,6 +12,8 @@
#include "config.h"
#include "error.hpp"
#include "semantic_info.hpp"
#include <stdio.h>
#include <llvm/IR/IRBuilder.h>
@ -21,88 +23,6 @@
#include <llvm/Target/TargetMachine.h>
#include <llvm/Support/TargetParser.h>
// One entry of the function table (CodeGen::fn_table), describing a function
// that was either defined in the source or declared in an extern block.
struct FnTableEntry {
// NOTE(review): not assigned by the analysis code shown here; presumably the
// LLVM function value created during code generation -- confirm.
LLVMValueRef fn_value;
// The NodeTypeFnProto node carrying the name, parameters and return type.
AstNode *proto_node;
// The NodeTypeFnDef node; assigned only for function definitions.
AstNode *fn_def_node;
// Set to true for functions declared inside an extern block.
bool is_extern;
// True for definitions whose visibility modifier is not "export".
bool internal_linkage;
// LLVM calling convention: LLVMFastCallConv for internal-linkage definitions,
// LLVMCCallConv for exported definitions and extern declarations.
unsigned calling_convention;
};
// Discriminates the kinds of TypeTableEntry.
enum TypeId {
// NOTE(review): not produced by any code shown here.
TypeIdUserDefined,
// Pointer to another type (const or mut).
TypeIdPointer,
// Built-in "u8".
TypeIdU8,
// Built-in "i32".
TypeIdI32,
// Built-in "void"; also the id of the invalid-type placeholder entry.
TypeIdVoid,
// Built-in "unreachable".
TypeIdUnreachable,
};
// One entry of the type table (CodeGen::type_table), keyed by `name`.
struct TypeTableEntry {
TypeId id;
// The LLVM type used for values of this type.
LLVMTypeRef type_ref;
// The debug-info description of this type.
llvm::DIType *di_type;
// NOTE(review): the next three fields are not assigned by the analysis code
// shown here; their intended use should be confirmed.
TypeTableEntry *pointer_child;
bool pointer_is_const;
int user_defined_id;
// Display name, e.g. "u8" or "*const u8"; also the type table key.
Buf name;
// Cached entries for "*const <this>" and "*mut <this>", created the first
// time such a pointer type is resolved; null until then.
TypeTableEntry *pointer_const_parent;
TypeTableEntry *pointer_mut_parent;
};
// All state shared between semantic analysis and code generation for one
// compilation.
struct CodeGen {
// LLVM module being built.
LLVMModuleRef module;
// Root of the AST being compiled.
AstNode *root;
// Diagnostics collected so far; appended to by add_node_error.
ZigList<ErrorMsg> errors;
LLVMBuilderRef builder;
// Debug-info builder attached to `module`.
llvm::DIBuilder *dbuilder;
llvm::DICompileUnit *compile_unit;
// Functions by name: definitions and extern declarations.
HashMap<Buf *, FnTableEntry *, buf_hash, buf_eql_buf> fn_table;
HashMap<Buf *, LLVMValueRef, buf_hash, buf_eql_buf> str_table;
// Types by name: built-ins plus pointer types created on demand.
HashMap<Buf *, TypeTableEntry *, buf_hash, buf_eql_buf> type_table;
// Parameters of "link" directives found on extern blocks.
HashMap<Buf *, bool, buf_hash, buf_eql_buf> link_table;
// Placeholder assigned to type nodes that failed to resolve; this is the
// "void" entry.
TypeTableEntry *invalid_type_entry;
LLVMTargetDataRef target_data_ref;
// Pointer size of the target, in bytes.
unsigned pointer_size_bytes;
// Selects static relocation instead of PIC when creating the target machine.
bool is_static;
bool strip_debug_symbols;
// Debug builds disable optimization when creating the target machine.
CodeGenBuildType build_type;
LLVMTargetMachineRef target_machine;
bool is_native_target;
Buf in_file;
Buf in_dir;
ZigList<llvm::DIScope *> block_scopes;
llvm::DIFile *di_file;
// Function table entries of all function definitions, in the order found.
ZigList<FnTableEntry *> fn_defs;
// Output name; when null it is taken from the root export declaration.
Buf *out_name;
// Output kind; when OutTypeUnknown it is taken from the root export
// declaration.
OutType out_type;
FnTableEntry *cur_fn;
bool c_stdint_used;
// The root export declaration node, once one has been seen.
AstNode *root_export_decl;
// Version parsed from the root export declaration's "version" directive.
int version_major;
int version_minor;
int version_patch;
};
// Analysis result attached to a NodeTypeType AST node.
struct TypeNode {
// The resolved type, or the invalid-type placeholder if resolution failed.
TypeTableEntry *entry;
};
// Analysis result attached to a NodeTypeFnDef AST node.
struct FnDefNode {
// Set for void functions whose body does not end in a return statement.
bool add_implicit_return;
// Set when the definition is a redefinition; analysis then skips it.
bool skip;
// NOTE(review): not assigned by the analysis code shown here; presumably
// filled in during code generation -- confirm.
LLVMValueRef *params;
};
// Per-AST-node analysis data, stored in AstNode::codegen_node. Which union
// member is valid depends on the type of the AST node that owns it.
struct CodeGenNode {
union {
TypeNode type_node; // for NodeTypeType
FnDefNode fn_def_node; // for NodeTypeFnDef
} data;
};
CodeGen *create_codegen(AstNode *root, Buf *in_full_path) {
CodeGen *g = allocate<CodeGen>(1);
g->root = root;
@ -140,15 +60,7 @@ void codegen_set_out_name(CodeGen *g, Buf *out_name) {
g->out_name = out_name;
}
// Appends a new error to g->errors, positioned at the line/column recorded on
// `node` and carrying `msg`. The end of the source range is not tracked here,
// so both end fields are set to -1.
static void add_node_error(CodeGen *g, AstNode *node, Buf *msg) {
    g->errors.add_one();
    ErrorMsg *new_error = &g->errors.last();

    new_error->msg = msg;

    new_error->line_start = node->line;
    new_error->line_end = -1;

    new_error->column_start = node->column;
    new_error->column_end = -1;
}
static LLVMValueRef gen_expr(CodeGen *g, AstNode *expr_node);
static LLVMTypeRef to_llvm_type(AstNode *type_node) {
assert(type_node->type == NodeTypeType);
@ -166,7 +78,6 @@ static llvm::DIType *to_llvm_debug_type(AstNode *type_node) {
return type_node->codegen_node->data.type_node.entry->di_type;
}
static bool type_is_unreachable(AstNode *type_node) {
assert(type_node->type == NodeTypeType);
assert(type_node->codegen_node);
@ -174,492 +85,6 @@ static bool type_is_unreachable(AstNode *type_node) {
return type_node->codegen_node->data.type_node.entry->id == TypeIdUnreachable;
}
// Parses a version string of the exact form "MAJOR.MINOR.PATCH", where each
// component is a non-empty run of decimal digits.
//
// On success stores the three components through `major`, `minor` and `patch`
// and returns ErrorNone. On any malformed input (missing component, sign,
// whitespace, non-digit characters, trailing text, or a component that is too
// long) returns ErrorInvalidFormat and leaves the outputs untouched.
static int parse_version_string(Buf *buf, int *major, int *minor, int *patch) {
    // Nine decimal digits always fit in a 32-bit int, so capping the length
    // of a component keeps the accumulation below from overflowing.
    const int max_component_digits = 9;

    int components[3];
    const char *cursor = buf_ptr(buf);

    for (int i = 0; i < 3; i += 1) {
        int digit_count = 0;
        int value = 0;
        while (*cursor >= '0' && *cursor <= '9') {
            if (digit_count == max_component_digits)
                return ErrorInvalidFormat;
            value = value * 10 + (*cursor - '0');
            digit_count += 1;
            cursor += 1;
        }
        // An empty component ("1..2", ".1.2", "1.2.") is not a version.
        if (digit_count == 0)
            return ErrorInvalidFormat;

        // The first two components must be followed by a dot, the last one
        // by the end of the string.
        const char expected_terminator = (i < 2) ? '.' : '\0';
        if (*cursor != expected_terminator)
            return ErrorInvalidFormat;

        components[i] = value;
        if (i < 2)
            cursor += 1;
    }

    *major = components[0];
    *minor = components[1];
    *patch = components[2];
    return ErrorNone;
}
// Parses `version_buf` into g->version_major/minor/patch. If the string
// cannot be parsed, an "invalid version string" error is reported at `node`.
static void set_root_export_version(CodeGen *g, Buf *version_buf, AstNode *node) {
    int parse_result = parse_version_string(version_buf,
            &g->version_major, &g->version_minor, &g->version_patch);
    if (!parse_result)
        return;

    add_node_error(g, node,
            buf_sprintf("invalid version string"));
}
static void find_declarations(CodeGen *g, AstNode *node);
// Resolves the type described by a NodeTypeType AST node and stores the
// resulting TypeTableEntry in a freshly allocated node->codegen_node.
//
// * Primitive types are looked up by name in g->type_table; an unknown name
//   is reported and resolved to g->invalid_type_entry.
// * Pointer types first resolve their child type (recursively, through
//   find_declarations), then reuse the pointer entry cached on the child
//   (one cache slot for const, one for mut) or create and cache a new one.
//
// The node must not have been resolved before (codegen_node must be null).
static void resolve_type_and_recurse(CodeGen *g, AstNode *node) {
    assert(!node->codegen_node);
    node->codegen_node = allocate<CodeGenNode>(1);
    TypeNode *type_node = &node->codegen_node->data.type_node;
    switch (node->data.type.type) {
        case AstNodeTypeTypePrimitive:
            {
                Buf *name = &node->data.type.primitive_name;
                auto table_entry = g->type_table.maybe_get(name);
                if (table_entry) {
                    type_node->entry = table_entry->value;
                } else {
                    add_node_error(g, node,
                            buf_sprintf("invalid type name: '%s'", buf_ptr(name)));
                    type_node->entry = g->invalid_type_entry;
                }
                break;
            }
        case AstNodeTypeTypePointer:
            {
                // The child must be resolved first: its codegen_node is read below.
                find_declarations(g, node->data.type.child_type);
                TypeNode *child_type_node = &node->data.type.child_type->codegen_node->data.type_node;
                if (child_type_node->entry->id == TypeIdUnreachable) {
                    add_node_error(g, node,
                            buf_create_from_str("pointer to unreachable not allowed"));
                    // Do not build (or cache) a pointer type for the rejected
                    // child: the unreachable entry's LLVM type is void, which
                    // LLVM does not accept as a pointee. Resolve to the invalid
                    // type instead, like an unknown primitive name does.
                    type_node->entry = g->invalid_type_entry;
                    break;
                }
                TypeTableEntry **parent_pointer = node->data.type.is_const ?
                    &child_type_node->entry->pointer_const_parent :
                    &child_type_node->entry->pointer_mut_parent;
                const char *const_or_mut_str = node->data.type.is_const ? "const" : "mut";
                if (*parent_pointer) {
                    type_node->entry = *parent_pointer;
                } else {
                    TypeTableEntry *entry = allocate<TypeTableEntry>(1);
                    entry->id = TypeIdPointer;
                    entry->type_ref = LLVMPointerType(child_type_node->entry->type_ref, 0);
                    // The entry's name doubles as its key in the type table,
                    // e.g. "*const u8".
                    buf_resize(&entry->name, 0);
                    buf_appendf(&entry->name, "*%s %s", const_or_mut_str, buf_ptr(&child_type_node->entry->name));
                    entry->di_type = g->dbuilder->createPointerType(child_type_node->entry->di_type,
                            g->pointer_size_bytes * 8, g->pointer_size_bytes * 8, buf_ptr(&entry->name));
                    g->type_table.put(&entry->name, entry);
                    type_node->entry = entry;
                    *parent_pointer = entry;
                }
                break;
            }
    }
}
// First analysis pass over a declaration-level AST node. Registers functions
// in g->fn_table (and definitions in g->fn_defs), records link directives in
// g->link_table, handles root export directives, and resolves every type node
// reachable from function prototypes. Recurses into prototypes, parameters and
// types; statement/expression node kinds must never be passed in.
static void find_declarations(CodeGen *g, AstNode *node) {
switch (node->type) {
case NodeTypeExternBlock:
// Only the "link" directive is accepted on an extern block; its parameter
// is recorded in the link table.
for (int i = 0; i < node->data.extern_block.directives->length; i += 1) {
AstNode *directive_node = node->data.extern_block.directives->at(i);
Buf *name = &directive_node->data.directive.name;
Buf *param = &directive_node->data.directive.param;
if (buf_eql_str(name, "link")) {
g->link_table.put(param, true);
} else {
add_node_error(g, directive_node,
buf_sprintf("invalid directive: '%s'", buf_ptr(name)));
}
}
// Every declaration in the block becomes an extern function table entry
// using the C calling convention.
// NOTE(review): unlike the NodeTypeFnDef case, no check is made for an
// existing entry with the same name before calling put() -- confirm
// whether duplicate extern declarations should be diagnosed.
for (int fn_decl_i = 0; fn_decl_i < node->data.extern_block.fn_decls.length; fn_decl_i += 1) {
AstNode *fn_decl = node->data.extern_block.fn_decls.at(fn_decl_i);
assert(fn_decl->type == NodeTypeFnDecl);
AstNode *fn_proto = fn_decl->data.fn_decl.fn_proto;
find_declarations(g, fn_proto);
Buf *name = &fn_proto->data.fn_proto.name;
FnTableEntry *fn_table_entry = allocate<FnTableEntry>(1);
fn_table_entry->proto_node = fn_proto;
fn_table_entry->is_extern = true;
fn_table_entry->calling_convention = LLVMCCallConv;
g->fn_table.put(name, fn_table_entry);
}
break;
case NodeTypeFnDef:
{
AstNode *proto_node = node->data.fn_def.fn_proto;
assert(proto_node->type == NodeTypeFnProto);
Buf *proto_name = &proto_node->data.fn_proto.name;
auto entry = g->fn_table.maybe_get(proto_name);
if (entry) {
// A function with this name is already registered: report it and mark
// the definition so that analyze_node skips it.
add_node_error(g, node,
buf_sprintf("redefinition of '%s'", buf_ptr(proto_name)));
assert(!node->codegen_node);
node->codegen_node = allocate<CodeGenNode>(1);
node->codegen_node->data.fn_def_node.skip = true;
} else {
FnTableEntry *fn_table_entry = allocate<FnTableEntry>(1);
fn_table_entry->proto_node = proto_node;
fn_table_entry->fn_def_node = node;
// Functions that are not exported get internal linkage and the fast
// calling convention; exported ones use the C calling convention.
fn_table_entry->internal_linkage = proto_node->data.fn_proto.visib_mod != FnProtoVisibModExport;
if (fn_table_entry->internal_linkage) {
fn_table_entry->calling_convention = LLVMFastCallConv;
} else {
fn_table_entry->calling_convention = LLVMCCallConv;
}
g->fn_table.put(proto_name, fn_table_entry);
g->fn_defs.append(fn_table_entry);
find_declarations(g, proto_node);
}
break;
}
case NodeTypeFnProto:
{
// No directive is valid on a function prototype; each one is reported.
for (int i = 0; i < node->data.fn_proto.directives->length; i += 1) {
AstNode *directive_node = node->data.fn_proto.directives->at(i);
Buf *name = &directive_node->data.directive.name;
add_node_error(g, directive_node,
buf_sprintf("invalid directive: '%s'", buf_ptr(name)));
}
// Resolve the parameter types, then the return type.
for (int i = 0; i < node->data.fn_proto.params.length; i += 1) {
AstNode *child = node->data.fn_proto.params.at(i);
find_declarations(g, child);
}
find_declarations(g, node->data.fn_proto.return_type);
break;
}
// NOTE(review): this break is unreachable; the braced block above already
// ends with a break.
break;
case NodeTypeParamDecl:
find_declarations(g, node->data.param_decl.type);
break;
case NodeTypeType:
resolve_type_and_recurse(g, node);
break;
case NodeTypeDirective:
// we handled directives in the parent function
break;
case NodeTypeRootExportDecl:
// Only the "version" directive is accepted on the root export declaration.
for (int i = 0; i < node->data.root_export_decl.directives->length; i += 1) {
AstNode *directive_node = node->data.root_export_decl.directives->at(i);
Buf *name = &directive_node->data.directive.name;
Buf *param = &directive_node->data.directive.param;
if (buf_eql_str(name, "version")) {
set_root_export_version(g, param, directive_node);
} else {
add_node_error(g, directive_node,
buf_sprintf("invalid directive: '%s'", buf_ptr(name)));
}
}
break;
// The remaining node kinds are never handed to this pass by the visible
// callers; reaching them is treated as a programming error.
case NodeTypeFnDecl:
case NodeTypeReturnExpr:
case NodeTypeRoot:
case NodeTypeBlock:
case NodeTypeBinOpExpr:
case NodeTypeFnCallExpr:
case NodeTypeNumberLiteral:
case NodeTypeStringLiteral:
case NodeTypeUnreachable:
case NodeTypeSymbol:
case NodeTypeCastExpr:
case NodeTypePrefixOpExpr:
zig_unreachable();
}
}
// Walks the top-level statements of a function body and validates how the
// function returns:
// * a `return` inside a function whose return type is unreachable is an error
//   (checking stops at the first such statement);
// * any statement that follows a `return` is reported as unreachable code;
// * a function that is neither void nor unreachable must end in a `return`;
// * a void function that does not end in a `return` gets add_implicit_return
//   set on its codegen node.
// Allocates node->codegen_node; the return type must already be resolved.
static void check_fn_def_control_flow(CodeGen *g, AstNode *node) {
    assert(node->type == NodeTypeFnDef);

    AstNode *proto_node = node->data.fn_def.fn_proto;
    assert(proto_node->type == NodeTypeFnProto);

    AstNode *return_type_node = proto_node->data.fn_proto.return_type;
    assert(return_type_node->type == NodeTypeType);

    node->codegen_node = allocate<CodeGenNode>(1);
    FnDefNode *fn_def_info = &node->codegen_node->data.fn_def_node;

    assert(return_type_node->codegen_node);
    TypeTableEntry *return_type = return_type_node->codegen_node->data.type_node.entry;
    assert(return_type);
    TypeId return_type_id = return_type->id;

    AstNode *body = node->data.fn_def.body;
    assert(body->type == NodeTypeBlock);

    // TODO once we understand types, do this pass after type checking, and
    // if an expression has an unreachable value then stop looking at statements after
    // it. then we can remove the check to `unreachable` in the end of this function.
    bool seen_return = false;
    for (int idx = 0; idx < body->data.block.statements.length; idx += 1) {
        AstNode *stmt = body->data.block.statements.at(idx);

        if (stmt->type != NodeTypeReturnExpr) {
            if (seen_return) {
                add_node_error(g, stmt,
                        buf_sprintf("unreachable code"));
            }
            continue;
        }

        if (return_type_id == TypeIdUnreachable) {
            add_node_error(g, stmt,
                    buf_sprintf("return statement in function with unreachable return type"));
            return;
        }
        seen_return = true;
    }

    if (seen_return)
        return;

    // Control can fall off the end of the body.
    if (return_type_id == TypeIdVoid) {
        fn_def_info->add_implicit_return = true;
    } else if (return_type_id != TypeIdUnreachable) {
        add_node_error(g, node,
                buf_sprintf("control reaches end of non-void function"));
    }
}
// Returns the name of the function being called, under the assumption that
// the callee expression is a plain symbol (asserted). `g` is not used.
// TODO after type checking works we should be able to remove this hack
static Buf *hack_get_fn_call_name(CodeGen *g, AstNode *node) {
    assert(node->type == NodeTypeSymbol);
    Buf *symbol_name = &node->data.symbol;
    return symbol_name;
}
// Second analysis pass: recursively validates `node` and its children and
// records diagnostics in g->errors.
//
// * Root: runs find_declarations over every top-level declaration first (so
//   all functions are known), then analyzes each declaration, then checks that
//   an output name and output type have been established.
// * Root export declaration: at most one is allowed; it supplies the output
//   name and type unless they were already set on `g`.
// * Function definitions: skipped when find_declarations flagged them;
//   otherwise the prototype, the control flow and the body are checked.
// * Function calls: the callee must exist in g->fn_table and the argument
//   count must match its prototype.
// * Cast and prefix-operator expressions are not implemented yet and panic.
static void analyze_node(CodeGen *g, AstNode *node) {
    switch (node->type) {
        case NodeTypeRoot:
            {
                // Iterate once over the top level declarations to build the function table
                for (int i = 0; i < node->data.root.top_level_decls.length; i += 1) {
                    AstNode *child = node->data.root.top_level_decls.at(i);
                    find_declarations(g, child);
                }
                for (int i = 0; i < node->data.root.top_level_decls.length; i += 1) {
                    AstNode *child = node->data.root.top_level_decls.at(i);
                    analyze_node(g, child);
                }
                if (!g->out_name) {
                    add_node_error(g, node,
                            buf_sprintf("missing export declaration and output name not provided"));
                } else if (g->out_type == OutTypeUnknown) {
                    add_node_error(g, node,
                            buf_sprintf("missing export declaration and export type not provided"));
                }
                break;
            }
        case NodeTypeRootExportDecl:
            if (g->root_export_decl) {
                add_node_error(g, node,
                        buf_sprintf("only one root export declaration allowed"));
            } else {
                g->root_export_decl = node;
                // An output name already present on `g` takes precedence over
                // the name in the declaration.
                if (!g->out_name)
                    g->out_name = &node->data.root_export_decl.name;
                Buf *out_type = &node->data.root_export_decl.type;
                // Start from "unknown" so that an unrecognized type string
                // never copies an indeterminate value into g->out_type below.
                OutType export_out_type = OutTypeUnknown;
                if (buf_eql_str(out_type, "executable")) {
                    export_out_type = OutTypeExe;
                } else if (buf_eql_str(out_type, "library")) {
                    export_out_type = OutTypeLib;
                } else if (buf_eql_str(out_type, "object")) {
                    export_out_type = OutTypeObj;
                } else {
                    add_node_error(g, node,
                            buf_sprintf("invalid export type: '%s'", buf_ptr(out_type)));
                }
                // An output type already present on `g` takes precedence as well.
                if (g->out_type == OutTypeUnknown)
                    g->out_type = export_out_type;
            }
            break;
        case NodeTypeExternBlock:
            for (int fn_decl_i = 0; fn_decl_i < node->data.extern_block.fn_decls.length; fn_decl_i += 1) {
                AstNode *fn_decl = node->data.extern_block.fn_decls.at(fn_decl_i);
                analyze_node(g, fn_decl);
            }
            break;
        case NodeTypeFnDef:
            {
                if (node->codegen_node && node->codegen_node->data.fn_def_node.skip) {
                    // we detected an error with this function definition which prevents us
                    // from further analyzing it.
                    break;
                }
                AstNode *proto_node = node->data.fn_def.fn_proto;
                assert(proto_node->type == NodeTypeFnProto);
                analyze_node(g, proto_node);
                check_fn_def_control_flow(g, node);
                analyze_node(g, node->data.fn_def.body);
                break;
            }
        case NodeTypeFnDecl:
            {
                AstNode *proto_node = node->data.fn_decl.fn_proto;
                assert(proto_node->type == NodeTypeFnProto);
                analyze_node(g, proto_node);
                break;
            }
        case NodeTypeFnProto:
            {
                for (int i = 0; i < node->data.fn_proto.params.length; i += 1) {
                    AstNode *child = node->data.fn_proto.params.at(i);
                    analyze_node(g, child);
                }
                analyze_node(g, node->data.fn_proto.return_type);
                break;
            }
        case NodeTypeParamDecl:
            analyze_node(g, node->data.param_decl.type);
            break;
        case NodeTypeType:
            // ignore; we handled types with find_declarations
            break;
        case NodeTypeBlock:
            for (int i = 0; i < node->data.block.statements.length; i += 1) {
                AstNode *child = node->data.block.statements.at(i);
                analyze_node(g, child);
            }
            break;
        case NodeTypeReturnExpr:
            // A bare `return` has no expression to analyze.
            if (node->data.return_expr.expr) {
                analyze_node(g, node->data.return_expr.expr);
            }
            break;
        case NodeTypeBinOpExpr:
            analyze_node(g, node->data.bin_op_expr.op1);
            analyze_node(g, node->data.bin_op_expr.op2);
            break;
        case NodeTypeFnCallExpr:
            {
                Buf *name = hack_get_fn_call_name(g, node->data.fn_call_expr.fn_ref_expr);
                auto entry = g->fn_table.maybe_get(name);
                if (!entry) {
                    add_node_error(g, node,
                            buf_sprintf("undefined function: '%s'", buf_ptr(name)));
                } else {
                    FnTableEntry *fn_table_entry = entry->value;
                    assert(fn_table_entry->proto_node->type == NodeTypeFnProto);
                    int expected_param_count = fn_table_entry->proto_node->data.fn_proto.params.length;
                    int actual_param_count = node->data.fn_call_expr.params.length;
                    if (expected_param_count != actual_param_count) {
                        add_node_error(g, node,
                                buf_sprintf("wrong number of arguments. Expected %d, got %d.",
                                    expected_param_count, actual_param_count));
                    }
                }
                // The arguments are analyzed even when the callee is unknown
                // or the count is wrong.
                for (int i = 0; i < node->data.fn_call_expr.params.length; i += 1) {
                    AstNode *child = node->data.fn_call_expr.params.at(i);
                    analyze_node(g, child);
                }
                break;
            }
        case NodeTypeDirective:
            // we looked at directives in the parent node
            break;
        case NodeTypeCastExpr:
            zig_panic("TODO");
            break;
        case NodeTypePrefixOpExpr:
            zig_panic("TODO");
            break;
        case NodeTypeNumberLiteral:
        case NodeTypeStringLiteral:
        case NodeTypeUnreachable:
        case NodeTypeSymbol:
            // nothing to do
            break;
    }
}
// Registers the built-in types in g->type_table, keyed by type name:
// "u8", "i32", "void" and "unreachable". Each entry gets its LLVM type and a
// debug-info type. The void entry is additionally recorded as
// g->invalid_type_entry.
static void add_types(CodeGen *g) {
    // u8: 8 bits in size and alignment, unsigned encoding.
    TypeTableEntry *u8_type = allocate<TypeTableEntry>(1);
    buf_init_from_str(&u8_type->name, "u8");
    u8_type->id = TypeIdU8;
    u8_type->type_ref = LLVMInt8Type();
    u8_type->di_type = g->dbuilder->createBasicType(buf_ptr(&u8_type->name), 8, 8,
            llvm::dwarf::DW_ATE_unsigned);
    g->type_table.put(&u8_type->name, u8_type);

    // i32: 32 bits in size and alignment, signed encoding.
    TypeTableEntry *i32_type = allocate<TypeTableEntry>(1);
    buf_init_from_str(&i32_type->name, "i32");
    i32_type->id = TypeIdI32;
    i32_type->type_ref = LLVMInt32Type();
    i32_type->di_type = g->dbuilder->createBasicType(buf_ptr(&i32_type->name), 32, 32,
            llvm::dwarf::DW_ATE_signed);
    g->type_table.put(&i32_type->name, i32_type);

    // void: zero-sized debug-info type.
    TypeTableEntry *void_type = allocate<TypeTableEntry>(1);
    buf_init_from_str(&void_type->name, "void");
    void_type->id = TypeIdVoid;
    void_type->type_ref = LLVMVoidType();
    void_type->di_type = g->dbuilder->createBasicType(buf_ptr(&void_type->name), 0, 0,
            llvm::dwarf::DW_ATE_unsigned);
    g->type_table.put(&void_type->name, void_type);
    // invalid types are void
    g->invalid_type_entry = void_type;

    // unreachable: LLVM void type; reuses the debug-info type of the
    // invalid (void) entry registered just above.
    TypeTableEntry *unreachable_type = allocate<TypeTableEntry>(1);
    buf_init_from_str(&unreachable_type->name, "unreachable");
    unreachable_type->id = TypeIdUnreachable;
    unreachable_type->type_ref = LLVMVoidType();
    unreachable_type->di_type = g->invalid_type_entry->di_type;
    g->type_table.put(&unreachable_type->name, unreachable_type);
}
// Prepares LLVM state for the native host target and runs semantic analysis.
//
// Initializes the LLVM targets, creates the target machine, module, IR builder
// and debug-info builder on `g`, registers the built-in types via add_types(),
// and then analyzes the AST starting at g->root. Panics (zig_panic) if LLVM
// cannot resolve a target for the host triple.
void semantic_analyze(CodeGen *g) {
    LLVMInitializeAllTargets();
    LLVMInitializeAllTargetMCs();
    LLVMInitializeAllAsmPrinters();
    LLVMInitializeAllAsmParsers();
    LLVMInitializeNativeTarget();

    g->is_native_target = true;
    char *native_triple = LLVMGetDefaultTargetTriple();

    LLVMTargetRef target_ref;
    char *err_msg = nullptr;
    if (LLVMGetTargetFromTriple(native_triple, &target_ref, &err_msg)) {
        zig_panic("unable to get target from triple: %s", err_msg);
    }

    // NOTE(review): ownership of these two strings is not visible here;
    // confirm whether they need to be freed after the target machine exists.
    char *native_cpu = LLVMZigGetHostCPUName();
    char *native_features = LLVMZigGetNativeFeatures();

    // Debug builds are unoptimized; every other build type is optimized aggressively.
    LLVMCodeGenOptLevel opt_level = (g->build_type == CodeGenBuildTypeDebug) ?
        LLVMCodeGenLevelNone : LLVMCodeGenLevelAggressive;

    LLVMRelocMode reloc_mode = g->is_static ? LLVMRelocStatic : LLVMRelocPIC;

    g->target_machine = LLVMCreateTargetMachine(target_ref, native_triple,
            native_cpu, native_features, opt_level, reloc_mode, LLVMCodeModelDefault);

    // The string returned by LLVMGetDefaultTargetTriple() must be released
    // with LLVMDisposeMessage(); it is not referenced past this point.
    LLVMDisposeMessage(native_triple);

    g->target_data_ref = LLVMGetTargetMachineData(g->target_machine);

    g->module = LLVMModuleCreateWithName("ZigModule");

    g->pointer_size_bytes = LLVMPointerSize(g->target_data_ref);

    g->builder = LLVMCreateBuilder();
    g->dbuilder = new llvm::DIBuilder(*llvm::unwrap(g->module), true);

    // Built-in types must be registered before analysis looks them up.
    add_types(g);

    analyze_node(g, g->root);
}
// Forward declaration; gen_expr is defined elsewhere in this translation unit.
static LLVMValueRef gen_expr(CodeGen *g, AstNode *expr_node);
static void add_debug_source_node(CodeGen *g, AstNode *node) {
llvm::unwrap(g->builder)->SetCurrentDebugLocation(llvm::DebugLoc::get(
node->line + 1, node->column + 1,

View File

@ -41,8 +41,6 @@ void codegen_set_strip(CodeGen *codegen, bool strip);
void codegen_set_out_type(CodeGen *codegen, OutType out_type);
void codegen_set_out_name(CodeGen *codegen, Buf *out_name);
void semantic_analyze(CodeGen *g);
void code_gen_optimize(CodeGen *g);
void code_gen(CodeGen *g);

View File

@ -13,6 +13,7 @@
#include "tokenizer.hpp"
#include "error.hpp"
#include "codegen.hpp"
#include "analyze.hpp"
#include <stdio.h>
#include <string.h>

106
src/semantic_info.hpp Normal file
View File

@ -0,0 +1,106 @@
/*
* Copyright (c) 2015 Andrew Kelley
*
* This file is part of zig, which is MIT licensed.
* See http://opensource.org/licenses/MIT
*/
#ifndef ZIG_SEMANTIC_INFO_HPP
#define ZIG_SEMANTIC_INFO_HPP
#include "codegen.hpp"
#include "hash_map.hpp"
#include <llvm/IR/DIBuilder.h>
#include <llvm/IR/DiagnosticInfo.h>
// Value type stored in CodeGen::fn_table: per-function bookkeeping looked up
// by function name.
struct FnTableEntry {
// LLVM value associated with this function (presumably the LLVM function
// itself -- TODO confirm).
LLVMValueRef fn_value;
// AST node of the function prototype. Call analysis asserts this is a
// NodeTypeFnProto and reads the expected parameter count from it.
AstNode *proto_node;
// NOTE(review): presumably the NodeTypeFnDef node when the function has a
// body -- confirm against where entries are created.
AstNode *fn_def_node;
bool is_extern;
bool internal_linkage;
// NOTE(review): presumably an LLVM calling-convention identifier -- confirm.
unsigned calling_convention;
};
// Discriminator stored in TypeTableEntry::id.
// add_types() registers entries for U8, I32, Void and Unreachable; entries
// with the other ids are not created by add_types().
enum TypeId {
TypeIdUserDefined,
TypeIdPointer,
TypeIdU8,
TypeIdI32,
TypeIdVoid,
TypeIdUnreachable,
};
// Value type stored in CodeGen::type_table: one known type, keyed by `name`.
struct TypeTableEntry {
// Which kind of type this entry describes.
TypeId id;
// LLVM type used for values of this type.
LLVMTypeRef type_ref;
// Debug-info type handed to / produced by the DIBuilder.
llvm::DIType *di_type;
// NOTE(review): the pointer_* fields presumably apply only when
// id == TypeIdPointer (pointee type and constness) -- confirm.
TypeTableEntry *pointer_child;
bool pointer_is_const;
// NOTE(review): presumably meaningful only for TypeIdUserDefined -- confirm.
int user_defined_id;
// Type name; add_types() uses the address of this buffer as the table key.
Buf name;
// NOTE(review): presumably cached pointer-to-this-type entries (const and
// mutable variants) -- confirm.
TypeTableEntry *pointer_const_parent;
TypeTableEntry *pointer_mut_parent;
};
// Compilation-wide state shared by semantic analysis and code generation.
struct CodeGen {
// LLVM module; created by semantic_analyze().
LLVMModuleRef module;
// Root of the AST; semantic_analyze() starts analysis here.
AstNode *root;
// Diagnostics collected so far; add_node_error() appends to this list.
ZigList<ErrorMsg> errors;
// LLVM IR builder and debug-info builder; both created by semantic_analyze().
LLVMBuilderRef builder;
llvm::DIBuilder *dbuilder;
llvm::DICompileUnit *compile_unit;
// Functions by name; consulted when analyzing call expressions.
HashMap<Buf *, FnTableEntry *, buf_hash, buf_eql_buf> fn_table;
HashMap<Buf *, LLVMValueRef, buf_hash, buf_eql_buf> str_table;
// Types by name; the built-in entries are inserted by add_types().
HashMap<Buf *, TypeTableEntry *, buf_hash, buf_eql_buf> type_table;
HashMap<Buf *, bool, buf_hash, buf_eql_buf> link_table;
// Entry used for invalid types; add_types() points this at the void entry.
TypeTableEntry *invalid_type_entry;
// Target data layout and pointer size, obtained from target_machine in
// semantic_analyze().
LLVMTargetDataRef target_data_ref;
unsigned pointer_size_bytes;
// Selects static vs. PIC relocation mode when creating the target machine.
bool is_static;
bool strip_debug_symbols;
// Debug build type selects no optimization; anything else selects aggressive.
CodeGenBuildType build_type;
LLVMTargetMachineRef target_machine;
// Set to true by semantic_analyze(), which always targets the host.
bool is_native_target;
Buf in_file;
Buf in_dir;
ZigList<llvm::DIScope *> block_scopes;
llvm::DIFile *di_file;
ZigList<FnTableEntry *> fn_defs;
Buf *out_name;
// Output kind; when still OutTypeUnknown, analysis of the root export
// declaration fills it in.
OutType out_type;
FnTableEntry *cur_fn;
bool c_stdint_used;
AstNode *root_export_decl;
// Version components parsed from the root export's version string.
int version_major;
int version_minor;
int version_patch;
};
// Analysis data attached to a type AST node (see CodeGenNode).
struct TypeNode {
// Type table entry associated with the AST type node.
TypeTableEntry *entry;
};
// Analysis data attached to a function-definition AST node (see CodeGenNode).
struct FnDefNode {
// NOTE(review): presumably set when the body does not end in an explicit
// return -- confirm against check_fn_def_control_flow.
bool add_implicit_return;
// When true, analyze_node() does not analyze this function definition any
// further because an earlier error was detected for it.
bool skip;
// NOTE(review): presumably the LLVM values of the function's parameters,
// filled in during code generation -- confirm.
LLVMValueRef *params;
};
// Per-AST-node analysis data, reached through AstNode::codegen_node.
// Only the union member matching the AST node's type is meaningful.
struct CodeGenNode {
union {
TypeNode type_node; // for NodeTypeType
FnDefNode fn_def_node; // for NodeTypeFnDef
} data;
};
// Returns the name buffer of the expression used as a call target.
//
// This is a stop-gap: the expression is assumed to be a plain symbol, which is
// enforced with an assert. `g` is accepted for signature stability but unused.
// TODO after type checking works we should be able to remove this hack
static inline Buf *hack_get_fn_call_name(CodeGen *g, AstNode *node) {
    assert(node->type == NodeTypeSymbol);
    Buf *fn_name = &node->data.symbol;
    return fn_name;
}
#endif