From 173fc842c4eb1da6ca07a4ab6026c4c62bc8c09b Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 9 Sep 2018 18:07:11 -0400 Subject: [PATCH] basic compiler id hash working --- CMakeLists.txt | 1 + src/buffer.hpp | 4 + src/cache_hash.cpp | 407 +++++++++++++++++++++++++++++++++++++++++++++ src/cache_hash.hpp | 56 +++++++ src/codegen.cpp | 152 ++++------------- src/error.cpp | 2 + src/error.hpp | 2 + src/main.cpp | 61 +++++++ src/os.cpp | 314 ++++++++++++++++++++++++++-------- src/os.hpp | 158 ++++++++++-------- src/util.cpp | 39 +++++ src/util.hpp | 12 ++ 12 files changed, 951 insertions(+), 257 deletions(-) create mode 100644 src/cache_hash.cpp create mode 100644 src/cache_hash.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index fbf5b03ea..615128159 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -409,6 +409,7 @@ set(ZIG_SOURCES "${CMAKE_SOURCE_DIR}/src/bigint.cpp" "${CMAKE_SOURCE_DIR}/src/blake2b.cpp" "${CMAKE_SOURCE_DIR}/src/buffer.cpp" + "${CMAKE_SOURCE_DIR}/src/cache_hash.cpp" "${CMAKE_SOURCE_DIR}/src/c_tokenizer.cpp" "${CMAKE_SOURCE_DIR}/src/codegen.cpp" "${CMAKE_SOURCE_DIR}/src/errmsg.cpp" diff --git a/src/buffer.hpp b/src/buffer.hpp index 501e44b5a..8155df87a 100644 --- a/src/buffer.hpp +++ b/src/buffer.hpp @@ -78,6 +78,10 @@ static inline Buf *buf_create_from_mem(const char *ptr, size_t len) { return buf; } +static inline Buf *buf_create_from_slice(Slice slice) { + return buf_create_from_mem((const char *)slice.ptr, slice.len); +} + static inline Buf *buf_create_from_str(const char *str) { return buf_create_from_mem(str, strlen(str)); } diff --git a/src/cache_hash.cpp b/src/cache_hash.cpp new file mode 100644 index 000000000..5447a5f3f --- /dev/null +++ b/src/cache_hash.cpp @@ -0,0 +1,407 @@ +/* + * Copyright (c) 2018 Andrew Kelley + * + * This file is part of zig, which is MIT licensed. + * See http://opensource.org/licenses/MIT + */ + +#include "cache_hash.hpp" +#include "buffer.hpp" +#include "os.hpp" + +#include + +void cache_init(CacheHash *ch, Buf *manifest_dir) { + int rc = blake2b_init(&ch->blake, 48); + assert(rc == 0); + ch->files = {}; + ch->manifest_dir = manifest_dir; + ch->manifest_file_path = nullptr; + ch->manifest_dirty = false; +} + +void cache_str(CacheHash *ch, const char *ptr) { + assert(ch->manifest_file_path == nullptr); + assert(ptr != nullptr); + // + 1 to include the null byte + blake2b_update(&ch->blake, ptr, strlen(ptr) + 1); +} + +void cache_int(CacheHash *ch, int x) { + assert(ch->manifest_file_path == nullptr); + // + 1 to include the null byte + uint8_t buf[sizeof(int) + 1]; + memcpy(buf, &x, sizeof(int)); + buf[sizeof(int)] = 0; + blake2b_update(&ch->blake, buf, sizeof(int) + 1); +} + +void cache_buf(CacheHash *ch, Buf *buf) { + assert(ch->manifest_file_path == nullptr); + assert(buf != nullptr); + // + 1 to include the null byte + blake2b_update(&ch->blake, buf_ptr(buf), buf_len(buf) + 1); +} + +void cache_buf_opt(CacheHash *ch, Buf *buf) { + assert(ch->manifest_file_path == nullptr); + if (buf == nullptr) { + cache_str(ch, ""); + cache_str(ch, ""); + } else { + cache_buf(ch, buf); + } +} + +void cache_list_of_link_lib(CacheHash *ch, LinkLib **ptr, size_t len) { + assert(ch->manifest_file_path == nullptr); + for (size_t i = 0; i < len; i += 1) { + LinkLib *lib = ptr[i]; + if (lib->provided_explicitly) { + cache_buf(ch, lib->name); + } + } + cache_str(ch, ""); +} + +void cache_list_of_buf(CacheHash *ch, Buf **ptr, size_t len) { + assert(ch->manifest_file_path == nullptr); + for (size_t i = 0; i < len; i += 1) { + Buf *buf = ptr[i]; + cache_buf(ch, buf); + } + cache_str(ch, ""); +} + +void cache_file(CacheHash *ch, Buf *file_path) { + assert(ch->manifest_file_path == nullptr); + assert(file_path != nullptr); + Buf *resolved_path = buf_alloc(); + *resolved_path = os_path_resolve(&file_path, 1); + CacheHashFile *chf = ch->files.add_one(); + chf->path = resolved_path; + cache_buf(ch, resolved_path); +} + +void cache_file_opt(CacheHash *ch, Buf *file_path) { + assert(ch->manifest_file_path == nullptr); + if (file_path == nullptr) { + cache_str(ch, ""); + cache_str(ch, ""); + } else { + cache_file(ch, file_path); + } +} + +// Ported from std/base64.zig +static uint8_t base64_fs_alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; +static void base64_encode(Slice dest, Slice source) { + size_t dest_len = ((source.len + 2) / 3) * 4; + assert(dest.len == dest_len); + + size_t i = 0; + size_t out_index = 0; + for (; i + 2 < source.len; i += 3) { + dest.ptr[out_index] = base64_fs_alphabet[(source.ptr[i] >> 2) & 0x3f]; + out_index += 1; + + dest.ptr[out_index] = base64_fs_alphabet[((source.ptr[i] & 0x3) << 4) | ((source.ptr[i + 1] & 0xf0) >> 4)]; + out_index += 1; + + dest.ptr[out_index] = base64_fs_alphabet[((source.ptr[i + 1] & 0xf) << 2) | ((source.ptr[i + 2] & 0xc0) >> 6)]; + out_index += 1; + + dest.ptr[out_index] = base64_fs_alphabet[source.ptr[i + 2] & 0x3f]; + out_index += 1; + } + + // Assert that we never need pad characters. + assert(i == source.len); +} + +// Ported from std/base64.zig +static Error base64_decode(Slice dest, Slice source) { + assert(source.len % 4 == 0); + assert(dest.len == (source.len / 4) * 3); + + // In Zig this is comptime computed. In C++ it's not worth it to do that. + uint8_t char_to_index[256]; + bool char_in_alphabet[256] = {0}; + for (size_t i = 0; i < 64; i += 1) { + uint8_t c = base64_fs_alphabet[i]; + assert(!char_in_alphabet[c]); + char_in_alphabet[c] = true; + char_to_index[c] = i; + } + + size_t src_cursor = 0; + size_t dest_cursor = 0; + + for (;src_cursor < source.len; src_cursor += 4) { + if (!char_in_alphabet[source.ptr[src_cursor + 0]]) return ErrorInvalidFormat; + if (!char_in_alphabet[source.ptr[src_cursor + 1]]) return ErrorInvalidFormat; + if (!char_in_alphabet[source.ptr[src_cursor + 2]]) return ErrorInvalidFormat; + if (!char_in_alphabet[source.ptr[src_cursor + 3]]) return ErrorInvalidFormat; + dest.ptr[dest_cursor + 0] = (char_to_index[source.ptr[src_cursor + 0]] << 2) | (char_to_index[source.ptr[src_cursor + 1]] >> 4); + dest.ptr[dest_cursor + 1] = (char_to_index[source.ptr[src_cursor + 1]] << 4) | (char_to_index[source.ptr[src_cursor + 2]] >> 2); + dest.ptr[dest_cursor + 2] = (char_to_index[source.ptr[src_cursor + 2]] << 6) | (char_to_index[source.ptr[src_cursor + 3]]); + dest_cursor += 3; + } + + assert(src_cursor == source.len); + assert(dest_cursor == dest.len); + return ErrorNone; +} + +static Error hash_file(uint8_t *digest, OsFile handle) { + Error err; + + blake2b_state blake; + int rc = blake2b_init(&blake, 48); + assert(rc == 0); + + for (;;) { + uint8_t buf[4096]; + size_t amt = 4096; + if ((err = os_file_read(handle, buf, &amt))) + return err; + if (amt == 0) { + rc = blake2b_final(&blake, digest, 48); + assert(rc == 0); + return ErrorNone; + } + blake2b_update(&blake, buf, amt); + } +} + +static Error populate_file_hash(CacheHash *ch, CacheHashFile *chf) { + Error err; + + assert(chf->path != nullptr); + + OsFile this_file; + if ((err = os_file_open_r(chf->path, &this_file))) + return err; + + if ((err = os_file_mtime(this_file, &chf->mtime))) { + os_file_close(this_file); + return err; + } + + if ((err = hash_file(chf->bin_digest, this_file))) { + os_file_close(this_file); + return err; + } + os_file_close(this_file); + + blake2b_update(&ch->blake, chf->bin_digest, 48); + + return ErrorNone; +} + +Error cache_hit(CacheHash *ch, Buf *out_digest) { + Error err; + + uint8_t bin_digest[48]; + int rc = blake2b_final(&ch->blake, bin_digest, 48); + assert(rc == 0); + + Buf b64_digest = BUF_INIT; + buf_resize(&b64_digest, 64); + base64_encode(buf_to_slice(&b64_digest), {bin_digest, 48}); + + rc = blake2b_init(&ch->blake, 48); + assert(rc == 0); + blake2b_update(&ch->blake, bin_digest, 48); + + ch->manifest_file_path = buf_alloc(); + os_path_join(ch->manifest_dir, &b64_digest, ch->manifest_file_path); + + buf_append_str(ch->manifest_file_path, ".txt"); + + if ((err = os_file_open_lock_rw(ch->manifest_file_path, &ch->manifest_file))) + return err; + + Buf line_buf = BUF_INIT; + buf_resize(&line_buf, 512); + if ((err = os_file_read_all(ch->manifest_file, &line_buf))) { + os_file_close(ch->manifest_file); + return err; + } + + size_t input_file_count = ch->files.length; + bool any_file_changed = false; + size_t file_i = 0; + SplitIterator line_it = memSplit(buf_to_slice(&line_buf), str("\n")); + for (;; file_i += 1) { + Optional> opt_line = SplitIterator_next(&line_it); + if (!opt_line.is_some) + break; + + CacheHashFile *chf; + if (file_i < input_file_count) { + chf = &ch->files.at(file_i); + } else if (any_file_changed) { + // cache miss. + // keep the the manifest file open with the rw lock + // reset the hash + rc = blake2b_init(&ch->blake, 48); + assert(rc == 0); + blake2b_update(&ch->blake, bin_digest, 48); + ch->files.resize(input_file_count); + // bring the hash up to the input file hashes + for (file_i = 0; file_i < input_file_count; file_i += 1) { + blake2b_update(&ch->blake, ch->files.at(file_i).bin_digest, 48); + } + // caller can notice that out_digest is unmodified. + return ErrorNone; + } else { + chf = ch->files.add_one(); + chf->path = nullptr; + } + + SplitIterator it = memSplit(opt_line.value, str(" ")); + + Optional> opt_mtime_sec = SplitIterator_next(&it); + if (!opt_mtime_sec.is_some) { + os_file_close(ch->manifest_file); + return ErrorInvalidFormat; + } + chf->mtime.sec = strtoull((const char *)opt_mtime_sec.value.ptr, nullptr, 10); + + Optional> opt_mtime_nsec = SplitIterator_next(&it); + if (!opt_mtime_nsec.is_some) { + os_file_close(ch->manifest_file); + return ErrorInvalidFormat; + } + chf->mtime.nsec = strtoull((const char *)opt_mtime_nsec.value.ptr, nullptr, 10); + + Optional> opt_digest = SplitIterator_next(&it); + if (!opt_digest.is_some) { + os_file_close(ch->manifest_file); + return ErrorInvalidFormat; + } + if ((err = base64_decode({chf->bin_digest, 48}, opt_digest.value))) { + os_file_close(ch->manifest_file); + return ErrorInvalidFormat; + } + + Optional> opt_file_path = SplitIterator_next(&it); + if (!opt_file_path.is_some) { + os_file_close(ch->manifest_file); + return ErrorInvalidFormat; + } + Buf *this_path = buf_create_from_slice(opt_file_path.value); + if (chf->path != nullptr && !buf_eql_buf(this_path, chf->path)) { + os_file_close(ch->manifest_file); + return ErrorInvalidFormat; + } + chf->path = this_path; + + // if the mtime matches we can trust the digest + OsFile this_file; + if ((err = os_file_open_r(chf->path, &this_file))) { + os_file_close(ch->manifest_file); + return err; + } + OsTimeStamp actual_mtime; + if ((err = os_file_mtime(this_file, &actual_mtime))) { + os_file_close(this_file); + os_file_close(ch->manifest_file); + return err; + } + if (chf->mtime.sec == actual_mtime.sec && chf->mtime.nsec == actual_mtime.nsec) { + os_file_close(this_file); + } else { + // we have to recompute the digest. + // later we'll rewrite the manifest with the new mtime/digest values + ch->manifest_dirty = true; + chf->mtime = actual_mtime; + + uint8_t actual_digest[48]; + if ((err = hash_file(actual_digest, this_file))) { + os_file_close(this_file); + os_file_close(ch->manifest_file); + return err; + } + os_file_close(this_file); + if (memcmp(chf->bin_digest, actual_digest, 48) != 0) { + memcpy(chf->bin_digest, actual_digest, 48); + // keep going until we have the input file digests + any_file_changed = true; + } + } + if (!any_file_changed) { + blake2b_update(&ch->blake, chf->bin_digest, 48); + } + } + if (file_i < input_file_count) { + // manifest file is empty or missing entries, so this is a cache miss + ch->manifest_dirty = true; + for (; file_i < input_file_count; file_i += 1) { + CacheHashFile *chf = &ch->files.at(file_i); + if ((err = populate_file_hash(ch, chf))) { + os_file_close(ch->manifest_file); + return err; + } + } + return ErrorNone; + } + // Cache Hit + return cache_final(ch, out_digest); +} + +Error cache_add_file(CacheHash *ch, Buf *path) { + Error err; + + assert(ch->manifest_file_path != nullptr); + CacheHashFile *chf = ch->files.add_one(); + chf->path = path; + if ((err = populate_file_hash(ch, chf))) { + os_file_close(ch->manifest_file); + return err; + } + + return ErrorNone; +} + +static Error write_manifest_file(CacheHash *ch) { + Error err; + Buf contents = BUF_INIT; + buf_resize(&contents, 0); + uint8_t encoded_digest[65]; + encoded_digest[64] = 0; + for (size_t i = 0; i < ch->files.length; i += 1) { + CacheHashFile *chf = &ch->files.at(i); + base64_encode({encoded_digest, 64}, {chf->bin_digest, 48}); + buf_appendf(&contents, "%" ZIG_PRI_u64 " %" ZIG_PRI_u64 " %s %s\n", + chf->mtime.sec, chf->mtime.nsec, encoded_digest, buf_ptr(chf->path)); + } + fprintf(stderr, "overwrite with\n%s\n", buf_ptr(&contents)); + if ((err = os_file_overwrite(ch->manifest_file, &contents))) + return err; + + return ErrorNone; +} + +Error cache_final(CacheHash *ch, Buf *out_digest) { + Error err; + + assert(ch->manifest_file_path != nullptr); + + if (ch->manifest_dirty) { + if ((err = write_manifest_file(ch))) { + fprintf(stderr, "Warning: Unable to write cache file '%s': %s\n", + buf_ptr(ch->manifest_file_path), err_str(err)); + } + } + os_file_close(ch->manifest_file); + + uint8_t bin_digest[48]; + int rc = blake2b_final(&ch->blake, bin_digest, 48); + assert(rc == 0); + buf_resize(out_digest, 64); + base64_encode(buf_to_slice(out_digest), {bin_digest, 48}); + + return ErrorNone; +} diff --git a/src/cache_hash.hpp b/src/cache_hash.hpp new file mode 100644 index 000000000..77b22a1e4 --- /dev/null +++ b/src/cache_hash.hpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2018 Andrew Kelley + * + * This file is part of zig, which is MIT licensed. + * See http://opensource.org/licenses/MIT + */ + +#ifndef ZIG_CACHE_HASH_HPP +#define ZIG_CACHE_HASH_HPP + +#include "all_types.hpp" +#include "blake2.h" +#include "os.hpp" + +struct CacheHashFile { + Buf *path; + OsTimeStamp mtime; + uint8_t bin_digest[48]; +}; + +struct CacheHash { + blake2b_state blake; + ZigList files; + Buf *manifest_dir; + Buf *manifest_file_path; + OsFile manifest_file; + bool manifest_dirty; +}; + +// Always call this first to set up. +void cache_init(CacheHash *ch, Buf *manifest_dir); + +// Next, use the hash population functions to add the initial parameters. +void cache_str(CacheHash *ch, const char *ptr); +void cache_int(CacheHash *ch, int x); +void cache_buf(CacheHash *ch, Buf *buf); +void cache_buf_opt(CacheHash *ch, Buf *buf); +void cache_list_of_link_lib(CacheHash *ch, LinkLib **ptr, size_t len); +void cache_list_of_buf(CacheHash *ch, Buf **ptr, size_t len); +void cache_file(CacheHash *ch, Buf *path); +void cache_file_opt(CacheHash *ch, Buf *path); + +// Then call cache_hit when you're ready to see if you can skip the next step. +// out_b64_digest will be left unchanged if it was a cache miss +Error ATTRIBUTE_MUST_USE cache_hit(CacheHash *ch, Buf *out_b64_digest); + +// If you got a cache hit, the flow is done. No more functions to call. +// Next call this function for every file that is depended on. +Error ATTRIBUTE_MUST_USE cache_add_file(CacheHash *ch, Buf *path); + +// If you did not get a cache hit, use the hash population functions again +// and do all the actual work. When done use cache_final to save the cache +// for next time. +Error ATTRIBUTE_MUST_USE cache_final(CacheHash *ch, Buf *out_digest); + +#endif diff --git a/src/codegen.cpp b/src/codegen.cpp index f4823f104..6c5648f62 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -19,7 +19,6 @@ #include "target.hpp" #include "util.hpp" #include "zig_llvm.h" -#include "blake2.h" #include #include @@ -7675,130 +7674,45 @@ void codegen_add_time_event(CodeGen *g, const char *name) { g->timing_events.append({os_get_time(), name}); } -static void add_cache_str(blake2b_state *blake, const char *ptr) { - assert(ptr != nullptr); - // + 1 to include the null byte - blake2b_update(blake, ptr, strlen(ptr) + 1); -} -static void add_cache_int(blake2b_state *blake, int x) { - // + 1 to include the null byte - uint8_t buf[sizeof(int) + 1]; - memcpy(buf, &x, sizeof(int)); - buf[sizeof(int)] = 0; - blake2b_update(blake, buf, sizeof(int) + 1); -} - -static void add_cache_buf(blake2b_state *blake, Buf *buf) { - assert(buf != nullptr); - // + 1 to include the null byte - blake2b_update(blake, buf_ptr(buf), buf_len(buf) + 1); -} - -static void add_cache_buf_opt(blake2b_state *blake, Buf *buf) { - if (buf == nullptr) { - add_cache_str(blake, ""); - add_cache_str(blake, ""); - } else { - add_cache_buf(blake, buf); - } -} - -static void add_cache_list_of_link_lib(blake2b_state *blake, LinkLib **ptr, size_t len) { - for (size_t i = 0; i < len; i += 1) { - LinkLib *lib = ptr[i]; - if (lib->provided_explicitly) { - add_cache_buf(blake, lib->name); - } - } - add_cache_str(blake, ""); -} - -static void add_cache_list_of_buf(blake2b_state *blake, Buf **ptr, size_t len) { - for (size_t i = 0; i < len; i += 1) { - Buf *buf = ptr[i]; - add_cache_buf(blake, buf); - } - add_cache_str(blake, ""); -} - -//static void add_cache_file(CodeGen *g, blake2b_state *blake, Buf *resolved_path) { -// assert(file_name != nullptr); -// g->cache_files.append(resolved_path); -//} +//// Called before init() +//static bool build_with_cache(CodeGen *g) { +// // TODO zig exe & dynamic libraries +// // should be in main.cpp I think. only needs to happen +// // once on startup. // -//static void add_cache_file_opt(CodeGen *g, blake2b_state *blake, Buf *resolved_path) { -// if (resolved_path == nullptr) { -// add_cache_str(blake, ""); -// add_cache_str(blake, ""); -// } else { -// add_cache_file(g, blake, resolved_path); -// } +// CacheHash comp; +// cache_init(&comp); +// +// add_cache_buf(&blake, g->root_out_name); +// add_cache_buf_opt(&blake, get_resolved_root_src_path(g)); // Root source file +// add_cache_list_of_link_lib(&blake, g->link_libs_list.items, g->link_libs_list.length); +// add_cache_list_of_buf(&blake, g->darwin_frameworks.items, g->darwin_frameworks.length); +// add_cache_list_of_buf(&blake, g->rpath_list.items, g->rpath_list.length); +// add_cache_int(&blake, g->emit_file_type); +// add_cache_int(&blake, g->build_mode); +// add_cache_int(&blake, g->out_type); +// // TODO the rest of the struct CodeGen fields +// +// uint8_t bin_digest[48]; +// rc = blake2b_final(&blake, bin_digest, 48); +// assert(rc == 0); +// +// Buf b64_digest = BUF_INIT; +// buf_resize(&b64_digest, 64); +// base64_encode(buf_to_slice(&b64_digest), {bin_digest, 48}); +// +// fprintf(stderr, "input params hash: %s\n", buf_ptr(&b64_digest)); +// // TODO next look for a manifest file that has all the files from the input parameters +// // use that to construct the real hash, which looks up the output directory and another manifest file +// +// return false; //} -// Ported from std/base64.zig -static uint8_t base64_fs_alphabet[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_"; -static void base64_encode(Slice dest, Slice source) { - size_t dest_len = ((source.len + 2) / 3) * 4; - assert(dest.len == dest_len); - - size_t i = 0; - size_t out_index = 0; - for (; i + 2 < source.len; i += 3) { - dest.ptr[out_index] = base64_fs_alphabet[(source.ptr[i] >> 2) & 0x3f]; - out_index += 1; - - dest.ptr[out_index] = base64_fs_alphabet[((source.ptr[i] & 0x3) << 4) | ((source.ptr[i + 1] & 0xf0) >> 4)]; - out_index += 1; - - dest.ptr[out_index] = base64_fs_alphabet[((source.ptr[i + 1] & 0xf) << 2) | ((source.ptr[i + 2] & 0xc0) >> 6)]; - out_index += 1; - - dest.ptr[out_index] = base64_fs_alphabet[source.ptr[i + 2] & 0x3f]; - out_index += 1; - } - - // Assert that we never need pad characters. - assert(i == source.len); -} - -// Called before init() -static bool build_with_cache(CodeGen *g) { - blake2b_state blake; - int rc = blake2b_init(&blake, 48); - assert(rc == 0); - - // TODO zig exe & dynamic libraries - - add_cache_buf(&blake, g->root_out_name); - add_cache_buf_opt(&blake, get_resolved_root_src_path(g)); // Root source file - add_cache_list_of_link_lib(&blake, g->link_libs_list.items, g->link_libs_list.length); - add_cache_list_of_buf(&blake, g->darwin_frameworks.items, g->darwin_frameworks.length); - add_cache_list_of_buf(&blake, g->rpath_list.items, g->rpath_list.length); - add_cache_int(&blake, g->emit_file_type); - add_cache_int(&blake, g->build_mode); - add_cache_int(&blake, g->out_type); - // TODO the rest of the struct CodeGen fields - - uint8_t bin_digest[48]; - rc = blake2b_final(&blake, bin_digest, 48); - assert(rc == 0); - - Buf b64_digest = BUF_INIT; - buf_resize(&b64_digest, 64); - base64_encode(buf_to_slice(&b64_digest), {bin_digest, 48}); - - fprintf(stderr, "input params hash: %s\n", buf_ptr(&b64_digest)); - // TODO next look for a manifest file that has all the files from the input parameters - // use that to construct the real hash, which looks up the output directory and another manifest file - - return false; -} - void codegen_build(CodeGen *g) { assert(g->out_type != OutTypeUnknown); - if (build_with_cache(g)) - return; + //if (build_with_cache(g)) + // return; init(g); codegen_add_time_event(g, "Semantic Analysis"); diff --git a/src/error.cpp b/src/error.cpp index 8303a80a1..d9da6e6c5 100644 --- a/src/error.cpp +++ b/src/error.cpp @@ -27,6 +27,8 @@ const char *err_str(int err) { case ErrorNegativeDenominator: return "negative denominator"; case ErrorShiftedOutOneBits: return "exact shift shifted out one bits"; case ErrorCCompileErrors: return "C compile errors"; + case ErrorEndOfFile: return "end of file"; + case ErrorIsDir: return "is directory"; } return "(invalid error)"; } diff --git a/src/error.hpp b/src/error.hpp index e3b87fc6b..c99c6e6b1 100644 --- a/src/error.hpp +++ b/src/error.hpp @@ -27,6 +27,8 @@ enum Error { ErrorNegativeDenominator, ErrorShiftedOutOneBits, ErrorCCompileErrors, + ErrorEndOfFile, + ErrorIsDir, }; const char *err_str(int err); diff --git a/src/main.cpp b/src/main.cpp index eed31438d..172fe04da 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -13,6 +13,7 @@ #include "link.hpp" #include "os.hpp" #include "target.hpp" +#include "cache_hash.hpp" #include @@ -270,6 +271,66 @@ int main(int argc, char **argv) { return 0; } + if (argc == 2 && strcmp(argv[1], "TEST") == 0) { + Error err; + Buf app_data_dir = BUF_INIT; + if ((err = os_get_app_data_dir(&app_data_dir, "zig"))) { + fprintf(stderr, "get app dir: %s\n", err_str(err)); + return 1; + } + Buf *stage1_dir = buf_alloc(); + os_path_join(&app_data_dir, buf_create_from_str("stage1"), stage1_dir); + Buf *manifest_dir = buf_alloc(); + os_path_join(stage1_dir, buf_create_from_str("exe"), manifest_dir); + + if ((err = os_make_path(manifest_dir))) { + fprintf(stderr, "make path: %s\n", err_str(err)); + return 1; + } + CacheHash cache_hash; + CacheHash *ch = &cache_hash; + cache_init(ch, manifest_dir); + Buf self_exe_path = BUF_INIT; + if ((err = os_self_exe_path(&self_exe_path))) { + fprintf(stderr, "self exe path: %s\n", err_str(err)); + return 1; + } + + cache_file(ch, &self_exe_path); + + Buf exe_digest = BUF_INIT; + buf_resize(&exe_digest, 0); + if ((err = cache_hit(ch, &exe_digest))) { + fprintf(stderr, "cache hit error: %s\n", err_str(err)); + return 1; + } + if (buf_len(&exe_digest) != 0) { + fprintf(stderr, "cache hit: %s\n", buf_ptr(&exe_digest)); + return 0; + } + fprintf(stderr, "cache miss\n"); + ZigList lib_paths = {}; + if ((err = os_self_exe_shared_libs(lib_paths))) { + fprintf(stderr, "finding out shared libs: %s\n", err_str(err)); + return 1; + } + for (size_t i = 0; i < lib_paths.length; i += 1) { + Buf *lib_path = lib_paths.at(i); + if ((err = cache_add_file(ch, lib_path))) { + fprintf(stderr, "cache add file %s: %s", buf_ptr(lib_path), err_str(err)); + return 1; + } + } + if ((err = cache_final(ch, &exe_digest))) { + fprintf(stderr, "final: %s\n", err_str(err)); + return 1; + } + + + fprintf(stderr, "computed2: %s\n", buf_ptr(&exe_digest)); + return 0; + } + os_init(); char *arg0 = argv[0]; diff --git a/src/os.cpp b/src/os.cpp index 0e62d84a4..d6e17ca5a 100644 --- a/src/os.cpp +++ b/src/os.cpp @@ -40,6 +40,10 @@ typedef SSIZE_T ssize_t; #endif +#if defined(ZIG_OS_LINUX) +#include +#endif + #if defined(__MACH__) #include @@ -57,54 +61,6 @@ static clock_serv_t cclock; #include #include -// Ported from std/mem.zig. -// Coordinate struct fields with memSplit function -struct SplitIterator { - size_t index; - Slice buffer; - Slice split_bytes; -}; - -// Ported from std/mem.zig. -static bool SplitIterator_isSplitByte(SplitIterator *self, uint8_t byte) { - for (size_t i = 0; i < self->split_bytes.len; i += 1) { - if (byte == self->split_bytes.ptr[i]) { - return true; - } - } - return false; -} - -// Ported from std/mem.zig. -static Optional> SplitIterator_next(SplitIterator *self) { - // move to beginning of token - while (self->index < self->buffer.len && - SplitIterator_isSplitByte(self, self->buffer.ptr[self->index])) - { - self->index += 1; - } - size_t start = self->index; - if (start == self->buffer.len) { - return {}; - } - - // move to end of token - while (self->index < self->buffer.len && - !SplitIterator_isSplitByte(self, self->buffer.ptr[self->index])) - { - self->index += 1; - } - size_t end = self->index; - - return Optional>::some(self->buffer.slice(start, end)); -} - -// Ported from std/mem.zig -static SplitIterator memSplit(Slice buffer, Slice split_bytes) { - return SplitIterator{0, buffer, split_bytes}; -} - - #if defined(ZIG_OS_POSIX) static void populate_termination(Termination *term, int status) { if (WIFEXITED(status)) { @@ -1368,16 +1324,16 @@ double os_get_time(void) { #endif } -int os_make_path(Buf *path) { +Error os_make_path(Buf *path) { Buf resolved_path = os_path_resolve(&path, 1); size_t end_index = buf_len(&resolved_path); - int err; + Error err; while (true) { if ((err = os_make_dir(buf_slice(&resolved_path, 0, end_index)))) { if (err == ErrorPathAlreadyExists) { if (end_index == buf_len(&resolved_path)) - return 0; + return ErrorNone; } else if (err == ErrorFileNotFound) { // march end_index backward until next path component while (true) { @@ -1391,7 +1347,7 @@ int os_make_path(Buf *path) { } } if (end_index == buf_len(&resolved_path)) - return 0; + return ErrorNone; // march end_index forward until next path component while (true) { end_index += 1; @@ -1399,10 +1355,10 @@ int os_make_path(Buf *path) { break; } } - return 0; + return ErrorNone; } -int os_make_dir(Buf *path) { +Error os_make_dir(Buf *path) { #if defined(ZIG_OS_WINDOWS) if (!CreateDirectory(buf_ptr(path), NULL)) { if (GetLastError() == ERROR_ALREADY_EXISTS) @@ -1413,7 +1369,7 @@ int os_make_dir(Buf *path) { return ErrorAccess; return ErrorUnexpected; } - return 0; + return ErrorNone; #else if (mkdir(buf_ptr(path), 0755) == -1) { if (errno == EEXIST) @@ -1424,7 +1380,7 @@ int os_make_dir(Buf *path) { return ErrorAccess; return ErrorUnexpected; } - return 0; + return ErrorNone; #endif } @@ -1447,7 +1403,7 @@ int os_init(void) { return 0; } -int os_self_exe_path(Buf *out_path) { +Error os_self_exe_path(Buf *out_path) { #if defined(ZIG_OS_WINDOWS) buf_resize(out_path, 256); for (;;) { @@ -1480,27 +1436,21 @@ int os_self_exe_path(Buf *out_path) { char *real_path = realpath(buf_ptr(tmp), buf_ptr(out_path)); if (!real_path) { buf_init_from_buf(out_path, tmp); - return 0; + return ErrorNone; } // Resize out_path for the correct length. buf_resize(out_path, strlen(buf_ptr(out_path))); - return 0; + return ErrorNone; #elif defined(ZIG_OS_LINUX) - buf_resize(out_path, 256); - for (;;) { - ssize_t amt = readlink("/proc/self/exe", buf_ptr(out_path), buf_len(out_path)); - if (amt == -1) { - return ErrorUnexpected; - } - if (amt == (ssize_t)buf_len(out_path)) { - buf_resize(out_path, buf_len(out_path) * 2); - continue; - } - buf_resize(out_path, amt); - return 0; + buf_resize(out_path, PATH_MAX); + ssize_t amt = readlink("/proc/self/exe", buf_ptr(out_path), buf_len(out_path)); + if (amt == -1) { + return ErrorUnexpected; } + buf_resize(out_path, amt); + return ErrorNone; #endif return ErrorFileNotFound; } @@ -1685,3 +1635,225 @@ int os_get_win32_kern32_path(ZigWindowsSDK *sdk, Buf* output_buf, ZigLLVM_ArchTy return ErrorFileNotFound; #endif } + +// Ported from std/os/get_app_data_dir.zig +Error os_get_app_data_dir(Buf *out_path, const char *appname) { +#if defined(ZIG_OS_WINDOWS) +#error "Unimplemented" +#elif defined(ZIG_OS_DARWIN) + const char *home_dir = getenv("HOME"); + if (home_dir == nullptr) { + // TODO use /etc/passwd + return ErrorFileNotFound; + } + buf_resize(out_path, 0); + buf_appendf(out_path, "%s/Library/Application Support/%s", home_dir, appname); + return ErrorNone; +#elif defined(ZIG_OS_LINUX) + const char *home_dir = getenv("HOME"); + if (home_dir == nullptr) { + // TODO use /etc/passwd + return ErrorFileNotFound; + } + buf_resize(out_path, 0); + buf_appendf(out_path, "%s/.local/share/%s", home_dir, appname); + return ErrorNone; +#endif +} + + +#if defined(ZIG_OS_LINUX) +static int self_exe_shared_libs_callback(struct dl_phdr_info *info, size_t size, void *data) { + ZigList *libs = reinterpret_cast< ZigList *>(data); + if (info->dlpi_name[0] == '/') { + libs->append(buf_create_from_str(info->dlpi_name)); + } + return 0; +} +#endif + +Error os_self_exe_shared_libs(ZigList &paths) { +#if defined(ZIG_OS_LINUX) + paths.resize(0); + dl_iterate_phdr(self_exe_shared_libs_callback, &paths); + return ErrorNone; +#else +#error "unimplemented" +#endif +} + +Error os_file_open_r(Buf *full_path, OsFile *out_file) { +#if defined(ZIG_OS_WINDOWS) +#error "unimplemented" +#else + for (;;) { + int fd = open(buf_ptr(full_path), O_RDONLY|O_CLOEXEC); + if (fd == -1) { + switch (errno) { + case EINTR: + continue; + case EINVAL: + zig_unreachable(); + case EFAULT: + zig_unreachable(); + case EACCES: + return ErrorAccess; + case EISDIR: + return ErrorIsDir; + case ENOENT: + return ErrorFileNotFound; + default: + return ErrorFileSystem; + } + } + *out_file = fd; + return ErrorNone; + } +#endif +} + +Error os_file_open_lock_rw(Buf *full_path, OsFile *out_file) { +#if defined(ZIG_OS_WINDOWS) +#error "unimplemented" +#else + int fd; + for (;;) { + fd = open(buf_ptr(full_path), O_RDWR|O_CLOEXEC|O_CREAT, 0666); + if (fd == -1) { + switch (errno) { + case EINTR: + continue; + case EINVAL: + zig_unreachable(); + case EFAULT: + zig_unreachable(); + case EACCES: + return ErrorAccess; + case EISDIR: + return ErrorIsDir; + case ENOENT: + return ErrorFileNotFound; + default: + return ErrorFileSystem; + } + } + break; + } + for (;;) { + struct flock lock; + lock.l_type = F_WRLCK; + lock.l_whence = SEEK_SET; + lock.l_start = 0; + lock.l_len = 0; + if (fcntl(fd, F_SETLKW, &lock) == -1) { + switch (errno) { + case EINTR: + continue; + case EBADF: + zig_unreachable(); + case EFAULT: + zig_unreachable(); + case EINVAL: + zig_unreachable(); + default: + close(fd); + return ErrorFileSystem; + } + } + break; + } + *out_file = fd; + return ErrorNone; +#endif +} + +Error os_file_mtime(OsFile file, OsTimeStamp *mtime) { +#if defined(ZIG_OS_WINDOWS) +#error unimplemented +#else + struct stat statbuf; + if (fstat(file, &statbuf) == -1) + return ErrorFileSystem; + + mtime->sec = statbuf.st_mtim.tv_sec; + mtime->nsec = statbuf.st_mtim.tv_nsec; + return ErrorNone; +#endif +} + +Error os_file_read(OsFile file, void *ptr, size_t *len) { +#if defined(ZIG_OS_WINDOWS) +#error unimplemented +#else + for (;;) { + ssize_t rc = read(file, ptr, *len); + if (rc == -1) { + switch (errno) { + case EINTR: + continue; + case EBADF: + zig_unreachable(); + case EFAULT: + zig_unreachable(); + case EISDIR: + zig_unreachable(); + default: + return ErrorFileSystem; + } + } + *len = rc; + return ErrorNone; + } +#endif +} + +Error os_file_read_all(OsFile file, Buf *contents) { + Error err; + size_t index = 0; + for (;;) { + size_t amt = buf_len(contents) - index; + + if (amt < 512) { + buf_resize(contents, buf_len(contents) + 512); + amt += 512; + } + + if ((err = os_file_read(file, buf_ptr(contents) + index, &amt))) + return err; + + if (amt == 0) { + buf_resize(contents, index); + return ErrorNone; + } + + index += amt; + } +} + +Error os_file_overwrite(OsFile file, Buf *contents) { +#if defined(ZIG_OS_WINDOWS) +#error unimplemented +#else + if (lseek(file, 0, SEEK_SET) == -1) + return ErrorFileSystem; + for (;;) { + if (write(file, buf_ptr(contents), buf_len(contents)) == -1) { + switch (errno) { + case EINTR: + continue; + case EINVAL: + zig_unreachable(); + case EBADF: + zig_unreachable(); + default: + return ErrorFileSystem; + } + } + return ErrorNone; + } +#endif +} + +void os_file_close(OsFile file) { + close(file); +} diff --git a/src/os.hpp b/src/os.hpp index a44fa8160..c64eccf8d 100644 --- a/src/os.hpp +++ b/src/os.hpp @@ -13,77 +13,11 @@ #include "error.hpp" #include "zig_llvm.h" #include "windows_sdk.h" +#include "result.hpp" #include #include -enum TermColor { - TermColorRed, - TermColorGreen, - TermColorCyan, - TermColorWhite, - TermColorBold, - TermColorReset, -}; - -enum TerminationId { - TerminationIdClean, - TerminationIdSignaled, - TerminationIdStopped, - TerminationIdUnknown, -}; - -struct Termination { - TerminationId how; - int code; -}; - -int os_init(void); - -void os_spawn_process(const char *exe, ZigList &args, Termination *term); -int os_exec_process(const char *exe, ZigList &args, - Termination *term, Buf *out_stderr, Buf *out_stdout); - -void os_path_dirname(Buf *full_path, Buf *out_dirname); -void os_path_split(Buf *full_path, Buf *out_dirname, Buf *out_basename); -void os_path_extname(Buf *full_path, Buf *out_basename, Buf *out_extname); -void os_path_join(Buf *dirname, Buf *basename, Buf *out_full_path); -int os_path_real(Buf *rel_path, Buf *out_abs_path); -Buf os_path_resolve(Buf **paths_ptr, size_t paths_len); -bool os_path_is_absolute(Buf *path); - -int os_get_global_cache_directory(Buf *out_tmp_path); - -int os_make_path(Buf *path); -int os_make_dir(Buf *path); - -void os_write_file(Buf *full_path, Buf *contents); -int os_copy_file(Buf *src_path, Buf *dest_path); - -int os_fetch_file(FILE *file, Buf *out_contents, bool skip_shebang); -int os_fetch_file_path(Buf *full_path, Buf *out_contents, bool skip_shebang); - -int os_get_cwd(Buf *out_cwd); - -bool os_stderr_tty(void); -void os_stderr_set_color(TermColor color); - -int os_buf_to_tmp_file(Buf *contents, Buf *suffix, Buf *out_tmp_path); -int os_delete_file(Buf *path); - -int os_file_exists(Buf *full_path, bool *result); - -int os_rename(Buf *src_path, Buf *dest_path); -double os_get_time(void); - -bool os_is_sep(uint8_t c); - -int os_self_exe_path(Buf *out_path); - -int os_get_win32_ucrt_include_path(ZigWindowsSDK *sdk, Buf *output_buf); -int os_get_win32_ucrt_lib_path(ZigWindowsSDK *sdk, Buf *output_buf, ZigLLVM_ArchType platform_type); -int os_get_win32_kern32_path(ZigWindowsSDK *sdk, Buf *output_buf, ZigLLVM_ArchType platform_type); - #if defined(__APPLE__) #define ZIG_OS_DARWIN #elif defined(_WIN32) @@ -116,4 +50,94 @@ int os_get_win32_kern32_path(ZigWindowsSDK *sdk, Buf *output_buf, ZigLLVM_ArchTy #define ZIG_OS_SEP_CHAR '/' #endif +enum TermColor { + TermColorRed, + TermColorGreen, + TermColorCyan, + TermColorWhite, + TermColorBold, + TermColorReset, +}; + +enum TerminationId { + TerminationIdClean, + TerminationIdSignaled, + TerminationIdStopped, + TerminationIdUnknown, +}; + +struct Termination { + TerminationId how; + int code; +}; + +#if defined(ZIG_OS_WINDOWS) +#define OsFile (void *) +#else +#define OsFile int +#endif + +struct OsTimeStamp { + uint64_t sec; + uint64_t nsec; +}; + +int os_init(void); + +void os_spawn_process(const char *exe, ZigList &args, Termination *term); +int os_exec_process(const char *exe, ZigList &args, + Termination *term, Buf *out_stderr, Buf *out_stdout); + +void os_path_dirname(Buf *full_path, Buf *out_dirname); +void os_path_split(Buf *full_path, Buf *out_dirname, Buf *out_basename); +void os_path_extname(Buf *full_path, Buf *out_basename, Buf *out_extname); +void os_path_join(Buf *dirname, Buf *basename, Buf *out_full_path); +int os_path_real(Buf *rel_path, Buf *out_abs_path); +Buf os_path_resolve(Buf **paths_ptr, size_t paths_len); +bool os_path_is_absolute(Buf *path); + +int os_get_global_cache_directory(Buf *out_tmp_path); + +Error ATTRIBUTE_MUST_USE os_make_path(Buf *path); +Error ATTRIBUTE_MUST_USE os_make_dir(Buf *path); + +Error ATTRIBUTE_MUST_USE os_file_open_r(Buf *full_path, OsFile *out_file); +Error ATTRIBUTE_MUST_USE os_file_open_lock_rw(Buf *full_path, OsFile *out_file); +Error ATTRIBUTE_MUST_USE os_file_mtime(OsFile file, OsTimeStamp *mtime); +Error ATTRIBUTE_MUST_USE os_file_read(OsFile file, void *ptr, size_t *len); +Error ATTRIBUTE_MUST_USE os_file_read_all(OsFile file, Buf *contents); +Error ATTRIBUTE_MUST_USE os_file_overwrite(OsFile file, Buf *contents); +void os_file_close(OsFile file); + +void os_write_file(Buf *full_path, Buf *contents); +int os_copy_file(Buf *src_path, Buf *dest_path); + +int os_fetch_file(FILE *file, Buf *out_contents, bool skip_shebang); +int os_fetch_file_path(Buf *full_path, Buf *out_contents, bool skip_shebang); + +int os_get_cwd(Buf *out_cwd); + +bool os_stderr_tty(void); +void os_stderr_set_color(TermColor color); + +int os_buf_to_tmp_file(Buf *contents, Buf *suffix, Buf *out_tmp_path); +int os_delete_file(Buf *path); + +int os_file_exists(Buf *full_path, bool *result); + +int os_rename(Buf *src_path, Buf *dest_path); +double os_get_time(void); + +bool os_is_sep(uint8_t c); + +Error ATTRIBUTE_MUST_USE os_self_exe_path(Buf *out_path); + +Error ATTRIBUTE_MUST_USE os_get_app_data_dir(Buf *out_path, const char *appname); + +int os_get_win32_ucrt_include_path(ZigWindowsSDK *sdk, Buf *output_buf); +int os_get_win32_ucrt_lib_path(ZigWindowsSDK *sdk, Buf *output_buf, ZigLLVM_ArchType platform_type); +int os_get_win32_kern32_path(ZigWindowsSDK *sdk, Buf *output_buf, ZigLLVM_ArchType platform_type); + +Error ATTRIBUTE_MUST_USE os_self_exe_shared_libs(ZigList &paths); + #endif diff --git a/src/util.cpp b/src/util.cpp index cafd2c3d8..060d7f8fb 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -43,3 +43,42 @@ uint32_t ptr_hash(const void *ptr) { bool ptr_eq(const void *a, const void *b) { return a == b; } + +// Ported from std/mem.zig. +bool SplitIterator_isSplitByte(SplitIterator *self, uint8_t byte) { + for (size_t i = 0; i < self->split_bytes.len; i += 1) { + if (byte == self->split_bytes.ptr[i]) { + return true; + } + } + return false; +} + +// Ported from std/mem.zig. +Optional> SplitIterator_next(SplitIterator *self) { + // move to beginning of token + while (self->index < self->buffer.len && + SplitIterator_isSplitByte(self, self->buffer.ptr[self->index])) + { + self->index += 1; + } + size_t start = self->index; + if (start == self->buffer.len) { + return {}; + } + + // move to end of token + while (self->index < self->buffer.len && + !SplitIterator_isSplitByte(self, self->buffer.ptr[self->index])) + { + self->index += 1; + } + size_t end = self->index; + + return Optional>::some(self->buffer.slice(start, end)); +} + +// Ported from std/mem.zig +SplitIterator memSplit(Slice buffer, Slice split_bytes) { + return SplitIterator{0, buffer, split_bytes}; +} diff --git a/src/util.hpp b/src/util.hpp index 1e02e3043..f18c369fb 100644 --- a/src/util.hpp +++ b/src/util.hpp @@ -254,4 +254,16 @@ static inline void memCopy(Slice dest, Slice src) { memcpy(dest.ptr, src.ptr, src.len * sizeof(T)); } +// Ported from std/mem.zig. +// Coordinate struct fields with memSplit function +struct SplitIterator { + size_t index; + Slice buffer; + Slice split_bytes; +}; + +bool SplitIterator_isSplitByte(SplitIterator *self, uint8_t byte); +Optional> SplitIterator_next(SplitIterator *self); +SplitIterator memSplit(Slice buffer, Slice split_bytes); + #endif