zig/lib/std/cache_hash.zig
2020-05-25 13:48:43 -04:00

364 lines
12 KiB
Zig

const Blake3 = @import("crypto.zig").Blake3;
const fs = @import("fs.zig");
const base64 = @import("base64.zig");
const ArrayList = @import("array_list.zig").ArrayList;
const debug = @import("debug.zig");
const testing = @import("testing.zig");
const mem = @import("mem.zig");
const fmt = @import("fmt.zig");
const Allocator = mem.Allocator;
const Buffer = @import("buffer.zig").Buffer;
const os = @import("os.zig");
const base64_encoder = fs.base64_encoder;
const base64_decoder = fs.base64_decoder;
const BIN_DIGEST_LEN = 48;
const BASE64_DIGEST_LEN = base64.Base64Encoder.calcSize(BIN_DIGEST_LEN);
pub const File = struct {
path: ?[]const u8,
stat: fs.File.Stat,
file_handle: os.fd_t,
bin_digest: [BIN_DIGEST_LEN]u8,
contents: ?[]const u8,
pub fn deinit(self: *@This(), alloc: *Allocator) void {
if (self.path) |owned_slice| {
alloc.free(owned_slice);
self.path = null;
}
if (self.contents) |owned_slice| {
alloc.free(owned_slice);
self.contents = null;
}
}
};
pub const CacheHash = struct {
alloc: *Allocator,
blake3: Blake3,
manifest_dir: fs.Dir,
manifest_file: ?fs.File,
manifest_dirty: bool,
force_check_manifest: bool,
files: ArrayList(File),
b64_digest: [BASE64_DIGEST_LEN]u8,
pub fn init(alloc: *Allocator, manifest_dir_path: []const u8) !@This() {
try fs.cwd().makePath(manifest_dir_path);
const manifest_dir = try fs.cwd().openDirTraverse(manifest_dir_path);
return CacheHash{
.alloc = alloc,
.blake3 = Blake3.init(),
.manifest_dir = manifest_dir,
.manifest_file = null,
.manifest_dirty = false,
.force_check_manifest = false,
.files = ArrayList(File).init(alloc),
.b64_digest = undefined,
};
}
pub fn addSlice(self: *@This(), val: []const u8) void {
debug.assert(self.manifest_file == null);
self.blake3.update(val);
self.blake3.update(&[_]u8{0});
}
pub fn addBool(self: *@This(), val: bool) void {
debug.assert(self.manifest_file == null);
self.blake3.update(&[_]u8{@boolToInt(val)});
}
pub fn addInt(self: *@This(), val: var) void {
debug.assert(self.manifest_file == null);
switch (@typeInfo(@TypeOf(val))) {
.Int => |int_info| {
if (int_info.bits == 0 or int_info.bits % 8 != 0) {
@compileError("Unsupported integer size. Please use a multiple of 8, manually convert to a u8 slice.");
}
const buf_len = @divExact(int_info.bits, 8);
var buf: [buf_len]u8 = undefined;
mem.writeIntNative(@TypeOf(val), &buf, val);
self.addSlice(&buf);
self.blake3.update(&[_]u8{0});
},
else => @compileError("Type must be an integer."),
}
}
pub fn add(self: *@This(), val: var) void {
debug.assert(self.manifest_file == null);
const val_type = @TypeOf(val);
switch (@typeInfo(val_type)) {
.Int => self.addInt(val),
.Bool => self.addBool(val),
.Array => |array_info| if (array_info.child == u8) {
self.addSlice(val[0..]);
} else {
@compileError("Unsupported array type");
},
.Pointer => |ptr_info| switch (ptr_info.size) {
.Slice => if (ptr_info.child == u8) {
self.addSlice(val);
},
.One => self.add(val.*),
else => {
@compileLog("Pointer type: ", ptr_info.size, ptr_info.child);
@compileError("Unsupported pointer type");
},
},
else => @compileError("Unsupported type"),
}
}
pub fn addFile(self: *@This(), file_path: []const u8) !void {
debug.assert(self.manifest_file == null);
var cache_hash_file = try self.files.addOne();
cache_hash_file.path = try fs.path.resolve(self.alloc, &[_][]const u8{file_path});
self.addSlice(cache_hash_file.path.?);
}
pub fn hit(self: *@This()) !?[BASE64_DIGEST_LEN]u8 {
debug.assert(self.manifest_file == null);
var bin_digest: [BIN_DIGEST_LEN]u8 = undefined;
self.blake3.final(&bin_digest);
base64_encoder.encode(self.b64_digest[0..], &bin_digest);
if (self.files.toSlice().len == 0 and !self.force_check_manifest) {
return self.b64_digest;
}
self.blake3 = Blake3.init();
self.blake3.update(&bin_digest);
{
const manifest_file_path = try fmt.allocPrint(self.alloc, "{}.txt", .{self.b64_digest});
defer self.alloc.free(manifest_file_path);
self.manifest_file = try self.manifest_dir.createFile(manifest_file_path, .{ .read = true, .truncate = false });
}
// create a buffer instead of using readAllAlloc
// See: https://github.com/ziglang/zig/issues/4656
var file_buffer = try Buffer.initCapacity(self.alloc, 16 * 1024);
defer file_buffer.deinit();
// TODO: Figure out a good max value?
try self.manifest_file.?.inStream().stream.readAllBuffer(&file_buffer, 16 * 1024);
const file_contents = file_buffer.toSliceConst();
const input_file_count = self.files.len;
var any_file_changed = false;
var line_iter = mem.tokenize(file_contents, "\n");
var idx: usize = 0;
while (line_iter.next()) |line| {
defer idx += 1;
var cache_hash_file: *File = undefined;
if (idx < input_file_count) {
cache_hash_file = self.files.ptrAt(idx);
} else {
cache_hash_file = try self.files.addOne();
cache_hash_file.path = null;
}
var iter = mem.tokenize(line, " ");
const file_handle_str = iter.next() orelse return error.InvalidFormat;
const mtime_nsec_str = iter.next() orelse return error.InvalidFormat;
const digest_str = iter.next() orelse return error.InvalidFormat;
const file_path = iter.rest();
cache_hash_file.file_handle = fmt.parseInt(os.fd_t, file_handle_str, 10) catch return error.InvalidFormat;
cache_hash_file.stat.mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat;
base64_decoder.decode(&cache_hash_file.bin_digest, digest_str) catch return error.InvalidFormat;
if (file_path.len == 0) {
return error.InvalidFormat;
}
if (cache_hash_file.path != null and !mem.eql(u8, file_path, cache_hash_file.path.?)) {
return error.InvalidFormat;
}
const this_file = fs.cwd().openFile(cache_hash_file.path.?, .{ .read = true }) catch {
self.manifest_file.?.close();
self.manifest_file = null;
return error.CacheUnavailable;
};
defer this_file.close();
cache_hash_file.stat = try this_file.stat();
// TODO: check mtime
if (false) {} else {
self.manifest_dirty = true;
// TODO: check for problematic timestamp
var actual_digest: [BIN_DIGEST_LEN]u8 = undefined;
try hash_file(self.alloc, &actual_digest, &this_file);
if (!mem.eql(u8, &cache_hash_file.bin_digest, &actual_digest)) {
mem.copy(u8, &cache_hash_file.bin_digest, &actual_digest);
// keep going until we have the input file digests
any_file_changed = true;
}
}
if (!any_file_changed) {
self.blake3.update(&cache_hash_file.bin_digest);
}
}
if (any_file_changed) {
// cache miss
// keep the manifest file open (TODO: with rw lock)
// reset the hash
self.blake3 = Blake3.init();
self.blake3.update(&bin_digest);
try self.files.resize(input_file_count);
for (self.files.toSlice()) |file| {
self.blake3.update(&file.bin_digest);
}
return null;
}
if (idx < input_file_count or idx == 0) {
self.manifest_dirty = true;
while (idx < input_file_count) : (idx += 1) {
var cache_hash_file = self.files.ptrAt(idx);
self.populate_file_hash(cache_hash_file) catch |err| {
self.manifest_file.?.close();
self.manifest_file = null;
return error.CacheUnavailable;
};
}
return null;
}
return try self.final();
}
pub fn populate_file_hash(self: *@This(), cache_hash_file: *File) !void {
debug.assert(cache_hash_file.path != null);
const this_file = try fs.cwd().openFile(cache_hash_file.path.?, .{});
defer this_file.close();
cache_hash_file.stat = try this_file.stat();
// TODO: check for problematic timestamp
try hash_file(self.alloc, &cache_hash_file.bin_digest, &this_file);
self.blake3.update(&cache_hash_file.bin_digest);
}
pub fn final(self: *@This()) ![BASE64_DIGEST_LEN]u8 {
debug.assert(self.manifest_file != null);
var bin_digest: [BIN_DIGEST_LEN]u8 = undefined;
self.blake3.final(&bin_digest);
var out_digest: [BASE64_DIGEST_LEN]u8 = undefined;
base64_encoder.encode(&out_digest, &bin_digest);
return out_digest;
}
pub fn write_manifest(self: *@This()) !void {
debug.assert(self.manifest_file != null);
var encoded_digest = try Buffer.initSize(self.alloc, BASE64_DIGEST_LEN);
defer encoded_digest.deinit();
var contents = try Buffer.init(self.alloc, "");
defer contents.deinit();
for (self.files.toSlice()) |file| {
base64_encoder.encode(encoded_digest.toSlice(), &file.bin_digest);
try contents.print("{} {} {} {}\n", .{ file.file_handle, file.stat.mtime, encoded_digest.toSlice(), file.path });
}
try self.manifest_file.?.seekTo(0);
try self.manifest_file.?.writeAll(contents.toSlice());
}
pub fn release(self: *@This()) void {
debug.assert(self.manifest_file != null);
if (self.manifest_dirty) {
self.write_manifest() catch |err| {
debug.warn("Unable to write cache file '{}': {}\n", .{ self.b64_digest, err });
};
}
self.manifest_file.?.close();
for (self.files.toSlice()) |*file| {
file.deinit(self.alloc);
}
self.files.deinit();
self.manifest_dir.close();
}
};
fn hash_file(alloc: *Allocator, bin_digest: []u8, handle: *const fs.File) !void {
var blake3 = Blake3.init();
var in_stream = handle.inStream().stream;
const contents = try handle.inStream().stream.readAllAlloc(alloc, 64 * 1024);
defer alloc.free(contents);
blake3.update(contents);
blake3.final(bin_digest);
}
test "cache file and the recall it" {
const cwd = fs.cwd();
const temp_manifest_dir = "temp_manifest_dir";
try cwd.writeFile("test.txt", "Hello, world!\n");
var digest1: [BASE64_DIGEST_LEN]u8 = undefined;
var digest2: [BASE64_DIGEST_LEN]u8 = undefined;
{
var ch = try CacheHash.init(testing.allocator, temp_manifest_dir);
defer ch.release();
ch.add(true);
ch.add(@as(u16, 1234));
ch.add("1234");
try ch.addFile("test.txt");
// There should be nothing in the cache
debug.assert((try ch.hit()) == null);
digest1 = try ch.final();
}
{
var ch = try CacheHash.init(testing.allocator, temp_manifest_dir);
defer ch.release();
ch.add(true);
ch.add(@as(u16, 1234));
ch.add("1234");
try ch.addFile("test.txt");
// Cache hit! We just "built" the same file
digest2 = (try ch.hit()).?;
}
debug.assert(mem.eql(u8, digest1[0..], digest2[0..]));
try cwd.deleteTree(temp_manifest_dir);
}