std: Add a gzip decoder
parent
1edf097871
commit
9fe4c89230
|
@ -128,6 +128,7 @@ pub fn build(b: *Builder) !void {
|
|||
"README.md",
|
||||
".z.0",
|
||||
".z.9",
|
||||
".gz",
|
||||
"rfc1951.txt",
|
||||
},
|
||||
});
|
||||
|
|
|
@ -6,8 +6,10 @@
|
|||
const std = @import("std.zig");
|
||||
|
||||
pub const deflate = @import("compress/deflate.zig");
|
||||
pub const gzip = @import("compress/gzip.zig");
|
||||
pub const zlib = @import("compress/zlib.zig");
|
||||
|
||||
// Touch the compression submodules from a test block so that the tests they
// declare are picked up when this file is tested.
test "" {
    _ = zlib;
    _ = gzip;
}
|
||||
|
|
|
@ -0,0 +1,248 @@
|
|||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2015-2020 Zig Contributors
|
||||
// This file is part of [zig](https://ziglang.org/), which is MIT licensed.
|
||||
// The MIT license requires this copyright notice to be included in all copies
|
||||
// and substantial portions of the software.
|
||||
//
|
||||
// Decompressor for GZIP data streams (RFC1952)
|
||||
|
||||
const std = @import("std");
|
||||
const io = std.io;
|
||||
const fs = std.fs;
|
||||
const testing = std.testing;
|
||||
const mem = std.mem;
|
||||
const deflate = std.compress.deflate;
|
||||
|
||||
// Bit masks for the individual flags stored in the FLG byte of the gzip
// header (RFC1952).

// Not consulted by the decoder in this file.
const FTEXT = 0x01;
// A 16-bit header checksum follows the optional header fields.
const FHCRC = 0x02;
// A length-prefixed "extra" field follows the fixed part of the header.
const FEXTRA = 0x04;
// A zero-terminated file name is present.
const FNAME = 0x08;
// A zero-terminated comment is present.
const FCOMMENT = 0x10;
|
||||
|
||||
/// Builds a decompressor type for GZIP data streams (RFC1952) that pulls its
/// compressed input from a reader of type `ReaderType`.
///
/// The returned type parses the gzip header on `init`, inflates the DEFLATE
/// payload on `read`, and validates the CRC32/ISIZE trailer once the payload
/// is exhausted.
pub fn GzipStream(comptime ReaderType: type) type {
    return struct {
        const Self = @This();

        pub const Error = ReaderType.Error ||
            deflate.InflateStream(ReaderType).Error ||
            error{ CorruptedData, WrongChecksum };
        pub const Reader = io.Reader(*Self, Error, read);

        /// Allocator used for the window buffer and the header strings.
        allocator: *mem.Allocator,
        /// DEFLATE decoder operating on `in_reader`.
        inflater: deflate.InflateStream(ReaderType),
        /// Underlying source of compressed bytes; also used to read the trailer.
        in_reader: ReaderType,
        /// Running CRC32 of the decompressed bytes handed out so far.
        hasher: std.hash.Crc32,
        /// Backing storage for the inflater window, freed by `deinit`.
        window_slice: []u8,
        /// Number of decompressed bytes handed out so far. Only its value
        /// modulo 2^32 is meaningful, see `read`.
        read_amt: usize,
        /// Set once the trailer has been read and successfully validated, so
        /// that further `read` calls do not touch the underlying reader again.
        trailer_verified: bool = false,

        /// Optional metadata taken from the header. The strings are owned by
        /// the stream and released by `deinit`.
        info: struct {
            /// Contents of the FNAME field, null if the flag was not set.
            filename: ?[]const u8,
            /// Contents of the FCOMMENT field, null if the flag was not set.
            comment: ?[]const u8,
            /// Raw MTIME field of the header.
            modification_time: u32,
        },

        /// Parses the gzip header from `source` and sets up the decompressor.
        ///
        /// Returns `error.BadHeader` if the magic bytes do not match and
        /// `error.InvalidCompression` if the compression method is not
        /// DEFLATE. Errors from `source` and from `allocator` are propagated.
        /// On success the caller must call `deinit` to release the memory.
        fn init(allocator: *mem.Allocator, source: ReaderType) !Self {
            // gzip header format is specified in RFC1952
            const header = try source.readBytesNoEof(10);

            // Check the ID1/ID2 fields
            if (header[0] != 0x1f or header[1] != 0x8b)
                return error.BadHeader;

            const CM = header[2];
            // The CM field must be 8 to indicate the use of DEFLATE
            if (CM != 8) return error.InvalidCompression;
            // Flags
            const FLG = header[3];
            // Modification time, as a Unix timestamp.
            // If zero there's no timestamp available.
            const MTIME = mem.readIntLittle(u32, header[4..8]);
            // header[8] (XFL, extra flags) and header[9] (OS, operating system
            // where the compression took place) are not needed for decoding
            // and are intentionally left unread.

            if (FLG & FEXTRA != 0) {
                // Skip the extra data, we could read and expose it to the user
                // if somebody needs it.
                const len = try source.readIntLittle(u16);
                try source.skipBytes(len, .{});
            }

            var filename: ?[]const u8 = null;
            if (FLG & FNAME != 0) {
                filename = try source.readUntilDelimiterAlloc(
                    allocator,
                    0,
                    std.math.maxInt(usize),
                );
            }
            errdefer if (filename) |p| allocator.free(p);

            var comment: ?[]const u8 = null;
            if (FLG & FCOMMENT != 0) {
                comment = try source.readUntilDelimiterAlloc(
                    allocator,
                    0,
                    std.math.maxInt(usize),
                );
            }
            errdefer if (comment) |p| allocator.free(p);

            if (FLG & FHCRC != 0) {
                // TODO: Evaluate and check the header checksum. The stdlib has
                // no CRC16 yet :(
                _ = try source.readIntLittle(u16);
            }

            // The RFC doesn't say anything about the DEFLATE window size to be
            // used, default to 32K.
            const window_slice = try allocator.alloc(u8, 32 * 1024);

            return Self{
                .allocator = allocator,
                .inflater = deflate.inflateStream(source, window_slice),
                .in_reader = source,
                .hasher = std.hash.Crc32.init(),
                .window_slice = window_slice,
                .info = .{
                    .filename = filename,
                    .comment = comment,
                    .modification_time = MTIME,
                },
                .read_amt = 0,
                .trailer_verified = false,
            };
        }

        /// Releases the window buffer and the header strings, if any.
        pub fn deinit(self: *Self) void {
            self.allocator.free(self.window_slice);
            if (self.info.filename) |filename|
                self.allocator.free(filename);
            if (self.info.comment) |comment|
                self.allocator.free(comment);
        }

        /// Implements the io.Reader interface.
        ///
        /// Returns the number of decompressed bytes written into `buffer`, or
        /// 0 once the end of the stream has been reached and the trailer has
        /// been validated. Returns `error.WrongChecksum` if the stored CRC32
        /// does not match the decompressed data and `error.CorruptedData` if
        /// the stored ISIZE does not match the amount of data produced.
        pub fn read(self: *Self, buffer: []u8) Error!usize {
            if (buffer.len == 0)
                return 0;

            // The trailer was already consumed and checked by a previous call:
            // keep reporting end-of-stream without reading from the source
            // again, otherwise bytes following the gzip member would be
            // misinterpreted as a second trailer.
            if (self.trailer_verified)
                return 0;

            // Read from the compressed stream and update the computed checksum
            const r = try self.inflater.read(buffer);
            if (r != 0) {
                self.hasher.update(buffer[0..r]);
                // Wrapping add: only the count modulo 2^32 is compared below,
                // and a checked add would trap on targets with a 32-bit usize
                // once more than 4GiB have been decompressed.
                self.read_amt +%= r;
                return r;
            }

            // We've reached the end of stream, check if the checksum matches
            const hash = try self.in_reader.readIntLittle(u32);
            if (hash != self.hasher.final())
                return error.WrongChecksum;

            // The ISIZE field is the size of the uncompressed input modulo 2^32
            const input_size = try self.in_reader.readIntLittle(u32);
            if (self.read_amt & 0xffffffff != input_size)
                return error.CorruptedData;

            self.trailer_verified = true;
            return 0;
        }

        /// Returns an `io.Reader` that yields the decompressed data.
        pub fn reader(self: *Self) Reader {
            return .{ .context = self };
        }
    };
}
|
||||
|
||||
/// Convenience constructor: creates a `GzipStream` for the type of `reader`,
/// parsing the gzip header from it. Memory is obtained from `allocator`; the
/// returned stream has to be released with `deinit`.
pub fn gzipStream(allocator: *mem.Allocator, reader: anytype) !GzipStream(@TypeOf(reader)) {
    const Stream = GzipStream(@TypeOf(reader));
    return Stream.init(allocator, reader);
}
|
||||
|
||||
// Test helper: decompresses the gzip stream held in `data` and compares the
// SHA256 digest of the output with `expected`, given as a hex string.
// Any error raised while decoding is returned to the caller.
fn testReader(data: []const u8, comptime expected: []const u8) !void {
    var source = io.fixedBufferStream(data);

    var decompressor = try gzipStream(testing.allocator, source.reader());
    defer decompressor.deinit();

    // Inflate everything into memory
    const plain = try decompressor.reader().readAllAlloc(
        testing.allocator,
        std.math.maxInt(usize),
    );
    defer testing.allocator.free(plain);

    // Digest the output and compare it with the reference value
    var digest: [32]u8 = undefined;
    std.crypto.hash.sha2.Sha256.hash(plain, &digest, .{});

    assertEqual(expected, &digest);
}
|
||||
|
||||
// Checks that `input` is equal to the bytes spelled out by `expected`, where
// `expected` is a string of hexadecimal digits (two digits per byte).
pub fn assertEqual(comptime expected: []const u8, input: []const u8) void {
    var decoded: [expected.len / 2]u8 = undefined;

    var idx: usize = 0;
    while (idx < decoded.len) : (idx += 1) {
        const digits = expected[2 * idx .. 2 * idx + 2];
        decoded[idx] = std.fmt.parseInt(u8, digits, 16) catch unreachable;
    }

    testing.expectEqualSlices(u8, &decoded, input);
}
|
||||
|
||||
// The reference input for every test case is the text of RFC1952, compressed
// into the embedded fixture.
//
// https://tools.ietf.org/rfc/rfc1952.txt length=25037 bytes
// SHA256=164ef0897b4cbec63abf1b57f069f3599bd0fb7c72c2a4dee21bd7e03ec9af67
test "compressed data" {
    const compressed = @embedFile("rfc1952.txt.gz");
    const reference_sha256 = "164ef0897b4cbec63abf1b57f069f3599bd0fb7c72c2a4dee21bd7e03ec9af67";

    try testReader(compressed, reference_sha256);
}
|
||||
|
||||
// Feeds malformed or truncated streams to the decoder and checks that each
// one is rejected with the expected error.
test "sanity checks" {
    // Header cut short after the two magic bytes
    const truncated_header = [_]u8{ 0x1f, 0x8B };
    testing.expectError(error.EndOfStream, testReader(&truncated_header, ""));

    // CM field set to something other than DEFLATE
    const wrong_cm = [_]u8{
        0x1f, 0x8b, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x03,
    };
    testing.expectError(error.InvalidCompression, testReader(&wrong_cm, ""));

    // Stored checksum does not match the data
    const wrong_checksum = [_]u8{
        0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01,
        0x00, 0x00, 0x00, 0x00,
    };
    testing.expectError(error.WrongChecksum, testReader(&wrong_checksum, ""));

    // Stream ends in the middle of the checksum
    const truncated_checksum = [_]u8{
        0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00,
    };
    testing.expectError(error.EndOfStream, testReader(&truncated_checksum, ""));

    // Stored input size does not match the amount of data produced
    const wrong_size = [_]u8{
        0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x01,
    };
    testing.expectError(error.CorruptedData, testReader(&wrong_size, ""));

    // Stream ends in the middle of the input size field
    const truncated_size = [_]u8{
        0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00,
    };
    testing.expectError(error.EndOfStream, testReader(&truncated_size, ""));
}
|
Binary file not shown.
Loading…
Reference in New Issue