zig/lib/std/compress/zlib.zig

// SPDX-License-Identifier: MIT
// Copyright (c) 2015-2020 Zig Contributors
// This file is part of [zig](https://ziglang.org/), which is MIT licensed.
// The MIT license requires this copyright notice to be included in all copies
// and substantial portions of the software.
//
// Decompressor for ZLIB data streams (RFC1950)

const std = @import("std");
const io = std.io;
const fs = std.fs;
const testing = std.testing;
const mem = std.mem;
const deflate = std.compress.deflate;

pub fn ZlibStream(comptime ReaderType: type) type {
    return struct {
        const Self = @This();

        pub const Error = ReaderType.Error ||
            deflate.InflateStream(ReaderType).Error ||
            error{ WrongChecksum, Unsupported };
        pub const Reader = io.Reader(*Self, Error, read);

        allocator: *mem.Allocator,
        inflater: deflate.InflateStream(ReaderType),
        in_reader: ReaderType,
        hasher: std.hash.Adler32,
        window_slice: []u8,

        fn init(allocator: *mem.Allocator, source: ReaderType) !Self {
            // Zlib header format is specified in RFC1950
            const header = try source.readBytesNoEof(2);

            const CM = @truncate(u4, header[0]);
            const CINFO = @truncate(u4, header[0] >> 4);
            const FCHECK = @truncate(u5, header[1]);
            const FDICT = @truncate(u1, header[1] >> 5);

            if ((@as(u16, header[0]) << 8 | header[1]) % 31 != 0)
                return error.BadHeader;

            // The CM field must be 8 to indicate the use of DEFLATE
            if (CM != 8) return error.InvalidCompression;
            // CINFO is the base-2 logarithm of the window size, minus 8.
            // Values above 7 are unspecified and therefore rejected.
            if (CINFO > 7) return error.InvalidWindowSize;
            const window_size: u16 = @as(u16, 1) << (CINFO + 8);

            // TODO: Support this case
            if (FDICT != 0)
                return error.Unsupported;

            var window_slice = try allocator.alloc(u8, window_size);

            return Self{
                .allocator = allocator,
                .inflater = deflate.inflateStream(source, window_slice),
                .in_reader = source,
                .hasher = std.hash.Adler32.init(),
                .window_slice = window_slice,
            };
        }

        pub fn deinit(self: *Self) void {
            self.allocator.free(self.window_slice);
        }

        // Implements the io.Reader interface
        pub fn read(self: *Self, buffer: []u8) Error!usize {
            if (buffer.len == 0)
                return 0;

            // Read from the compressed stream and update the computed checksum
            const r = try self.inflater.read(buffer);
            if (r != 0) {
                self.hasher.update(buffer[0..r]);
                return r;
            }

            // We've reached the end of stream, check if the checksum matches
            const hash = try self.in_reader.readIntBig(u32);
            if (hash != self.hasher.final())
                return error.WrongChecksum;

            return 0;
        }

        pub fn reader(self: *Self) Reader {
            return .{ .context = self };
        }
    };
}

pub fn zlibStream(allocator: *mem.Allocator, reader: anytype) !ZlibStream(@TypeOf(reader)) {
    return ZlibStream(@TypeOf(reader)).init(allocator, reader);
}

fn testReader(data: []const u8, comptime expected: []const u8) !void {
    var in_stream = io.fixedBufferStream(data);

    var zlib_stream = try zlibStream(testing.allocator, in_stream.reader());
    defer zlib_stream.deinit();

    // Read and decompress the whole file
    const buf = try zlib_stream.reader().readAllAlloc(testing.allocator, std.math.maxInt(usize));
    defer testing.allocator.free(buf);
    // Calculate its SHA256 hash and check it against the reference
    var hash: [32]u8 = undefined;
    std.crypto.hash.sha2.Sha256.hash(buf, hash[0..], .{});

    assertEqual(expected, &hash);
}

// Assert `expected` == `input` where `input` is a bytestring.
pub fn assertEqual(comptime expected: []const u8, input: []const u8) void {
    var expected_bytes: [expected.len / 2]u8 = undefined;
    for (expected_bytes) |*r, i| {
        r.* = std.fmt.parseInt(u8, expected[2 * i .. 2 * i + 2], 16) catch unreachable;
    }

    testing.expectEqualSlices(u8, &expected_bytes, input);
}

// All the test cases are obtained by compressing the RFC1950 text
//
// https://tools.ietf.org/rfc/rfc1950.txt length=36944 bytes
// SHA256=5ebf4b5b7fe1c3a0c0ab9aa3ac8c0f3853a7dc484905e76e03b0b0f301350009
test "compressed data" {
    // Compressed with compression level = 0
    try testReader(
        @embedFile("rfc1951.txt.z.0"),
        "5ebf4b5b7fe1c3a0c0ab9aa3ac8c0f3853a7dc484905e76e03b0b0f301350009",
    );
    // Compressed with compression level = 9
    try testReader(
        @embedFile("rfc1951.txt.z.9"),
        "5ebf4b5b7fe1c3a0c0ab9aa3ac8c0f3853a7dc484905e76e03b0b0f301350009",
    );
    // Compressed with compression level = 9 and fixed Huffman codes
    try testReader(
        @embedFile("rfc1951.txt.fixed.z.9"),
        "5ebf4b5b7fe1c3a0c0ab9aa3ac8c0f3853a7dc484905e76e03b0b0f301350009",
    );
}

test "don't read past deflate stream's end" {
    try testReader(
        &[_]u8{
            0x08, 0xd7, 0x63, 0xf8, 0xcf, 0xc0, 0xc0, 0x00, 0xc1, 0xff,
            0xff, 0x43, 0x30, 0x03, 0x03, 0xc3, 0xff, 0xff, 0xff, 0x01,
            0x83, 0x95, 0x0b, 0xf5,
        },
        // SHA256 of
        // 00ff 0000 00ff 0000 00ff 00ff ffff 00ff ffff 0000 0000 ffff ff
        "3bbba1cc65408445c81abb61f3d2b86b1b60ee0d70b4c05b96d1499091a08c93",
    );
}

test "sanity checks" {
    // Truncated header
    testing.expectError(
        error.EndOfStream,
        testReader(&[_]u8{0x78}, ""),
    );
    // Failed FCHECK check
    testing.expectError(
        error.BadHeader,
        testReader(&[_]u8{ 0x78, 0x9D }, ""),
    );
    // Wrong CM
    testing.expectError(
        error.InvalidCompression,
        testReader(&[_]u8{ 0x79, 0x94 }, ""),
    );
    // Wrong CINFO
    testing.expectError(
        error.InvalidWindowSize,
        testReader(&[_]u8{ 0x88, 0x98 }, ""),
    );
    // Wrong checksum
    testing.expectError(
        error.WrongChecksum,
        testReader(&[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00 }, ""),
    );
    // Truncated checksum
    testing.expectError(
        error.EndOfStream,
        testReader(&[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00 }, ""),
    );
}