diff --git a/zlibWrapper/Makefile b/zlibWrapper/Makefile index 9ad1c01d..ed296931 100644 --- a/zlibWrapper/Makefile +++ b/zlibWrapper/Makefile @@ -24,7 +24,7 @@ LDFLAGS = $(LOC) RM = rm -f -all: clean test testzstd +all: clean test testzstd testfitblk test: example ./example @@ -35,8 +35,14 @@ testdll: example_d testzstd: example_zstd ./example_zstd +testfitblk: fitblk + ./fitblk 10240 <../zstd_compression_format.md + .c.o: $(CC) $(CFLAGS) -c -o $@ $< + +fitblk: $(EXAMPLE_PATH)/fitblk.o $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.o + $(CC) $(LDFLAGS) -o $@ $(EXAMPLE_PATH)/fitblk.o $(ZLIBWRAPPER_PATH)/zstdTurnedOn_zlibwrapper.o $(STATICLIB) example: $(EXAMPLE_PATH)/example.o $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.o $(CC) $(LDFLAGS) -o $@ $(EXAMPLE_PATH)/example.o $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.o $(STATICLIB) @@ -47,6 +53,9 @@ example_d: $(EXAMPLE_PATH)/example.o $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.o example_zstd: $(EXAMPLE_PATH)/example.o $(ZLIBWRAPPER_PATH)/zstdTurnedOn_zlibwrapper.o $(CC) $(LDFLAGS) -o $@ $(EXAMPLE_PATH)/example.o $(ZLIBWRAPPER_PATH)/zstdTurnedOn_zlibwrapper.o $(STATICLIB) +$(EXAMPLE_PATH)/fitblk.o: $(EXAMPLE_PATH)/fitblk.c + $(CC) $(CFLAGS) -I. -c -o $@ $(EXAMPLE_PATH)/fitblk.c + $(EXAMPLE_PATH)/example.o: $(EXAMPLE_PATH)/example.c $(CC) $(CFLAGS) -I. -c -o $@ $(EXAMPLE_PATH)/example.c diff --git a/zlibWrapper/examples/fitblk.c b/zlibWrapper/examples/fitblk.c new file mode 100644 index 00000000..35b067b4 --- /dev/null +++ b/zlibWrapper/examples/fitblk.c @@ -0,0 +1,246 @@ +/* fitblk.c: example of fitting compressed output to a specified size + Not copyrighted -- provided to the public domain + Version 1.1 25 November 2004 Mark Adler */ + +/* Version history: + 1.0 24 Nov 2004 First version + 1.1 25 Nov 2004 Change deflateInit2() to deflateInit() + Use fixed-size, stack-allocated raw buffers + Simplify code moving compression to subroutines + Use assert() for internal errors + Add detailed description of approach + */ + +/* Approach to just fitting a requested compressed size: + + fitblk performs three compression passes on a portion of the input + data in order to determine how much of that input will compress to + nearly the requested output block size. The first pass generates + enough deflate blocks to produce output to fill the requested + output size plus a specfied excess amount (see the EXCESS define + below). The last deflate block may go quite a bit past that, but + is discarded. The second pass decompresses and recompresses just + the compressed data that fit in the requested plus excess sized + buffer. The deflate process is terminated after that amount of + input, which is less than the amount consumed on the first pass. + The last deflate block of the result will be of a comparable size + to the final product, so that the header for that deflate block and + the compression ratio for that block will be about the same as in + the final product. The third compression pass decompresses the + result of the second step, but only the compressed data up to the + requested size minus an amount to allow the compressed stream to + complete (see the MARGIN define below). That will result in a + final compressed stream whose length is less than or equal to the + requested size. Assuming sufficient input and a requested size + greater than a few hundred bytes, the shortfall will typically be + less than ten bytes. + + If the input is short enough that the first compression completes + before filling the requested output size, then that compressed + stream is return with no recompression. + + EXCESS is chosen to be just greater than the shortfall seen in a + two pass approach similar to the above. That shortfall is due to + the last deflate block compressing more efficiently with a smaller + header on the second pass. EXCESS is set to be large enough so + that there is enough uncompressed data for the second pass to fill + out the requested size, and small enough so that the final deflate + block of the second pass will be close in size to the final deflate + block of the third and final pass. MARGIN is chosen to be just + large enough to assure that the final compression has enough room + to complete in all cases. + */ + +#include +#include +#include +//#include "zlib.h" +#include "zstd_zlibwrapper.h" + +#define local static + +/* print nastygram and leave */ +local void quit(char *why) +{ + fprintf(stderr, "fitblk abort: %s\n", why); + exit(1); +} + +#define RAWLEN 4096 /* intermediate uncompressed buffer size */ + +/* compress from file to def until provided buffer is full or end of + input reached; return last deflate() return value, or Z_ERRNO if + there was read error on the file */ +local int partcompress(FILE *in, z_streamp def) +{ + int ret, flush; + unsigned char raw[RAWLEN]; + + flush = Z_NO_FLUSH; + do { + def->avail_in = fread(raw, 1, RAWLEN, in); + printf("def->avail_in=%d\n", def->avail_in); + if (ferror(in)) + return Z_ERRNO; + def->next_in = raw; + if (feof(in)) + flush = Z_FINISH; + ret = deflate(def, flush); + assert(ret != Z_STREAM_ERROR); + } while (def->avail_out != 0 && flush == Z_NO_FLUSH); + return ret; +} + +/* recompress from inf's input to def's output; the input for inf and + the output for def are set in those structures before calling; + return last deflate() return value, or Z_MEM_ERROR if inflate() + was not able to allocate enough memory when it needed to */ +local int recompress(z_streamp inf, z_streamp def) +{ + int ret, flush; + unsigned char raw[RAWLEN]; + + flush = Z_NO_FLUSH; + do { + /* decompress */ + inf->avail_out = RAWLEN; + printf("inf->avail_out=%d\n", inf->avail_out); + + inf->next_out = raw; + ret = inflate(inf, Z_NO_FLUSH); + assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR && + ret != Z_NEED_DICT); + if (ret == Z_MEM_ERROR) + return ret; + + /* compress what was decompresed until done or no room */ + def->avail_in = RAWLEN - inf->avail_out; + def->next_in = raw; + if (inf->avail_out != 0) + flush = Z_FINISH; + ret = deflate(def, flush); + assert(ret != Z_STREAM_ERROR); + } while (ret != Z_STREAM_END && def->avail_out != 0); + return ret; +} + +#define EXCESS 256 /* empirically determined stream overage */ +#define MARGIN 8 /* amount to back off for completion */ + +/* compress from stdin to fixed-size block on stdout */ +int main(int argc, char **argv) +{ + int ret; /* return code */ + unsigned size; /* requested fixed output block size */ + unsigned have; /* bytes written by deflate() call */ + unsigned char *blk; /* intermediate and final stream */ + unsigned char *tmp; /* close to desired size stream */ + z_stream def, inf; /* zlib deflate and inflate states */ + + /* get requested output size */ + if (argc != 2) + quit("need one argument: size of output block"); + ret = strtol(argv[1], argv + 1, 10); + if (argv[1][0] != 0) + quit("argument must be a number"); + if (ret < 8) /* 8 is minimum zlib stream size */ + quit("need positive size of 8 or greater"); + size = (unsigned)ret; + + printf("zlib version %s = 0x%04x, compile flags = 0x%lx\n", + ZLIB_VERSION, ZLIB_VERNUM, zlibCompileFlags()); + if (isUsingZSTD()) printf("zstd version %s\n", zstdVersion()); + + /* allocate memory for buffers and compression engine */ + blk = malloc(size + EXCESS); + def.zalloc = Z_NULL; + def.zfree = Z_NULL; + def.opaque = Z_NULL; + ret = deflateInit(&def, Z_DEFAULT_COMPRESSION); + if (ret != Z_OK || blk == NULL) + quit("out of memory"); + + /* compress from stdin until output full, or no more input */ + def.avail_out = size + EXCESS; + def.next_out = blk; + ret = partcompress(stdin, &def); + if (ret == Z_ERRNO) + quit("error reading input"); + +printf("partcompress def.avail_out=%d\n", def.avail_out); + /* if it all fit, then size was undersubscribed -- done! */ + if (ret == Z_STREAM_END && def.avail_out >= EXCESS) { + /* write block to stdout */ + have = size + EXCESS - def.avail_out; + // if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) + // quit("error writing output"); + + /* clean up and print results to stderr */ + ret = deflateEnd(&def); + assert(ret != Z_STREAM_ERROR); + free(blk); + fprintf(stderr, + "%u bytes unused out of %u requested (all input)\n", + size - have, size); + return 0; + } + + /* it didn't all fit -- set up for recompression */ + inf.zalloc = Z_NULL; + inf.zfree = Z_NULL; + inf.opaque = Z_NULL; + inf.avail_in = 0; + inf.next_in = Z_NULL; + ret = inflateInit(&inf); + tmp = malloc(size + EXCESS); + if (ret != Z_OK || tmp == NULL) + quit("out of memory"); + ret = deflateReset(&def); + assert(ret != Z_STREAM_ERROR); + + /* do first recompression close to the right amount */ + inf.avail_in = size + EXCESS; + inf.next_in = blk; + def.avail_out = size + EXCESS; + def.next_out = tmp; +printf("recompress1 inf.avail_in=%d def.avail_out=%d\n", inf.avail_in, def.avail_out); + ret = recompress(&inf, &def); +printf("recompress1 inf.avail_in=%d def.avail_out=%d\n", inf.avail_in, def.avail_out); + if (ret == Z_MEM_ERROR) + quit("out of memory"); + + /* set up for next reocmpression */ + ret = inflateReset(&inf); + assert(ret != Z_STREAM_ERROR); + ret = deflateReset(&def); + assert(ret != Z_STREAM_ERROR); + + /* do second and final recompression (third compression) */ + inf.avail_in = size - MARGIN; /* assure stream will complete */ + inf.next_in = tmp; + def.avail_out = size; + def.next_out = blk; +printf("recompress2 inf.avail_in=%d def.avail_out=%d\n", inf.avail_in, def.avail_out); + ret = recompress(&inf, &def); +printf("recompress2 inf.avail_in=%d def.avail_out=%d\n", inf.avail_in, def.avail_out); + if (ret == Z_MEM_ERROR) + quit("out of memory"); + assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */ + + /* done -- write block to stdout */ + have = size - def.avail_out; +// if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) +// quit("error writing output"); + + /* clean up and print results to stderr */ + free(tmp); + ret = inflateEnd(&inf); + assert(ret != Z_STREAM_ERROR); + ret = deflateEnd(&def); + assert(ret != Z_STREAM_ERROR); + free(blk); + fprintf(stderr, + "%u bytes unused out of %u requested (%lu input)\n", + size - have, size, def.total_in); + return 0; +}