diff --git a/README.md b/README.md index eee92f91..f37be454 100644 --- a/README.md +++ b/README.md @@ -67,11 +67,11 @@ Previous charts provide results applicable to typical file and stream scenarios The smaller the amount of data to compress, the more difficult it is to compress. This problem is common to all compression algorithms, and reason is, compression algorithms learn from past data how to compress future data. But at the beginning of a new data set, there is no "past" to build upon. To solve this situation, Zstd offers a __training mode__, which can be used to tune the algorithm for a selected type of data. -Training Zstandard is achieved by provide it with a few samples (one file per sample). The result of this training is stored in a file called "dictionary", which must be loaded before compression and decompression. +Training Zstandard is achieved by providing it with a few samples (one file per sample). The result of this training is stored in a file called "dictionary", which must be loaded before compression and decompression. Using this dictionary, the compression ratio achievable on small data improves dramatically. The following example uses the `github-users` [sample set](https://github.com/facebook/zstd/releases/tag/v1.1.3), created from [github public API](https://developer.github.com/v3/users/#get-all-users). -It consists of roughly 10K records weighting about 1KB each. +It consists of roughly 10K records weighing about 1KB each. 
Compression Ratio | Compression Speed | Decompression Speed ------------------|-------------------|-------------------- diff --git a/contrib/linux-kernel/lib/zstd/compress.c b/contrib/linux-kernel/lib/zstd/compress.c index 42236a3e..1aff542b 100644 --- a/contrib/linux-kernel/lib/zstd/compress.c +++ b/contrib/linux-kernel/lib/zstd/compress.c @@ -1978,10 +1978,15 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx *ctx, const void *src, size_t src break; /* nothing found : store previous solution */ } + /* NOTE: + * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. + * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which + * overflows the pointer, which is undefined behavior. + */ /* catch up */ if (offset) { while ((start > anchor) && (start > base + offset - ZSTD_REP_MOVE) && - (start[-1] == start[-1 - offset + ZSTD_REP_MOVE])) /* only search for offset within prefix */ + (start[-1] == (start-offset+ZSTD_REP_MOVE)[-1])) /* only search for offset within prefix */ { start--; matchLength++; diff --git a/contrib/linux-kernel/lib/zstd/decompress.c b/contrib/linux-kernel/lib/zstd/decompress.c index def10ea4..ec673d7e 100644 --- a/contrib/linux-kernel/lib/zstd/decompress.c +++ b/contrib/linux-kernel/lib/zstd/decompress.c @@ -2212,6 +2212,20 @@ ZSTD_DStream *ZSTD_initDStream(size_t maxWindowSize, void *workspace, size_t wor zds->ddict = zds->ddictLocal; zds->legacyVersion = 0; zds->hostageByte = 0; + + { + size_t const blockSize = MIN(zds->maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); + size_t const neededOutSize = zds->maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2; + + zds->inBuff = (char *)ZSTD_malloc(blockSize, zds->customMem); + zds->inBuffSize = blockSize; + zds->outBuff = (char *)ZSTD_malloc(neededOutSize, zds->customMem); + zds->outBuffSize = neededOutSize; + if (zds->inBuff == NULL || zds->outBuff == NULL) { + ZSTD_freeDStream(zds); + return NULL; + } + } return zds; } @@ -2333,25 +2347,17 @@ size_t ZSTD_decompressStream(ZSTD_DStream *zds, 
ZSTD_outBuffer *output, ZSTD_inB if (zds->fParams.windowSize > zds->maxWindowSize) return ERROR(frameParameter_windowTooLarge); - /* Adapt buffer sizes to frame header instructions */ + /* Buffers are preallocated, but double check */ { - size_t const blockSize = MIN(zds->fParams.windowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); - size_t const neededOutSize = zds->fParams.windowSize + blockSize + WILDCOPY_OVERLENGTH * 2; - zds->blockSize = blockSize; + size_t const blockSize = MIN(zds->maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); + size_t const neededOutSize = zds->maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2; if (zds->inBuffSize < blockSize) { - ZSTD_free(zds->inBuff, zds->customMem); - zds->inBuffSize = blockSize; - zds->inBuff = (char *)ZSTD_malloc(blockSize, zds->customMem); - if (zds->inBuff == NULL) - return ERROR(memory_allocation); + return ERROR(GENERIC); } if (zds->outBuffSize < neededOutSize) { - ZSTD_free(zds->outBuff, zds->customMem); - zds->outBuffSize = neededOutSize; - zds->outBuff = (char *)ZSTD_malloc(neededOutSize, zds->customMem); - if (zds->outBuff == NULL) - return ERROR(memory_allocation); + return ERROR(GENERIC); } + zds->blockSize = blockSize; } zds->stage = zdss_read; } diff --git a/contrib/linux-kernel/test/DecompressCrash.c b/contrib/linux-kernel/test/DecompressCrash.c new file mode 100644 index 00000000..b5b673aa --- /dev/null +++ b/contrib/linux-kernel/test/DecompressCrash.c @@ -0,0 +1,85 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +/* + This program takes a file in input, + performs a zstd round-trip test (compression - decompress) + compares the result with original + and generates a crash (double free) on corruption detection. 
+*/ + +/*=========================================== +* Dependencies +*==========================================*/ +#include /* size_t */ +#include /* malloc, free, exit */ +#include /* fprintf */ +#include + +/*=========================================== +* Macros +*==========================================*/ +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + +static ZSTD_DCtx *dctx = NULL; +void *dws = NULL; +static void* rBuff = NULL; +static size_t buffSize = 0; + +static void crash(int errorCode){ + /* abort if AFL/libfuzzer, exit otherwise */ + #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION /* could also use __AFL_COMPILER */ + abort(); + #else + exit(errorCode); + #endif +} + +static void decompressCheck(const void* srcBuff, size_t srcBuffSize) +{ + size_t const neededBuffSize = 20 * srcBuffSize; + + /* Allocate all buffers and contexts if not already allocated */ + if (neededBuffSize > buffSize) { + free(rBuff); + buffSize = 0; + + rBuff = malloc(neededBuffSize); + if (!rBuff) { + fprintf(stderr, "not enough memory ! \n"); + crash(1); + } + buffSize = neededBuffSize; + } + if (!dctx) { + size_t const workspaceSize = ZSTD_DCtxWorkspaceBound(); + dws = malloc(workspaceSize); + if (!dws) { + fprintf(stderr, "not enough memory ! \n"); + crash(1); + } + dctx = ZSTD_initDCtx(dws, workspaceSize); + if (!dctx) { + fprintf(stderr, "not enough memory ! 
\n"); + crash(1); + } + } + ZSTD_decompressDCtx(dctx, rBuff, buffSize, srcBuff, srcBuffSize); + +#ifndef SKIP_FREE + free(dws); dws = NULL; dctx = NULL; + free(rBuff); rBuff = NULL; + buffSize = 0; +#endif +} + +int LLVMFuzzerTestOneInput(const unsigned char *srcBuff, size_t srcBuffSize) { + decompressCheck(srcBuff, srcBuffSize); + return 0; +} diff --git a/contrib/linux-kernel/test/Makefile b/contrib/linux-kernel/test/Makefile index e33a256a..892264f4 100644 --- a/contrib/linux-kernel/test/Makefile +++ b/contrib/linux-kernel/test/Makefile @@ -12,12 +12,24 @@ CPPFLAGS += $(IFLAGS) ../lib/zstd/libzstd.a: $(OBJECTS) $(AR) $(ARFLAGS) $@ $^ +DecompressCrash: DecompressCrash.o $(OBJECTS) libFuzzer.a + $(CXX) $(TEST_CPPFLAGS) $(TEST_CXXFLAGS) $(LDFLAGS) $^ -o $@ + +RoundTripCrash: RoundTripCrash.o $(OBJECTS) ../lib/xxhash.o libFuzzer.a + $(CXX) $(TEST_CPPFLAGS) $(TEST_CXXFLAGS) $(LDFLAGS) $^ -o $@ + UserlandTest: UserlandTest.cpp ../lib/zstd/libzstd.a ../lib/xxhash.o $(CXX) $(CXXFLAGS) $(CFLAGS) $(CPPFLAGS) $^ googletest/build/googlemock/gtest/libgtest.a googletest/build/googlemock/gtest/libgtest_main.a -o $@ XXHashUserlandTest: XXHashUserlandTest.cpp ../lib/xxhash.o ../../../lib/common/xxhash.o $(CXX) $(CXXFLAGS) $(CFLAGS) $(CPPFLAGS) $^ googletest/build/googlemock/gtest/libgtest.a googletest/build/googlemock/gtest/libgtest_main.a -o $@ +# Install libfuzzer +libFuzzer.a: + @$(RM) -rf Fuzzer + @git clone https://chromium.googlesource.com/chromium/llvm-project/llvm/lib/Fuzzer + @./Fuzzer/build.sh + # Install googletest .PHONY: googletest googletest: @@ -28,3 +40,4 @@ googletest: clean: $(RM) -f *.{o,a} ../lib/zstd/*.{o,a} + $(RM) -f DecompressCrash RoundTripCrash UserlandTest XXHashUserlandTest diff --git a/contrib/linux-kernel/test/RoundTripCrash.c b/contrib/linux-kernel/test/RoundTripCrash.c new file mode 100644 index 00000000..44c67f3a --- /dev/null +++ b/contrib/linux-kernel/test/RoundTripCrash.c @@ -0,0 +1,162 @@ +/** + * Copyright (c) 2016-present, Yann Collet, 
Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +/* + This program takes a file in input, + performs a zstd round-trip test (compression - decompress) + compares the result with original + and generates a crash (double free) on corruption detection. +*/ + +/*=========================================== +* Dependencies +*==========================================*/ +#include /* size_t */ +#include /* malloc, free, exit */ +#include /* fprintf */ +#include +#include + +/*=========================================== +* Macros +*==========================================*/ +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + +static const int kMaxClevel = 22; + +static ZSTD_CCtx *cctx = NULL; +void *cws = NULL; +static ZSTD_DCtx *dctx = NULL; +void *dws = NULL; +static void* cBuff = NULL; +static void* rBuff = NULL; +static size_t buffSize = 0; + + +/** roundTripTest() : +* Compresses `srcBuff` into `compressedBuff`, +* then decompresses `compressedBuff` into `resultBuff`. +* Compression level used is derived from first content byte. +* @return : result of decompression, which should be == `srcSize` +* or an error code if either compression or decompression fails. 
+* Note : `compressedBuffCapacity` should be `>= ZSTD_compressBound(srcSize)` +* for compression to be guaranteed to work */ +static size_t roundTripTest(void* resultBuff, size_t resultBuffCapacity, + void* compressedBuff, size_t compressedBuffCapacity, + const void* srcBuff, size_t srcBuffSize) +{ + size_t const hashLength = MIN(128, srcBuffSize); + unsigned const h32 = xxh32(srcBuff, hashLength, 0); + int const cLevel = h32 % kMaxClevel; + ZSTD_parameters const params = ZSTD_getParams(cLevel, srcBuffSize, 0); + size_t const cSize = ZSTD_compressCCtx(cctx, compressedBuff, compressedBuffCapacity, srcBuff, srcBuffSize, params); + if (ZSTD_isError(cSize)) { + fprintf(stderr, "Compression error : %u \n", ZSTD_getErrorCode(cSize)); + return cSize; + } + return ZSTD_decompressDCtx(dctx, resultBuff, resultBuffCapacity, compressedBuff, cSize); +} + + +static size_t checkBuffers(const void* buff1, const void* buff2, size_t buffSize) +{ + const char* ip1 = (const char*)buff1; + const char* ip2 = (const char*)buff2; + size_t pos; + + for (pos=0; pos buffSize) { + free(cBuff); + free(rBuff); + buffSize = 0; + + cBuff = malloc(neededBuffSize); + rBuff = malloc(neededBuffSize); + if (!cBuff || !rBuff) { + fprintf(stderr, "not enough memory ! \n"); + crash(1); + } + buffSize = neededBuffSize; + } + if (!cctx) { + ZSTD_compressionParameters const params = ZSTD_getCParams(kMaxClevel, 0, 0); + size_t const workspaceSize = ZSTD_CCtxWorkspaceBound(params); + cws = malloc(workspaceSize); + if (!cws) { + fprintf(stderr, "not enough memory ! \n"); + crash(1); + } + cctx = ZSTD_initCCtx(cws, workspaceSize); + if (!cctx) { + fprintf(stderr, "not enough memory ! \n"); + crash(1); + } + } + if (!dctx) { + size_t const workspaceSize = ZSTD_DCtxWorkspaceBound(); + dws = malloc(workspaceSize); + if (!dws) { + fprintf(stderr, "not enough memory ! \n"); + crash(1); + } + dctx = ZSTD_initDCtx(dws, workspaceSize); + if (!dctx) { + fprintf(stderr, "not enough memory ! 
\n"); + crash(1); + } + } + + { size_t const result = roundTripTest(rBuff, buffSize, cBuff, buffSize, srcBuff, srcBuffSize); + if (ZSTD_isError(result)) { + fprintf(stderr, "roundTripTest error : %u \n", ZSTD_getErrorCode(result)); + crash(1); + } + if (result != srcBuffSize) { + fprintf(stderr, "Incorrect regenerated size : %u != %u\n", (unsigned)result, (unsigned)srcBuffSize); + crash(1); + } + if (checkBuffers(srcBuff, rBuff, srcBuffSize) != srcBuffSize) { + fprintf(stderr, "Silent decoding corruption !!!"); + crash(1); + } + } + +#ifndef SKIP_FREE + free(cws); cws = NULL; cctx = NULL; + free(dws); dws = NULL; dctx = NULL; + free(cBuff); cBuff = NULL; + free(rBuff); rBuff = NULL; + buffSize = 0; +#endif +} + +int LLVMFuzzerTestOneInput(const unsigned char *srcBuff, size_t srcBuffSize) { + roundTripCheck(srcBuff, srcBuffSize); + return 0; +} diff --git a/contrib/linux-kernel/test/UserlandTest.cpp b/contrib/linux-kernel/test/UserlandTest.cpp index 73b30be4..03058382 100644 --- a/contrib/linux-kernel/test/UserlandTest.cpp +++ b/contrib/linux-kernel/test/UserlandTest.cpp @@ -280,9 +280,9 @@ TEST(Block, ContentSize) { TEST(Block, CCtxLevelIncrease) { std::string c; - auto cctx = createCCtx(6); + auto cctx = createCCtx(22); auto dctx = createDCtx(); - for (int level = 1; level <= 6; ++level) { + for (int level = 1; level <= 22; ++level) { auto compressed = compress(*cctx, kData, level); auto const decompressed = decompress(*dctx, compressed, kData.size()); EXPECT_EQ(kData, decompressed); @@ -478,6 +478,17 @@ TEST(Stream, Flush) { EXPECT_EQ(kData, decompressed); } +TEST(Stream, DStreamLevelIncrease) { + auto zds = createDStream(); + for (int level = 1; level <= 22; ++level) { + auto zcs = createCStream(level); + auto compressed = compress(*zcs, kData); + ZSTD_resetDStream(zds.get()); + auto const decompressed = decompress(*zds, compressed, kData.size()); + EXPECT_EQ(kData, decompressed); + } +} + #define TEST_SYMBOL(symbol) \ do { \ extern void *__##symbol; \ diff 
--git a/contrib/linux-kernel/xxhash_test.c b/contrib/linux-kernel/xxhash_test.c new file mode 100644 index 00000000..5c1101b6 --- /dev/null +++ b/contrib/linux-kernel/xxhash_test.c @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. This program is dual-licensed; you may select + * either version 2 of the GNU General Public License ("GPL") or BSD license + * ("BSD"). + */ + +/* DO_XXH should be 32 or 64 for xxh32 and xxh64 respectively */ +#define DO_XXH 0 +/* DO_CRC should be 0 or 1 */ +#define DO_CRC 0 +/* Buffer size */ +#define BUFFER_SIZE 4096 + +#include +#include +#include +#include + +#if DO_XXH +#include +#endif + +#if DO_CRC +#include +#endif + +/* Device name to pass to register_chrdev(). */ +#define DEVICE_NAME "xxhash_test" + +/* Dynamically allocated device major number */ +static int device_major; + +/* + * We reuse the same hash state, and thus can hash only one + * file at a time. 
+ */ +static bool device_is_open; + +static uint64_t total_length; + + +#if (DO_XXH == 32) + +#define xxh_state xxh32_state +#define xxh_reset xxh32_reset +#define xxh_update xxh32_update +#define xxh_digest xxh32_digest +#define XXH_FORMAT "XXH32 = 0x%x" + +#elif (DO_XXH == 64) + +#define xxh_state xxh64_state +#define xxh_reset xxh64_reset +#define xxh_update xxh64_update +#define xxh_digest xxh64_digest +#define XXH_FORMAT "XXH64 = 0x%llx" + +#elif DO_XXH + +#error "Invalid value of DO_XXH" + +#endif + +#if DO_XXH + +/* XXH state */ +static struct xxh_state state; + +#endif /* DO_XXH */ + +#if DO_CRC + +static uint32_t crc; + +#endif /* DO_CRC */ + +/* + * Input buffer used to put data coming from userspace. + */ +static uint8_t buffer_in[BUFFER_SIZE]; + +static int xxhash_test_open(struct inode *i, struct file *f) +{ + if (device_is_open) + return -EBUSY; + + device_is_open = true; + + total_length = 0; +#if DO_XXH + xxh_reset(&state, 0); +#endif +#if DO_CRC + crc = 0xFFFFFFFF; +#endif + + printk(KERN_INFO DEVICE_NAME ": opened\n"); + return 0; +} + +static int xxhash_test_release(struct inode *i, struct file *f) +{ + device_is_open = false; + + printk(KERN_INFO DEVICE_NAME ": total_len = %llu\n", total_length); +#if DO_XXH + printk(KERN_INFO DEVICE_NAME ": " XXH_FORMAT "\n", xxh_digest(&state)); +#endif +#if DO_CRC + printk(KERN_INFO DEVICE_NAME ": CRC32 = 0x%08x\n", ~crc); +#endif + printk(KERN_INFO DEVICE_NAME ": closed\n"); + return 0; +} + +/* + * Hash the data given to us from userspace. 
+ */ +static ssize_t xxhash_test_write(struct file *file, const char __user *buf, + size_t size, loff_t *pos) +{ + size_t remaining = size; + + while (remaining > 0) { +#if DO_XXH + int ret; +#endif + size_t const copy_size = min(remaining, sizeof(buffer_in)); + + if (copy_from_user(buffer_in, buf, copy_size)) + return -EFAULT; + buf += copy_size; + remaining -= copy_size; + total_length += copy_size; +#if DO_XXH + if ((ret = xxh_update(&state, buffer_in, copy_size))) { + printk(KERN_INFO DEVICE_NAME ": xxh failure."); + return ret; + } +#endif +#if DO_CRC + crc = crc32(crc, buffer_in, copy_size); +#endif + } + return size; +} +/* register the character device. */ +static int __init xxhash_test_init(void) +{ + static const struct file_operations fileops = { + .owner = THIS_MODULE, + .open = &xxhash_test_open, + .release = &xxhash_test_release, + .write = &xxhash_test_write + }; + + device_major = register_chrdev(0, DEVICE_NAME, &fileops); + if (device_major < 0) { + return device_major; + } + + printk(KERN_INFO DEVICE_NAME ": module loaded\n"); + printk(KERN_INFO DEVICE_NAME ": Create a device node with " + "'mknod " DEVICE_NAME " c %d 0' and write data " + "to it.\n", device_major); + return 0; +} + +static void __exit xxhash_test_exit(void) +{ + unregister_chrdev(device_major, DEVICE_NAME); + printk(KERN_INFO DEVICE_NAME ": module unloaded\n"); +} + +module_init(xxhash_test_init); +module_exit(xxhash_test_exit); + +MODULE_DESCRIPTION("XXHash tester"); +MODULE_VERSION("1.0"); + + +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/contrib/linux-kernel/zstd.diff b/contrib/linux-kernel/zstd.diff index 285961ab..e83b56ed 100644 --- a/contrib/linux-kernel/zstd.diff +++ b/contrib/linux-kernel/zstd.diff @@ -1599,10 +1599,10 @@ index 0000000..a826b99 +#endif /* BITSTREAM_H_MODULE */ diff --git a/lib/zstd/compress.c b/lib/zstd/compress.c new file mode 100644 -index 0000000..42236a3 +index 0000000..1aff542 --- /dev/null +++ b/lib/zstd/compress.c -@@ -0,0 +1,3463 @@ +@@ -0,0 
+1,3468 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. @@ -3583,10 +3583,15 @@ index 0000000..42236a3 + break; /* nothing found : store previous solution */ + } + ++ /* NOTE: ++ * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. ++ * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which ++ * overflows the pointer, which is undefined behavior. ++ */ + /* catch up */ + if (offset) { + while ((start > anchor) && (start > base + offset - ZSTD_REP_MOVE) && -+ (start[-1] == start[-1 - offset + ZSTD_REP_MOVE])) /* only search for offset within prefix */ ++ (start[-1] == (start-offset+ZSTD_REP_MOVE)[-1])) /* only search for offset within prefix */ + { + start--; + matchLength++; @@ -5068,10 +5073,10 @@ index 0000000..42236a3 +MODULE_DESCRIPTION("Zstd Compressor"); diff --git a/lib/zstd/decompress.c b/lib/zstd/decompress.c new file mode 100644 -index 0000000..def10ea +index 0000000..ec673d7 --- /dev/null +++ b/lib/zstd/decompress.c -@@ -0,0 +1,2508 @@ +@@ -0,0 +1,2514 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. 
@@ -7286,6 +7291,20 @@ index 0000000..def10ea + zds->ddict = zds->ddictLocal; + zds->legacyVersion = 0; + zds->hostageByte = 0; ++ ++ { ++ size_t const blockSize = MIN(zds->maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); ++ size_t const neededOutSize = zds->maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2; ++ ++ zds->inBuff = (char *)ZSTD_malloc(blockSize, zds->customMem); ++ zds->inBuffSize = blockSize; ++ zds->outBuff = (char *)ZSTD_malloc(neededOutSize, zds->customMem); ++ zds->outBuffSize = neededOutSize; ++ if (zds->inBuff == NULL || zds->outBuff == NULL) { ++ ZSTD_freeDStream(zds); ++ return NULL; ++ } ++ } + return zds; +} + @@ -7407,25 +7426,17 @@ index 0000000..def10ea + if (zds->fParams.windowSize > zds->maxWindowSize) + return ERROR(frameParameter_windowTooLarge); + -+ /* Adapt buffer sizes to frame header instructions */ ++ /* Buffers are preallocated, but double check */ + { -+ size_t const blockSize = MIN(zds->fParams.windowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); -+ size_t const neededOutSize = zds->fParams.windowSize + blockSize + WILDCOPY_OVERLENGTH * 2; -+ zds->blockSize = blockSize; ++ size_t const blockSize = MIN(zds->maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); ++ size_t const neededOutSize = zds->maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2; + if (zds->inBuffSize < blockSize) { -+ ZSTD_free(zds->inBuff, zds->customMem); -+ zds->inBuffSize = blockSize; -+ zds->inBuff = (char *)ZSTD_malloc(blockSize, zds->customMem); -+ if (zds->inBuff == NULL) -+ return ERROR(memory_allocation); ++ return ERROR(GENERIC); + } + if (zds->outBuffSize < neededOutSize) { -+ ZSTD_free(zds->outBuff, zds->customMem); -+ zds->outBuffSize = neededOutSize; -+ zds->outBuff = (char *)ZSTD_malloc(neededOutSize, zds->customMem); -+ if (zds->outBuff == NULL) -+ return ERROR(memory_allocation); ++ return ERROR(GENERIC); + } ++ zds->blockSize = blockSize; + } + zds->stage = zdss_read; + } diff --git a/contrib/linux-kernel/zstd_compress_test.c 
b/contrib/linux-kernel/zstd_compress_test.c new file mode 100644 index 00000000..bf856b79 --- /dev/null +++ b/contrib/linux-kernel/zstd_compress_test.c @@ -0,0 +1,285 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. This program is dual-licensed; you may select + * either version 2 of the GNU General Public License ("GPL") or BSD license + * ("BSD"). + */ + +/* Compression level or 0 to disable */ +#define DO_ZLIB 9 +/* Compression level or 0 to disable */ +#define DO_ZSTD 0 +/* Buffer size */ +#define BUFFER_SIZE 4096 + +#include +#include +#include +#include +#include + +#if DO_ZSTD +#include +#endif + +#if DO_ZLIB +#include +#endif + +/* Device name to pass to register_chrdev(). */ +#define DEVICE_NAME "zstd_compress_test" + +/* Dynamically allocated device major number */ +static int device_major; + +/* + * We reuse the same state, and thus can compress only one file at a time. + */ +static bool device_is_open; + + +static void *workspace = NULL; + +/* + * Input buffer used to put data coming from userspace. 
+ */ +static uint8_t buffer_in[BUFFER_SIZE]; +static uint8_t buffer_out[BUFFER_SIZE]; + +static uint64_t uncompressed_len; +static uint64_t compressed_len; + +#if DO_ZSTD + +static ZSTD_CStream *state; + +static ZSTD_inBuffer input = { + .src = buffer_in, + .size = sizeof(buffer_in), + .pos = sizeof(buffer_in), +}; + +static ZSTD_outBuffer output = { + .dst = buffer_out, + .size = sizeof(buffer_out), + .pos = sizeof(buffer_out), +}; + +#endif /* DO_ZSTD */ + +#if DO_ZLIB + +static z_stream state = { + .next_in = buffer_in, + .avail_in = 0, + .total_in = 0, + + .next_out = buffer_out, + .avail_out = sizeof(buffer_out), + .total_out = 0, + + .msg = NULL, + .state = NULL, + .workspace = NULL, +}; + +#endif /* DO_ZLIB */ + +static int zstd_compress_test_open(struct inode *i, struct file *f) +{ + if (device_is_open) + return -EBUSY; + + device_is_open = true; + + uncompressed_len = compressed_len = 0; + +#if DO_ZSTD + if (ZSTD_isError(ZSTD_resetCStream(state, 0))) + return -EIO; +#endif + +#if DO_ZLIB + if (zlib_deflateReset(&state) != Z_OK) + return -EIO; +#endif + + printk(KERN_INFO DEVICE_NAME ": opened\n"); + return 0; +} + +static int zstd_compress_test_release(struct inode *i, struct file *f) +{ + device_is_open = false; + +#if DO_ZSTD + do { + size_t ret; + + output.pos = 0; + ret = ZSTD_endStream(state, &output); + if (ZSTD_isError(ret)) { + printk(KERN_INFO DEVICE_NAME ": zstd end error %u\n", ZSTD_getErrorCode(ret)); + return -EIO; + } + compressed_len += output.pos; + } while (output.pos != output.size); +#endif + +#if DO_ZLIB + for (;;) { + int ret; + + state.next_out = buffer_out; + state.avail_out = sizeof(buffer_out); + ret = zlib_deflate(&state, Z_FINISH); + compressed_len += sizeof(buffer_out) - state.avail_out; + if (ret == Z_STREAM_END) + break; + if (ret != Z_OK) { + printk(KERN_INFO DEVICE_NAME ": zlib end error %d: %s\n", ret, state.msg); + return -EIO; + } + } +#endif + + printk(KERN_INFO DEVICE_NAME ": uncompressed_len = %llu\n", 
uncompressed_len); + printk(KERN_INFO DEVICE_NAME ": compressed_len = %llu\n", compressed_len); + printk(KERN_INFO DEVICE_NAME ": closed\n"); + return 0; +} + +/* + * Hash the data given to us from userspace. + */ +static ssize_t zstd_compress_test_write(struct file *file, + const char __user *buf, size_t size, loff_t *pos) +{ + size_t remaining = size; + + while (remaining > 0) { + size_t const copy_size = min(remaining, sizeof(buffer_in)); + + if (copy_from_user(buffer_in, buf, copy_size)) + return -EFAULT; + buf += copy_size; + remaining -= copy_size; + uncompressed_len += copy_size; + +#if DO_ZSTD + input.pos = 0; + input.size = copy_size; + while (input.pos != input.size) { + size_t ret; + + output.pos = 0; + ret = ZSTD_compressStream(state, &output, &input); + if (ZSTD_isError(ret)) { + printk(KERN_INFO DEVICE_NAME ": zstd compress error %u\n", ZSTD_getErrorCode(ret)); + return -EIO; + } + compressed_len += output.pos; + } +#endif +#if DO_ZLIB + state.next_in = buffer_in; + state.avail_in = copy_size; + while (state.avail_in > 0) { + int ret; + + state.next_out = buffer_out; + state.avail_out = sizeof(buffer_out); + ret = zlib_deflate(&state, Z_NO_FLUSH); + compressed_len += sizeof(buffer_out) - state.avail_out; + if (ret != Z_OK) { + printk(KERN_INFO DEVICE_NAME ": zlib end error %d: %s\n", ret, state.msg); + return -EIO; + } + } +#endif + } + return size; +} +/* register the character device. 
*/ +static int __init zstd_compress_test_init(void) +{ + static const struct file_operations fileops = { + .owner = THIS_MODULE, + .open = &zstd_compress_test_open, + .release = &zstd_compress_test_release, + .write = &zstd_compress_test_write + }; + size_t workspace_size = 0; +#if DO_ZSTD + ZSTD_parameters params; +#endif + + device_major = register_chrdev(0, DEVICE_NAME, &fileops); + if (device_major < 0) { + return device_major; + } + +#if DO_ZSTD + params = ZSTD_getParams(DO_ZSTD, 0, 0); + workspace_size = ZSTD_CStreamWorkspaceBound(params.cParams); + + if (!(workspace = vmalloc(workspace_size))) + goto fail; + if (!(state = ZSTD_initCStream(params, 0, workspace, workspace_size))) + goto fail; +#endif + +#if DO_ZLIB + workspace_size = zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL); + + if (!(workspace = vmalloc(workspace_size))) + goto fail; + state.workspace = workspace; + if (zlib_deflateInit(&state, DO_ZLIB) != Z_OK) + goto fail; +#endif + + printk(KERN_INFO DEVICE_NAME ": module loaded\n"); + printk(KERN_INFO DEVICE_NAME ": compression requires %zu bytes of memory\n", workspace_size); + printk(KERN_INFO DEVICE_NAME ": Create a device node with " + "'mknod " DEVICE_NAME " c %d 0' and write data " + "to it.\n", device_major); + return 0; + +fail: + printk(KERN_INFO DEVICE_NAME ": failed to load module\n"); + unregister_chrdev(device_major, DEVICE_NAME); /* undo register_chrdev() above */ + vfree(workspace); /* vfree(NULL) is a no-op */ + workspace = NULL; + + return -ENOMEM; +} + +static void __exit zstd_compress_test_exit(void) +{ + unregister_chrdev(device_major, DEVICE_NAME); +#if DO_ZLIB + zlib_deflateEnd(&state); /* pairs with zlib_deflateInit() in init */ +#endif + if (workspace) { + vfree(workspace); + workspace = NULL; + } + printk(KERN_INFO DEVICE_NAME ": module unloaded\n"); +} + +module_init(zstd_compress_test_init); +module_exit(zstd_compress_test_exit); + +MODULE_DESCRIPTION("Zstd compression tester"); +MODULE_VERSION("1.0"); + +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/contrib/linux-kernel/zstd_decompress_test.c b/contrib/linux-kernel/zstd_decompress_test.c new file
mode 100644 index 00000000..4905a5ac --- /dev/null +++ b/contrib/linux-kernel/zstd_decompress_test.c @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. This program is dual-licensed; you may select + * either version 2 of the GNU General Public License ("GPL") or BSD license + * ("BSD"). + */ + +/* Non-zero to enable zlib decompression, 0 to disable */ +#define DO_ZLIB 1 +/* Compression level or 0 to disable */ +#define DO_ZSTD 0 +/* Buffer size */ +#define BUFFER_SIZE 4096 + +#include +#include +#include +#include +#include + +#if DO_ZSTD +#include +#endif + +#if DO_ZLIB +#include +#endif + +/* Device name to pass to register_chrdev(). */ +#define DEVICE_NAME "zstd_decompress_test" + +/* Dynamically allocated device major number */ +static int device_major; + +/* + * We reuse the same state, and thus can decompress only one file at a time. + */ +static bool device_is_open; + + +static void *workspace = NULL; + +/* + * Input buffer used to put data coming from userspace.
+ */ +static uint8_t buffer_in[BUFFER_SIZE]; +static uint8_t buffer_out[BUFFER_SIZE]; + +static uint64_t uncompressed_len; +static uint64_t compressed_len; + +#if DO_ZSTD + +static ZSTD_DStream *state; + +static ZSTD_inBuffer input = { + .src = buffer_in, + .size = sizeof(buffer_in), + .pos = sizeof(buffer_in), +}; + +static ZSTD_outBuffer output = { + .dst = buffer_out, + .size = sizeof(buffer_out), + .pos = sizeof(buffer_out), +}; + +#endif /* DO_ZSTD */ + +#if DO_ZLIB + +static z_stream state = { + .next_in = buffer_in, + .avail_in = 0, + .total_in = 0, + + .next_out = buffer_out, + .avail_out = sizeof(buffer_out), + .total_out = 0, + + .msg = NULL, + .state = NULL, + .workspace = NULL, +}; + +#endif /* DO_ZLIB */ + +static int zstd_decompress_test_open(struct inode *i, struct file *f) +{ + if (device_is_open) + return -EBUSY; + + uncompressed_len = compressed_len = 0; + +#if DO_ZSTD + if (ZSTD_isError(ZSTD_resetDStream(state))) + return -EIO; +#endif + +#if DO_ZLIB + if (zlib_inflateReset(&state) != Z_OK) + return -EIO; +#endif + + device_is_open = true; /* only after a successful reset, so a failed open does not leave the device busy */ + + printk(KERN_INFO DEVICE_NAME ": opened\n"); + return 0; +} + +static int zstd_decompress_test_release(struct inode *i, struct file *f) +{ + device_is_open = false; /* allow the next open() */ + + printk(KERN_INFO DEVICE_NAME ": uncompressed_len = %llu\n", uncompressed_len); + printk(KERN_INFO DEVICE_NAME ": compressed_len = %llu\n", compressed_len); + printk(KERN_INFO DEVICE_NAME ": closed\n"); + return 0; +} + +/* + * Decompress the data given to us from userspace.
+ *
+ * The payload is copied from userspace in chunks of at most
+ * sizeof(buffer_in) bytes and each chunk is run through the decoder selected
+ * by DO_ZSTD / DO_ZLIB.
+ *
+ * Returns size on success, -EFAULT if the user buffer cannot be read, or
+ * -EIO if the decoder reports an error.
+ */
+static ssize_t zstd_decompress_test_write(struct file *file,
+		const char __user *buf, size_t size, loff_t *pos)
+{
+	size_t remaining = size;
+
+	while (remaining > 0) {
+		size_t const copy_size = min(remaining, sizeof(buffer_in));
+
+		if (copy_from_user(buffer_in, buf, copy_size))
+			return -EFAULT;
+		buf += copy_size;
+		remaining -= copy_size;
+		compressed_len += copy_size;
+
+#if DO_ZSTD
+		input.pos = 0;
+		input.size = copy_size;
+		while (input.pos != input.size) {
+			size_t ret;
+
+			/* Reuse the whole output buffer; its contents are dropped. */
+			output.pos = 0;
+			ret = ZSTD_decompressStream(state, &output, &input);
+			if (ZSTD_isError(ret)) {
+				printk(KERN_INFO DEVICE_NAME ": zstd decompress error %u\n", ZSTD_getErrorCode(ret));
+				return -EIO;
+			}
+			uncompressed_len += output.pos;
+		}
+#endif
+#if DO_ZLIB
+		state.next_in = buffer_in;
+		state.avail_in = copy_size;
+		while (state.avail_in > 0) {
+			int ret;
+
+			/* Reuse the whole output buffer; its contents are dropped. */
+			state.next_out = buffer_out;
+			state.avail_out = sizeof(buffer_out);
+			ret = zlib_inflate(&state, Z_NO_FLUSH);
+			uncompressed_len += sizeof(buffer_out) - state.avail_out;
+			if (ret != Z_OK && ret != Z_STREAM_END) {
+				printk(KERN_INFO DEVICE_NAME ": zlib decompress error %d: %s\n", ret, state.msg);
+				return -EIO;
+			}
+			/*
+			 * Once the stream has ended, inflate consumes no more
+			 * input, so avail_in would never reach zero. Stop here
+			 * instead of spinning on trailing bytes; they are
+			 * dropped but still counted in compressed_len.
+			 */
+			if (ret == Z_STREAM_END)
+				break;
+		}
+#endif
+	}
+	return size;
+}
+/* register the character device.
*/
+/*
+ * Module init: register the character device, then allocate the decoder
+ * workspace with vmalloc() and initialise the decoder selected by
+ * DO_ZSTD / DO_ZLIB.
+ *
+ * Returns 0 on success, the negative value from register_chrdev() if
+ * registration fails, or -ENOMEM if the workspace cannot be allocated or the
+ * decoder cannot be initialised. On the latter failures the character device
+ * is unregistered again so that no device is left behind pointing at the
+ * file operations of a module that did not load.
+ */
+static int __init zstd_decompress_test_init(void)
+{
+	static const struct file_operations fileops = {
+		.owner = THIS_MODULE,
+		.open = &zstd_decompress_test_open,
+		.release = &zstd_decompress_test_release,
+		.write = &zstd_decompress_test_write
+	};
+	size_t workspace_size = 0;
+#if DO_ZSTD
+	ZSTD_parameters params;
+	size_t max_window_size;
+#endif
+
+	device_major = register_chrdev(0, DEVICE_NAME, &fileops);
+	if (device_major < 0) {
+		return device_major;
+	}
+
+#if DO_ZSTD
+	/* Size the workspace from the window of compression level DO_ZSTD. */
+	params = ZSTD_getParams(DO_ZSTD, 0, 0);
+	max_window_size = (size_t)1 << params.cParams.windowLog;
+	workspace_size = ZSTD_DStreamWorkspaceBound(max_window_size);
+
+	if (!(workspace = vmalloc(workspace_size)))
+		goto fail;
+	if (!(state = ZSTD_initDStream(max_window_size, workspace, workspace_size)))
+		goto fail;
+#endif
+
+#if DO_ZLIB
+	workspace_size = zlib_inflate_workspacesize();
+
+	if (!(workspace = vmalloc(workspace_size)))
+		goto fail;
+	state.workspace = workspace;
+	if (zlib_inflateInit(&state) != Z_OK)
+		goto fail;
+#endif
+
+	printk(KERN_INFO DEVICE_NAME ": module loaded\n");
+	printk(KERN_INFO DEVICE_NAME ": decompression requires %zu bytes of memory\n", workspace_size);
+	printk(KERN_INFO DEVICE_NAME ": Create a device node with "
+			"'mknod " DEVICE_NAME " c %d 0' and write data "
+			"to it.\n", device_major);
+	return 0;
+
+fail:
+	printk(KERN_INFO DEVICE_NAME ": failed to load module\n");
+	/* Undo the registration done above before releasing the workspace. */
+	unregister_chrdev(device_major, DEVICE_NAME);
+	if (workspace) {
+		vfree(workspace);
+		workspace = NULL;
+	}
+	return -ENOMEM;
+}
+
+/*
+ * Module exit: unregister the character device, shut down the zlib inflate
+ * stream when DO_ZLIB is enabled, and free the decoder workspace.
+ */
+static void __exit zstd_decompress_test_exit(void)
+{
+	unregister_chrdev(device_major, DEVICE_NAME);
+#if DO_ZLIB
+	/* state was set up with zlib_inflateInit(), so end it as an inflate stream. */
+	zlib_inflateEnd(&state);
+#endif
+	if (workspace) {
+		vfree(workspace);
+		workspace = NULL;
+	}
+	printk(KERN_INFO DEVICE_NAME ": module unloaded\n");
+}
+
+module_init(zstd_decompress_test_init);
+module_exit(zstd_decompress_test_exit);
+
+MODULE_DESCRIPTION("Zstd decompression tester");
+MODULE_VERSION("1.0");
+
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/lib/Makefile
b/lib/Makefile index d0c3c826..5845cf17 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -149,7 +149,7 @@ install: libzstd.a libzstd libzstd.pc @$(INSTALL) -d -m 755 $(DESTDIR)$(PKGCONFIGDIR)/ $(DESTDIR)$(INCLUDEDIR)/ @$(INSTALL_DATA) libzstd.pc $(DESTDIR)$(PKGCONFIGDIR)/ @echo Installing libraries - @$(INSTALL_LIB) libzstd.a $(DESTDIR)$(LIBDIR) + @$(INSTALL_DATA) libzstd.a $(DESTDIR)$(LIBDIR) @$(INSTALL_LIB) libzstd.$(SHARED_EXT_VER) $(DESTDIR)$(LIBDIR) @ln -sf libzstd.$(SHARED_EXT_VER) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR) @ln -sf libzstd.$(SHARED_EXT_VER) $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 3ba1748f..d5de46a4 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2136,15 +2136,19 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, break; /* nothing found : store previous solution */ } + /* NOTE: + * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. + * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which + * overflows the pointer, which is undefined behavior. + */ /* catch up */ if (offset) { while ( (start > anchor) && (start > base+offset-ZSTD_REP_MOVE) - && (start[-1] == start[-1-offset+ZSTD_REP_MOVE]) ) /* only search for offset within prefix */ + && (start[-1] == (start-offset+ZSTD_REP_MOVE)[-1]) ) /* only search for offset within prefix */ { start--; matchLength++; } offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); } - /* store sequence */ _storeSequence: { size_t const litLength = start - anchor; diff --git a/tests/playTests.sh b/tests/playTests.sh index 4eb794b5..fa82ae9e 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -567,21 +567,24 @@ $ECHO "\n**** zstd --list/-l error detection tests ****" $ECHO "\n**** zstd --list/-l test with null files ****" ./datagen -g0 > tmp5 $ZSTD tmp5 +$ZSTD -l tmp5.zst ! $ZSTD -l tmp5* +$ZSTD -lv tmp5.zst ! $ZSTD -lv tmp5* -! $ZSTD --list tmp5* -! 
$ZSTD --list -v tmp5* -$ECHO "\n**** zstd --list/-l test with no frame content size ****" -echo -n '' > tmp6 -$ZSTD tmp6 +$ECHO "\n**** zstd --list/-l test with no content size field ****" +./datagen -g1MB | $ZSTD > tmp6.zst $ZSTD -l tmp6.zst $ZSTD -lv tmp6.zst -$ZSTD --list tmp6.zst -$ZSTD --list -v tmp6.zst + +$ECHO "\n**** zstd --list/-l test with no checksum ****" +$ZSTD -f --no-check tmp1 +$ZSTD -l tmp1.zst +$ZSTD -lv tmp1.zst rm tmp* + if [ "$1" != "--test-large-data" ]; then $ECHO "Skipping large data tests" exit 0