diff --git a/.gitignore b/.gitignore index acd9552b..6ad56430 100644 --- a/.gitignore +++ b/.gitignore @@ -31,6 +31,7 @@ projects/ bin/ .buckd/ buck-out/ +build-* # Other files .directory @@ -43,3 +44,5 @@ _zstdbench/ googletest/ *.d *.vscode +compile_commands.json +.clangd \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 97f95a60..ec1adab0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -32,9 +32,10 @@ matrix: script: - make check - - name: Trusty (Test All) + - name: make test (complete) script: - - make test + # DEVNULLRIGHTS : will request sudo rights to test permissions on /dev/null + - DEVNULLRIGHTS=test make test - name: gcc-6 + gcc-7 compilation script: @@ -80,43 +81,43 @@ matrix: - make clean - make -j check MOREFLAGS="-Werror -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS" - - name: Trusty (CMake) + - name: cmake test script: - make cmakebuild - - name: Trusty (Static Analyze) + - name: static analyzer scanbuild script: - make staticAnalyze - - name: Trusty (gcc-8 + ASan + UBSan + Fuzz Test) + - name: gcc-8 + ASan + UBSan + Fuzz Test script: - make gcc8install - CC=gcc-8 make clean uasan-fuzztest - - name: Trusty (gcc-6 + ASan + UBSan + Fuzz Test 32bit) + - name: gcc-6 + ASan + UBSan + Fuzz Test 32bit script: - make gcc6install libc6install - CC=gcc-6 CFLAGS="-O2 -m32" make uasan-fuzztest # can complain about pointer overflow - - name: Trusty (clang-3.8 + MSan + Fuzz Test) + - name: clang-3.8 + MSan + Fuzz Test script: - make clang38install - CC=clang-3.8 make clean msan-fuzztest - - name: Trusty (ASan + UBSan + MSan + Regression Test) + - name: ASan + UBSan + MSan + Regression Test script: - make -j uasanregressiontest - make clean - make -j msanregressiontest - - name: Trusty (Valgrind + Fuzz Test Stack Mode) + - name: Valgrind + Fuzz Test Stack Mode script: - make valgrindinstall - make -C tests clean valgrindTest - make clean - make -C tests test-fuzzer-stackmode - - name: Trusty (ARM + Fuzz Test) + - name: Qemu ARM emulation + Fuzz Test script: - make arminstall - make armfuzz @@ -127,12 +128,12 @@ matrix: - make arminstall - make aarch64fuzz - - name: Trusty (PPC + Fuzz Test) + - name: PPC + Fuzz Test script: - make ppcinstall - make ppcfuzz - - name: Trusty (Versions Compatibility Test) + - name: Versions Compatibility Test script: - make -C tests versionsTest diff --git a/Makefile b/Makefile index efb555c3..8ff9642f 100644 --- a/Makefile +++ b/Makefile @@ -351,9 +351,9 @@ cmakebuild: mkdir $(BUILDIR)/cmake/build cd $(BUILDIR)/cmake/build ; cmake -DCMAKE_INSTALL_PREFIX:PATH=~/install_test_dir $(CMAKE_PARAMS) .. 
; $(MAKE) install ; $(MAKE) uninstall -c90build: clean +c89build: clean $(CC) -v - CFLAGS="-std=c90 -Werror" $(MAKE) allmost # will fail, due to missing support for `long long` + CFLAGS="-std=c89 -Werror" $(MAKE) allmost # will fail, due to missing support for `long long` gnu90build: clean $(CC) -v diff --git a/build/VS2008/fullbench/fullbench.vcproj b/build/VS2008/fullbench/fullbench.vcproj index 66ac8223..5752643f 100644 --- a/build/VS2008/fullbench/fullbench.vcproj +++ b/build/VS2008/fullbench/fullbench.vcproj @@ -372,6 +372,10 @@ RelativePath="..\..\..\lib\compress\zstd_compress_sequences.c" > + <File + RelativePath="..\..\..\lib\compress\zstd_compress_superblock.c" + > + </File> @@ -514,6 +518,10 @@ RelativePath="..\..\..\lib\compress\zstd_cwksp.h" > + <File + RelativePath="..\..\..\lib\compress\zstd_compress_superblock.h" + > + </File> diff --git a/build/VS2008/fuzzer/fuzzer.vcproj b/build/VS2008/fuzzer/fuzzer.vcproj index a9008a67..d48bc0fa 100644 --- a/build/VS2008/fuzzer/fuzzer.vcproj +++ b/build/VS2008/fuzzer/fuzzer.vcproj @@ -420,6 +420,10 @@ RelativePath="..\..\..\lib\compress\zstd_compress_sequences.c" > + <File + RelativePath="..\..\..\lib\compress\zstd_compress_superblock.c" + > + </File> @@ -550,6 +554,10 @@ RelativePath="..\..\..\lib\compress\zstd_cwksp.h" > + <File + RelativePath="..\..\..\lib\compress\zstd_compress_superblock.h" + > + </File> diff --git a/build/VS2008/zstd/zstd.vcproj b/build/VS2008/zstd/zstd.vcproj index 4d75a9d3..ab02e615 100644 --- a/build/VS2008/zstd/zstd.vcproj +++ b/build/VS2008/zstd/zstd.vcproj @@ -432,6 +432,10 @@ RelativePath="..\..\..\lib\compress\zstd_compress_sequences.c" > + <File + RelativePath="..\..\..\lib\compress\zstd_compress_superblock.c" + > + </File> @@ -630,6 +634,10 @@ RelativePath="..\..\..\lib\compress\zstd_cwksp.h" > + <File + RelativePath="..\..\..\lib\compress\zstd_compress_superblock.h" + > + </File> diff --git a/build/VS2008/zstdlib/zstdlib.vcproj b/build/VS2008/zstdlib/zstdlib.vcproj index 100a501a..5eb49f9b 100644 --- a/build/VS2008/zstdlib/zstdlib.vcproj +++ b/build/VS2008/zstdlib/zstdlib.vcproj @@ -404,6 +404,10 @@ RelativePath="..\..\..\lib\compress\zstd_compress_sequences.c" > + <File + RelativePath="..\..\..\lib\compress\zstd_compress_superblock.c" + > + </File> @@ -562,6 +566,10 @@ RelativePath="..\..\..\lib\compress\zstd_cwksp.h" > + <File + RelativePath="..\..\..\lib\compress\zstd_compress_superblock.h" + > + </File> diff --git a/build/VS2010/fullbench/fullbench.vcxproj b/build/VS2010/fullbench/fullbench.vcxproj index 4597239b..20932fa3 100644 --- a/build/VS2010/fullbench/fullbench.vcxproj +++ b/build/VS2010/fullbench/fullbench.vcxproj @@ -169,6 +169,7 @@ + <ClCompile Include="..\..\..\lib\compress\zstd_compress_superblock.c" /> @@ -198,6 +199,7 @@ + <ClInclude Include="..\..\..\lib\compress\zstd_compress_superblock.h" /> diff --git a/build/VS2010/fuzzer/fuzzer.vcxproj b/build/VS2010/fuzzer/fuzzer.vcxproj index a6f136ee..84275728 100644 --- a/build/VS2010/fuzzer/fuzzer.vcxproj +++ b/build/VS2010/fuzzer/fuzzer.vcxproj @@ -169,6 +169,7 @@ + <ClCompile Include="..\..\..\lib\compress\zstd_compress_superblock.c" /> @@ -201,6 +202,7 @@ + <ClInclude Include="..\..\..\lib\compress\zstd_compress_superblock.h" /> diff --git a/build/VS2010/libzstd-dll/libzstd-dll.vcxproj b/build/VS2010/libzstd-dll/libzstd-dll.vcxproj index 50c4e817..0957d413 100644 --- a/build/VS2010/libzstd-dll/libzstd-dll.vcxproj +++ b/build/VS2010/libzstd-dll/libzstd-dll.vcxproj @@ -33,6 +33,7 @@ + <ClCompile Include="..\..\..\lib\compress\zstd_compress_superblock.c" /> @@ -83,6 +84,7 @@ + <ClInclude Include="..\..\..\lib\compress\zstd_compress_superblock.h" /> diff --git a/build/VS2010/libzstd/libzstd.vcxproj b/build/VS2010/libzstd/libzstd.vcxproj index c4e82877..20342935 100644 --- a/build/VS2010/libzstd/libzstd.vcxproj +++ b/build/VS2010/libzstd/libzstd.vcxproj @@ -33,6 +33,7 @@ + <ClCompile Include="..\..\..\lib\compress\zstd_compress_superblock.c" /> @@ -83,6 +84,7 @@ + <ClInclude Include="..\..\..\lib\compress\zstd_compress_superblock.h" /> diff --git a/build/VS2010/zstd/zstd.vcxproj b/build/VS2010/zstd/zstd.vcxproj index 1058d229..e320d88a 100644 --- a/build/VS2010/zstd/zstd.vcxproj +++ b/build/VS2010/zstd/zstd.vcxproj @@ -34,6 +34,7 @@ + <ClCompile Include="..\..\..\lib\compress\zstd_compress_superblock.c" /> @@ -80,6 +81,7 @@ + <ClInclude Include="..\..\..\lib\compress\zstd_compress_superblock.h" /> diff --git a/build/cmake/tests/CMakeLists.txt b/build/cmake/tests/CMakeLists.txt index 077d824b..063260a0 100644 --- a/build/cmake/tests/CMakeLists.txt +++ b/build/cmake/tests/CMakeLists.txt @@ -49,6 +49,9 @@ target_link_libraries(fullbench libzstd_static) add_executable(fuzzer ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/fuzzer.c) target_link_libraries(fuzzer libzstd_static) +add_executable(zstreamtest ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c
${TESTS_DIR}/seqgen.c ${TESTS_DIR}/zstreamtest.c) +target_link_libraries(zstreamtest libzstd_static) + if (UNIX) add_executable(paramgrill ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/paramgrill.c) target_link_libraries(paramgrill libzstd_static m) #m is math library diff --git a/build/meson/lib/meson.build b/build/meson/lib/meson.build index ef669327..6fb3df8e 100644 --- a/build/meson/lib/meson.build +++ b/build/meson/lib/meson.build @@ -30,6 +30,7 @@ libzstd_sources = [join_paths(zstd_rootdir, 'lib/common/entropy_common.c'), join_paths(zstd_rootdir, 'lib/compress/zstd_compress.c'), join_paths(zstd_rootdir, 'lib/compress/zstd_compress_literals.c'), join_paths(zstd_rootdir, 'lib/compress/zstd_compress_sequences.c'), + join_paths(zstd_rootdir, 'lib/compress/zstd_compress_superblock.c'), join_paths(zstd_rootdir, 'lib/compress/zstdmt_compress.c'), join_paths(zstd_rootdir, 'lib/compress/zstd_fast.c'), join_paths(zstd_rootdir, 'lib/compress/zstd_double_fast.c'), diff --git a/doc/zstd_compression_format.md b/doc/zstd_compression_format.md index 90ac0fe9..fc61726f 100644 --- a/doc/zstd_compression_format.md +++ b/doc/zstd_compression_format.md @@ -16,7 +16,7 @@ Distribution of this document is unlimited. ### Version -0.3.4 (16/08/19) +0.3.5 (13/11/19) Introduction @@ -341,6 +341,8 @@ The structure of a block is as follows: |:--------------:|:---------------:| | 3 bytes | n bytes | +__`Block_Header`__ + `Block_Header` uses 3 bytes, written using __little-endian__ convention. It contains 3 fields : @@ -385,17 +387,30 @@ There are 4 block types : __`Block_Size`__ The upper 21 bits of `Block_Header` represent the `Block_Size`. + When `Block_Type` is `Compressed_Block` or `Raw_Block`, -`Block_Size` is the size of `Block_Content`, hence excluding `Block_Header`. -When `Block_Type` is `RLE_Block`, `Block_Content`’s size is always 1, -and `Block_Size` represents the number of times this byte must be repeated. -A block can contain and decompress into any number of bytes (even zero), -up to `Block_Maximum_Decompressed_Size`, which is the smallest of: -- Window_Size +`Block_Size` is the size of `Block_Content` (hence excluding `Block_Header`). + +When `Block_Type` is `RLE_Block`, since `Block_Content`’s size is always 1, +`Block_Size` represents the number of times this byte must be repeated. + +`Block_Size` is limited by `Block_Maximum_Size` (see below). + +__`Block_Content`__ and __`Block_Maximum_Size`__ + +The size of `Block_Content` is limited by `Block_Maximum_Size`, +which is the smallest of: +- `Window_Size` - 128 KB -If this condition cannot be respected when generating a `Compressed_Block`, -the block must be sent uncompressed instead (`Raw_Block`). +`Block_Maximum_Size` is constant for a given frame. +This maximum is applicable to both the decompressed size +and the compressed size of any block in the frame. + +The reasoning for this limit is that a decoder can read this information +at the beginning of a frame and use it to allocate buffers. +The guarantees on the size of blocks ensure that +the buffers will be large enough for any following block of the valid frame. 
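To make the layout above concrete, here is a minimal illustrative sketch (not part of the diff) that decodes the 3-byte `Block_Header` and applies the `Block_Maximum_Size` rule just described; `blockHeader_t`, `readBlockHeader` and `blockMaximumSize` are names invented for this example.

```c
#include <stddef.h>
#include <stdint.h>

/* Hypothetical helper, mirroring the field layout described above:
 * bit 0 = Last_Block, bits 1-2 = Block_Type, upper 21 bits = Block_Size. */
typedef enum { bt_raw = 0, bt_rle = 1, bt_compressed = 2, bt_reserved = 3 } blockType_t;

typedef struct {
    int lastBlock;          /* 1 if this is the frame's last block */
    blockType_t blockType;
    uint32_t blockSize;     /* content size; for RLE blocks, the repeat count */
} blockHeader_t;

static blockHeader_t readBlockHeader(const uint8_t* src)
{
    /* Block_Header is 3 bytes, written little-endian */
    uint32_t const raw = (uint32_t)src[0]
                       | ((uint32_t)src[1] << 8)
                       | ((uint32_t)src[2] << 16);
    blockHeader_t bh;
    bh.lastBlock = (int)(raw & 1);
    bh.blockType = (blockType_t)((raw >> 1) & 3);
    bh.blockSize = raw >> 3;   /* upper 21 bits */
    return bh;
}

/* Block_Maximum_Size = min(Window_Size, 128 KB), constant for a given frame */
static size_t blockMaximumSize(size_t windowSize)
{
    size_t const kMax = 128 * 1024;
    return windowSize < kMax ? windowSize : kMax;
}
```

A decoder can evaluate `blockMaximumSize()` once per frame to size its buffers, which is exactly the rationale the paragraph above gives for the limit.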
Compressed Blocks @@ -1658,6 +1673,7 @@ or at least provide a meaningful error code explaining for which reason it canno Version changes --------------- +- 0.3.5 : clarifications for Block_Maximum_Size - 0.3.4 : clarifications for FSE decoding table - 0.3.3 : clarifications for field Block_Size - 0.3.2 : remove additional block size restriction on compressed blocks diff --git a/lib/Makefile b/lib/Makefile index 273ceb90..fd1710cf 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -161,7 +161,7 @@ ifneq (,$(filter Windows%,$(OS))) LIBZSTD = dll\libzstd.dll $(LIBZSTD): $(ZSTD_FILES) @echo compiling dynamic library $(LIBVER) - $(CC) $(FLAGS) -DZSTD_DLL_EXPORT=1 -Wl,--out-implib,dll\libzstd.lib -shared $^ -o $@ + $(CC) $(FLAGS) -DZSTD_DLL_EXPORT=1 -Wl,--out-implib,dll\libzstd.dll.a -shared $^ -o $@ else diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h index 1c294b80..fe3148ee 100644 --- a/lib/common/bitstream.h +++ b/lib/common/bitstream.h @@ -48,6 +48,7 @@ extern "C" { * Dependencies ******************************************/ #include "mem.h" /* unaligned access routines */ +#include "compiler.h" /* UNLIKELY() */ #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */ #include "error_private.h" /* error codes and messages */ @@ -411,6 +412,23 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) return value; } +/*! BIT_reloadDStreamFast() : + * Similar to BIT_reloadDStream(), but with two differences: + * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold! + * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this + * point you must use BIT_reloadDStream() to reload. + */ +MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) +{ + if (UNLIKELY(bitD->ptr < bitD->limitPtr)) + return BIT_DStream_overflow; + assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; +} + /*! BIT_reloadDStream() : * Refill `bitD` from buffer previously set in BIT_initDStream() . * This function is safe, it guarantees it will not read beyond src buffer. @@ -422,10 +440,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) return BIT_DStream_overflow; if (bitD->ptr >= bitD->limitPtr) { - bitD->ptr -= bitD->bitsConsumed >> 3; - bitD->bitsConsumed &= 7; - bitD->bitContainer = MEM_readLEST(bitD->ptr); - return BIT_DStream_unfinished; + return BIT_reloadDStreamFast(bitD); } if (bitD->ptr == bitD->start) { if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; diff --git a/lib/common/compiler.h b/lib/common/compiler.h index 1877a0c1..228845aa 100644 --- a/lib/common/compiler.h +++ b/lib/common/compiler.h @@ -146,6 +146,19 @@ # define DONT_VECTORIZE #endif +/* Tell the compiler that a branch is likely or unlikely. + * Only use these macros if it causes the compiler to generate better code. + * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc + * and clang, please do. 
+ */ +#if defined(__GNUC__) +#define LIKELY(x) (__builtin_expect((x), 1)) +#define UNLIKELY(x) (__builtin_expect((x), 0)) +#else +#define LIKELY(x) (x) +#define UNLIKELY(x) (x) +#endif + /* disable warnings */ #ifdef _MSC_VER /* Visual Studio */ # include <intrin.h> /* For Visual 2005 */ diff --git a/lib/common/huf.h b/lib/common/huf.h index 6b572c44..3026c43e 100644 --- a/lib/common/huf.h +++ b/lib/common/huf.h @@ -208,6 +208,7 @@ typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); typedef enum { HUF_repeat_none, /**< Cannot use the previous table */ diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index f074f1e0..0cbba2c9 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -427,7 +427,7 @@ size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbo return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable)); } -static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) +size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { size_t nbBits = 0; int s; diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 35346b92..5dacba8c 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -28,11 +28,19 @@ #include "zstd_lazy.h" #include "zstd_opt.h" #include "zstd_ldm.h" +#include "zstd_compress_superblock.h" /*-************************************* * Helper functions ***************************************/ +/* ZSTD_compressBound() + * Note that the result from this function is only compatible with the "normal" + * full-block strategy. + * When there are a lot of small blocks due to frequent flush in streaming mode + * or targetCBlockSize, the overhead of headers can make the compressed data + * larger than the return value of ZSTD_compressBound().
+ */ size_t ZSTD_compressBound(size_t srcSize) { return ZSTD_COMPRESSBOUND(srcSize); } @@ -249,12 +257,12 @@ size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_paramete /* ZSTD_assignParamsToCCtxParams() : * params is presumed valid at this stage */ static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams( - const ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) + const ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params) { ZSTD_CCtx_params ret = *cctxParams; - assert(!ZSTD_checkCParams(params.cParams)); - ret.cParams = params.cParams; - ret.fParams = params.fParams; + assert(!ZSTD_checkCParams(params->cParams)); + ret.cParams = params->cParams; + ret.fParams = params->fParams; ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ return ret; } @@ -339,8 +347,13 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) return bounds; case ZSTD_c_overlapLog: +#ifdef ZSTD_MULTITHREAD bounds.lowerBound = ZSTD_OVERLAPLOG_MIN; bounds.upperBound = ZSTD_OVERLAPLOG_MAX; +#else + bounds.lowerBound = 0; + bounds.upperBound = 0; +#endif return bounds; case ZSTD_c_enableLongDistanceMatching: @@ -845,7 +858,7 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) { ZSTD_localDict* const dl = &cctx->localDict; ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams( - &cctx->requestedParams, 0, dl->dictSize); + &cctx->requestedParams, ZSTD_CONTENTSIZE_UNKNOWN, dl->dictSize); if (dl->dict == NULL) { /* No local dictionary. */ assert(dl->dictBuffer == NULL); @@ -1006,7 +1019,7 @@ static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) * optimize `cPar` for a specified input (`srcSize` and `dictSize`). * mostly downsize to reduce memory consumption and initialization latency. * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known. - * note : for the time being, `srcSize==0` means "unknown" too, for compatibility with older convention. + * note : `srcSize==0` means 0! * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). 
*/ static ZSTD_compressionParameters ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, @@ -1017,10 +1030,8 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); assert(ZSTD_checkCParams(cPar)==0); - if (dictSize && (srcSize+1<2) /* ZSTD_CONTENTSIZE_UNKNOWN and 0 mean "unknown" */ ) - srcSize = minSrcSize; /* presumed small when there is a dictionary */ - else if (srcSize == 0) - srcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* 0 == unknown : presumed large */ + if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN) + srcSize = minSrcSize; /* resize windowLog if input is small enough, to use less memory */ if ( (srcSize < maxWindowResize) @@ -1049,9 +1060,13 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar, size_t dictSize) { cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ + if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN; return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize); } +static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize); +static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize); + ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize) { @@ -1059,7 +1074,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) { srcSizeHint = CCtxParams->srcSizeHint; } - cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize); + cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize); if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog; if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog; @@ -1069,6 +1084,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength; if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy; assert(!ZSTD_checkCParams(cParams)); + /* srcSizeHint == 0 means 0 */ return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize); } @@ -1104,7 +1120,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) { RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); { ZSTD_compressionParameters const cParams = - ZSTD_getCParamsFromCCtxParams(params, 0, 0); + ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); U32 const divider = (cParams.minMatch==3) ? 
3 : 4; size_t const maxNbSeq = blockSize / divider; @@ -1136,7 +1152,7 @@ size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel) { - ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0); + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0); return ZSTD_estimateCCtxSize_usingCParams(cParams); } @@ -1155,7 +1171,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) { RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); { ZSTD_compressionParameters const cParams = - ZSTD_getCParamsFromCCtxParams(params, 0, 0); + ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0); size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize; @@ -1175,7 +1191,7 @@ size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) { - ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0); + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0); return ZSTD_estimateCStreamSize_usingCParams(cParams); } @@ -1243,7 +1259,7 @@ static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, assert(cParams1.strategy == cParams2.strategy); } -static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) +void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) { int i; for (i = 0; i < ZSTD_REP_NUM; ++i) @@ -1320,10 +1336,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms, DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset); if (forceResetIndex == ZSTDirp_reset) { - memset(&ms->window, 0, sizeof(ms->window)); - ms->window.dictLimit = 1; /* start from 1, so that 1st position is valid */ - ms->window.lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ - ms->window.nextSrc = ms->window.base + 1; /* see issue #1241 */ + ZSTD_window_init(&ms->window); ZSTD_cwksp_mark_tables_dirty(ws); } @@ -1416,7 +1429,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, size_t const matchStateSize = ZSTD_sizeof_matchState(¶ms.cParams, /* forCCtx */ 1); size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize); - ZSTD_indexResetPolicy_e needsIndexReset = ZSTDirp_continue; + ZSTD_indexResetPolicy_e needsIndexReset = zc->initialized ? 
ZSTDirp_continue : ZSTDirp_reset; if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) { needsIndexReset = ZSTDirp_reset; } @@ -1541,11 +1554,12 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq)); zc->maxNbLdmSequences = maxNbLdmSeq; - memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window)); + ZSTD_window_init(&zc->ldmState.window); ZSTD_window_clear(&zc->ldmState.window); } DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); + zc->initialized = 1; return 0; } @@ -1603,6 +1617,7 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, assert(windowLog != 0); /* Resize working context table params for input only, since the dict * has its own tables. */ + /* pledgedSrcSize == 0 means 0! */ params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0); params.cParams.windowLog = windowLog; FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, @@ -1889,16 +1904,6 @@ static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* par /* See doc/zstd_compression_format.md for detailed format description */ -static size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) -{ - U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); - RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, - dstSize_tooSmall); - MEM_writeLE24(dst, cBlockHeader24); - memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); - return ZSTD_blockHeaderSize + srcSize; -} - void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) { const seqDef* const sequences = seqStorePtr->sequencesStart; @@ -1936,6 +1941,16 @@ static int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) } } +/* ZSTD_useTargetCBlockSize(): + * Returns whether the target compressed block size parameter is in use. + * If it is, compression makes a best effort to produce compressed blocks of around targetCBlockSize. + * Returns 1 if in use, 0 otherwise. */ +static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) +{ + DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize); + return (cctxParams->targetCBlockSize != 0); +} + /* ZSTD_compressSequences_internal(): * actually compresses both literals and sequences */ MEM_STATIC size_t @@ -2435,6 +2450,70 @@ out: return cSize; } +static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastBlock) { + size_t cSize = 0; + DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize); + + { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + FORWARD_IF_ERROR(bss); + if (bss == ZSTDbss_compress) { + cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, lastBlock); + } } + + /* Superblock compression may fail. In that case, + * encode the block with ZSTD_noCompressSuperBlock(), which writes + * the sub-blocks in uncompressed mode. + */ + if (cSize == 0) { + cSize = ZSTD_noCompressSuperBlock(dst, dstCapacity, src, srcSize, zc->appliedParams.targetCBlockSize, lastBlock); + /* In compression, there is an assumption that a compressed block is always + * within the size of ZSTD_compressBound().
However, superblock compression + * can exceed that bound, due to the overhead of the sub-block headers. + * This breaks in streaming mode, where the output buffer in the compression + * context is allocated ZSTD_compressBound() bytes of memory, which may not be + * big enough for superblock compression. + * In that case, fall back to normal compression. This is possible because + * targetCBlockSize is best-effort, not a guarantee. */ + if (cSize != ERROR(dstSize_tooSmall)) return cSize; + else { + BYTE* const ostart = (BYTE*)dst; + /* If ZSTD_noCompressSuperBlock fails with dstSize_tooSmall, + * compress normally. + */ + cSize = ZSTD_compressSequences(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + ostart+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, + srcSize, + zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + zc->bmi2); + if (!ZSTD_isError(cSize) && cSize != 0) { + U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(ostart, cBlockHeader24); + cSize += ZSTD_blockHeaderSize; + } + } + } + + if (!ZSTD_isError(cSize) && cSize != 0) { + /* confirm repcodes and entropy tables when emitting a compressed block */ + ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; + zc->blockState.prevCBlock = zc->blockState.nextCBlock; + zc->blockState.nextCBlock = tmp; + } + /* We check that dictionaries have offset codes available for the first + * block. After the first block, the offcode table might not have large + * enough codes to represent the offsets in the data. + */ + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + return cSize; +} static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_cwksp* ws, @@ -2500,21 +2579,30 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; - { size_t cSize = ZSTD_compressBlock_internal(cctx, - op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, - ip, blockSize, 1 /* frame */); - FORWARD_IF_ERROR(cSize); - if (cSize == 0) { /* block is not compressible */ - cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + { size_t cSize; + int useTargetCBlockSize = ZSTD_useTargetCBlockSize(&cctx->appliedParams); + if (useTargetCBlockSize) { + cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock); FORWARD_IF_ERROR(cSize); } else { - const U32 cBlockHeader = cSize == 1 ? - lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : - lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); - MEM_writeLE24(op, cBlockHeader); - cSize += ZSTD_blockHeaderSize; + cSize = ZSTD_compressBlock_internal(cctx, + op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, + ip, blockSize, 1 /* frame */); + FORWARD_IF_ERROR(cSize); + + if (cSize == 0) { /* block is not compressible */ + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize); + } else { + U32 const cBlockHeader = cSize == 1 ?
+ lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : + lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader); + cSize += ZSTD_blockHeaderSize; + } } + ip += blockSize; assert(remaining >= blockSize); remaining -= blockSize; @@ -2763,37 +2851,13 @@ static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSym return 0; } - -/* Dictionary format : - * See : - * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format - */ -/*! ZSTD_loadZstdDictionary() : - * @return : dictID, or an error code - * assumptions : magic number supposed already checked - * dictSize supposed >= 8 - */ -static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, - ZSTD_matchState_t* ms, - ZSTD_cwksp* ws, - ZSTD_CCtx_params const* params, - const void* dict, size_t dictSize, - ZSTD_dictTableLoadMethod_e dtlm, - void* workspace) +size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, + short* offcodeNCount, unsigned* offcodeMaxValue, + const void* const dict, size_t dictSize) { - const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */ const BYTE* const dictEnd = dictPtr + dictSize; - short offcodeNCount[MaxOff+1]; - unsigned offcodeMaxValue = MaxOff; - size_t dictID; - - ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= 8); - assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); - - dictPtr += 4; /* skip magic number */ - dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr); - dictPtr += 4; + dictPtr += 8; { unsigned maxSymbolValue = 255; size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr); @@ -2803,7 +2867,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, } { unsigned offcodeLog; - size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted); RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted); /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ @@ -2852,6 +2916,42 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, bs->rep[2] = MEM_readLE32(dictPtr+8); dictPtr += 12; + return dictPtr - (const BYTE*)dict; +} + +/* Dictionary format : + * See : + * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format + */ +/*! ZSTD_loadZstdDictionary() : + * @return : dictID, or an error code + * assumptions : magic number supposed already checked + * dictSize supposed >= 8 + */ +static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, + ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + const void* dict, size_t dictSize, + ZSTD_dictTableLoadMethod_e dtlm, + void* workspace) +{ + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; + short offcodeNCount[MaxOff+1]; + unsigned offcodeMaxValue = MaxOff; + size_t dictID; + size_t eSize; + + ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= 8); + assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); + + dictID = params->fParams.noDictIDFlag ? 
0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ ); + eSize = ZSTD_loadCEntropy(bs, workspace, offcodeNCount, &offcodeMaxValue, dict, dictSize); + FORWARD_IF_ERROR(eSize); + dictPtr += eSize; + { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); U32 offcodeMax = MaxOff; if (dictContentSize <= ((U32)-1) - 128 KB) { @@ -2986,7 +3086,7 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, ZSTD_parameters params, unsigned long long pledgedSrcSize) { ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); + ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); return ZSTD_compressBegin_advanced_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL /*cdict*/, @@ -2995,9 +3095,9 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); + ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); + ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); @@ -3081,7 +3181,7 @@ static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict,size_t dictSize, - ZSTD_parameters params) + const ZSTD_parameters* params) { ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); @@ -3105,7 +3205,7 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, dst, dstCapacity, src, srcSize, dict, dictSize, - params); + ¶ms); } /* Internal */ @@ -3129,8 +3229,8 @@ size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize + (!srcSize), dict ? dictSize : 0); - ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); + ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? 
dictSize : 0); + ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); assert(params.fParams.contentSizeFlag == 1); return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams); } @@ -3176,7 +3276,7 @@ size_t ZSTD_estimateCDictSize_advanced( size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) { - ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); } @@ -3287,7 +3387,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, cParams, ZSTD_defaultCMem); @@ -3298,7 +3398,7 @@ ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionL ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); return ZSTD_createCDict_advanced(dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cParams, ZSTD_defaultCMem); @@ -3590,7 +3690,7 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); - zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, params); + zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, ¶ms); FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); return 0; } @@ -3655,11 +3755,11 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, ZSTD_EndDirective const flushMode) { const char* const istart = (const char*)input->src; - const char* const iend = istart + input->size; - const char* ip = istart + input->pos; + const char* const iend = input->size != 0 ? istart + input->size : istart; + const char* ip = input->pos != 0 ? istart + input->pos : istart; char* const ostart = (char*)output->dst; - char* const oend = ostart + output->size; - char* op = ostart + output->pos; + char* const oend = output->size != 0 ? ostart + output->size : ostart; + char* op = output->pos != 0 ? 
ostart + output->pos : ostart; U32 someMoreWork = 1; /* check expectations */ @@ -3698,7 +3798,8 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, zcs->inBuff + zcs->inBuffPos, toLoad, ip, iend-ip); zcs->inBuffPos += loaded; - ip += loaded; + if (loaded != 0) + ip += loaded; if ( (flushMode == ZSTD_e_continue) && (zcs->inBuffPos < zcs->inBuffTarget) ) { /* not enough input to fill full block : stop here */ @@ -3758,7 +3859,8 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, zcs->outBuff + zcs->outBuffFlushedSize, toFlush); DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); - op += flushed; + if (flushed) + op += flushed; zcs->outBuffFlushedSize += flushed; if (toFlush!=flushed) { /* flush not fully completed, presumably because dst is too small */ @@ -4069,35 +4171,56 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV }, }; -/*! ZSTD_getCParams() : +/*! ZSTD_getCParams_internal() : * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. - * Size values are optional, provide 0 if not known or unused */ -ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) + * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown. + * Use dictSize == 0 for unknown or unused. */ +static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { - size_t const addedSize = srcSizeHint ? 0 : 500; - U64 const rSize = srcSizeHint+dictSize ? srcSizeHint+dictSize+addedSize : ZSTD_CONTENTSIZE_UNKNOWN; /* intentional overflow for srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN */ + int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN; + size_t const addedSize = unknown && dictSize > 0 ? 500 : 0; + U64 const rSize = unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize; U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); int row = compressionLevel; - DEBUGLOG(5, "ZSTD_getCParams (cLevel=%i)", compressionLevel); + DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel); if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */ if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */ - return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); /* refine parameters based on srcSize & dictSize */ + /* refine parameters based on srcSize & dictSize */ + return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); } } +/*! ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. + * Size values are optional, provide 0 if not known or unused */ +ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) +{ + if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize); +} + +/*! ZSTD_getParams() : + * same idea as ZSTD_getCParams() + * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). 
+ * Fields of `ZSTD_frameParameters` are set to default values */ +static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { + ZSTD_parameters params; + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize); + DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); + memset(¶ms, 0, sizeof(params)); + params.cParams = cParams; + params.fParams.contentSizeFlag = 1; + return params; +} + /*! ZSTD_getParams() : * same idea as ZSTD_getCParams() * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). * Fields of `ZSTD_frameParameters` are set to default values */ ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { - ZSTD_parameters params; - ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize); - DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); - memset(¶ms, 0, sizeof(params)); - params.cParams = cParams; - params.fParams.contentSizeFlag = 1; - return params; + if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize); } diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 14036f87..c76189cc 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -249,6 +249,7 @@ struct ZSTD_CCtx_s { size_t staticSize; SeqCollector seqCollector; int isFirstBlock; + int initialized; seqStore_t seqStore; /* sequences storage ptrs */ ldmState_t ldmState; /* long distance matching state */ @@ -336,6 +337,20 @@ MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) return 1; } +/* ZSTD_noCompressBlock() : + * Writes uncompressed block to dst buffer from given src. + * Returns the size of the block */ +MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) +{ + U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); + RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, + dstSize_tooSmall); + MEM_writeLE24(dst, cBlockHeader24); + memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); + return ZSTD_blockHeaderSize + srcSize; +} + + /* ZSTD_minGain() : * minimum compression required * to generate a compress block or a compressed literals section. @@ -844,6 +859,15 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window, } } } } +MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { + memset(window, 0, sizeof(*window)); + window->base = (BYTE const*)""; + window->dictBase = (BYTE const*)""; + window->dictLimit = 1; /* start from 1, so that 1st position is valid */ + window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ + window->nextSrc = window->base + 1; /* see issue #1241 */ +} + /** * ZSTD_window_update(): * Updates the window by appending [src, src + srcSize) to the window. 
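`ZSTD_noCompressBlock()`, now exposed above as a `MEM_STATIC` helper, emits exactly the 3-byte block header described in `doc/zstd_compression_format.md`. A small illustrative check of how the fields pack (not part of the diff; `rawBlockHeaderCheck` is a hypothetical name, and the decoding matches the `readBlockHeader()` sketch shown earlier):

```c
#include <assert.h>
#include <stdint.h>

/* For a last raw block of 5 bytes:
 * header24 = lastBlock + (bt_raw << 1) + (srcSize << 3) = 1 + 0 + 40 = 41,
 * stored little-endian as bytes 29 00 00. */
static void rawBlockHeaderCheck(void)
{
    uint32_t const lastBlock = 1;
    uint32_t const srcSize   = 5;
    uint32_t const header24  = lastBlock + (0u << 1) + (srcSize << 3); /* 0 == bt_raw */
    assert((header24 & 1) == lastBlock);     /* Last_Block flag */
    assert(((header24 >> 1) & 3) == 0);      /* Block_Type == Raw_Block */
    assert((header24 >> 3) == srcSize);      /* Block_Size == payload size */
    assert(header24 == 41);
}
```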
@@ -857,6 +881,10 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, BYTE const* const ip = (BYTE const*)src; U32 contiguous = 1; DEBUGLOG(5, "ZSTD_window_update"); + if (srcSize == 0) + return contiguous; + assert(window->base != NULL); + assert(window->dictBase != NULL); /* Check if blocks follow each other */ if (src != window->nextSrc) { /* not contiguous */ @@ -931,6 +959,21 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) } #endif +/* =============================================================== + * Shared internal declarations + * These prototypes may be called from sources not in lib/compress + * =============================================================== */ + +/* ZSTD_loadCEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * return : size of dictionary header (size of magic number + dict ID + entropy tables) + * assumptions : magic number supposed already checked + * and dictSize >= 8 */ +size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, + short* offcodeNCount, unsigned* offcodeMaxValue, + const void* const dict, size_t dictSize); + +void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs); /* ============================================================== * Private declarations @@ -940,6 +983,7 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) /* ZSTD_getCParamsFromCCtxParams() : * cParams are built depending on compressionLevel, src size hints, * LDM and manually set compression parameters. + * Note: srcSizeHint == 0 means 0! */ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize); diff --git a/lib/compress/zstd_compress_literals.c b/lib/compress/zstd_compress_literals.c index 6c133311..df3f269d 100644 --- a/lib/compress/zstd_compress_literals.c +++ b/lib/compress/zstd_compress_literals.c @@ -102,11 +102,11 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, cLitSize = singleStream ? HUF_compress1X_repeat( ostart+lhSize, dstCapacity-lhSize, src, srcSize, - 255, 11, entropyWorkspace, entropyWorkspaceSize, + HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) : HUF_compress4X_repeat( ostart+lhSize, dstCapacity-lhSize, src, srcSize, - 255, 11, entropyWorkspace, entropyWorkspaceSize, + HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); if (repeat != HUF_repeat_none) { /* reused the existing table */ diff --git a/lib/compress/zstd_compress_sequences.c b/lib/compress/zstd_compress_sequences.c index 0ff7a268..950471c4 100644 --- a/lib/compress/zstd_compress_sequences.c +++ b/lib/compress/zstd_compress_sequences.c @@ -86,7 +86,7 @@ static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t * Returns the cost in bits of encoding the distribution in count using ctable. * Returns an error if ctable cannot represent all the symbols in count. */ -static size_t ZSTD_fseBitCost( +size_t ZSTD_fseBitCost( FSE_CTable const* ctable, unsigned const* count, unsigned const max) @@ -117,8 +117,8 @@ static size_t ZSTD_fseBitCost( * table described by norm. The max symbol support by norm is assumed >= max. * norm must be valid for every symbol with non-zero probability in count. 
*/ -static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, - unsigned const* count, unsigned const max) +size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max) { unsigned const shift = 8 - accuracyLog; size_t cost = 0; diff --git a/lib/compress/zstd_compress_sequences.h b/lib/compress/zstd_compress_sequences.h index 57e8e367..10cf6781 100644 --- a/lib/compress/zstd_compress_sequences.h +++ b/lib/compress/zstd_compress_sequences.h @@ -44,4 +44,11 @@ size_t ZSTD_encodeSequences( FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2); +size_t ZSTD_fseBitCost( + FSE_CTable const* ctable, + unsigned const* count, + unsigned const max); + +size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max); #endif /* ZSTD_COMPRESS_SEQUENCES_H */ diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c new file mode 100644 index 00000000..eb3e369d --- /dev/null +++ b/lib/compress/zstd_compress_superblock.c @@ -0,0 +1,742 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +#include "hist.h" /* HIST_countFast_wksp */ +#include "zstd_compress_internal.h" +#include "zstd_compress_sequences.h" +#include "zstd_compress_literals.h" +#include "zstd_compress_superblock.h" + +/*-************************************* +* Superblock entropy buffer structs +***************************************/ +/** ZSTD_hufCTablesMetadata_t : + * Stores Literals Block Type for a super-block in hType, and + * huffman tree description in hufDesBuffer. + * hufDesSize refers to the size of huffman tree description in bytes. + * This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */ +typedef struct { + symbolEncodingType_e hType; + BYTE hufDesBuffer[500]; // TODO give name to this value + size_t hufDesSize; +} ZSTD_hufCTablesMetadata_t; + +/** ZSTD_fseCTablesMetadata_t : + * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and + * fse tables in fseTablesBuffer. + * fseTablesSize refers to the size of fse tables in bytes. + * This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */ +typedef struct { + symbolEncodingType_e llType; + symbolEncodingType_e ofType; + symbolEncodingType_e mlType; + BYTE fseTablesBuffer[500]; // TODO give name to this value + size_t fseTablesSize; + size_t lastCountSize; // This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() +} ZSTD_fseCTablesMetadata_t; + +typedef struct { + ZSTD_hufCTablesMetadata_t hufMetadata; + ZSTD_fseCTablesMetadata_t fseMetadata; +} ZSTD_entropyCTablesMetadata_t; + + +/** ZSTD_buildSuperBlockEntropy_literal() : + * Builds entropy for the super-block literals. + * Stores literals block type (raw, rle, compressed) and + * huffman description table to hufMetadata. + * Currently, this does not consider the option of reusing huffman table from + * previous super-block. 
I think it would be a good improvement to add that option. + * @return : size of huffman description table or error code */ +static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize, + const ZSTD_hufCTables_t* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_hufCTablesMetadata_t* hufMetadata, + void* workspace, size_t wkspSize) +{ + BYTE* const wkspStart = (BYTE*)workspace; + BYTE* const wkspEnd = wkspStart + wkspSize; + BYTE* const countWkspStart = wkspStart; + unsigned* const countWksp = (unsigned*)workspace; + const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); + BYTE* const nodeWksp = countWkspStart + countWkspSize; + const size_t nodeWkspSize = wkspEnd-nodeWksp; + unsigned maxSymbolValue = 255; + unsigned huffLog = 11; + + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize); + + /* Prepare nextEntropy assuming reusing the existing table */ + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + + /* small ? don't even attempt compression (speed opt) */ +# define COMPRESS_LITERALS_SIZE_MIN 63 + { size_t const minLitSize = COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) { hufMetadata->hType = set_basic; return 0; } + } + + /* Scan input and build symbol stats */ + { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); + FORWARD_IF_ERROR(largest); + if (largest == srcSize) { hufMetadata->hType = set_rle; return 0; } + if (largest <= (srcSize >> 7)+4) { hufMetadata->hType = set_basic; return 0; } + } + + + /* Build Huffman Tree */ + memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, + maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + FORWARD_IF_ERROR(maxBits); + huffLog = (U32)maxBits; + { size_t cSize = HUF_estimateCompressedSize( + (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); + size_t hSize = HUF_writeCTable( + hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), + (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog); + if (cSize + hSize >= srcSize) { hufMetadata->hType = set_basic; return 0; } + hufMetadata->hType = set_compressed; + return hSize; + } + } +} + +/** ZSTD_buildSuperBlockEntropy_sequences() : + * Builds entropy for the super-block sequences. + * Stores symbol compression modes and fse table to fseMetadata. 
+ * @return : size of fse tables or error code */ +static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, + const ZSTD_fseCTables_t* prevEntropy, + ZSTD_fseCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize) +{ + BYTE* const wkspStart = (BYTE*)workspace; + BYTE* const wkspEnd = wkspStart + wkspSize; + BYTE* const countWkspStart = wkspStart; + unsigned* const countWksp = (unsigned*)workspace; + const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned); + BYTE* const cTableWksp = countWkspStart + countWkspSize; + const size_t cTableWkspSize = wkspEnd-cTableWksp; + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + BYTE* const ostart = fseMetadata->fseTablesBuffer; + BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); + BYTE* op = ostart; + + assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE)); + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq); + memset(workspace, 0, wkspSize); + + fseMetadata->lastCountSize = 0; + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + /* build CTable for Literal Lengths */ + { U32 LLtype; + unsigned max = MaxLL; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + DEBUGLOG(5, "Building LL table"); + nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; + LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, + countWksp, max, mostFrequent, nbSeq, + LLFSELog, prevEntropy->litlengthCTable, + LL_defaultNorm, LL_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(set_basic < set_compressed && set_rle < set_compressed); + assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, + countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, + prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize); + if (LLtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->llType = (symbolEncodingType_e) LLtype; + } } + /* build CTable for Offsets */ + { U32 Offtype; + unsigned max = MaxOff; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ + ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? 
ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + DEBUGLOG(5, "Building OF table"); + nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; + Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, + countWksp, max, mostFrequent, nbSeq, + OffFSELog, prevEntropy->offcodeCTable, + OF_defaultNorm, OF_defaultNormLog, + defaultPolicy, strategy); + assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, + countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize); + if (Offtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->ofType = (symbolEncodingType_e) Offtype; + } } + /* build CTable for MatchLengths */ + { U32 MLtype; + unsigned max = MaxML; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); + nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; + MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, + countWksp, max, mostFrequent, nbSeq, + MLFSELog, prevEntropy->matchlengthCTable, + ML_defaultNorm, ML_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, + countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, + prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize); + if (MLtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->mlType = (symbolEncodingType_e) MLtype; + } } + assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer)); + return op-ostart; +} + + +/** ZSTD_buildSuperBlockEntropy() : + * Builds entropy for the super-block. + * @return : 0 on success or error code */ +static size_t +ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize) +{ + size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy"); + entropyMetadata->hufMetadata.hufDesSize = + ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize, + &prevEntropy->huf, &nextEntropy->huf, + &entropyMetadata->hufMetadata, + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize); + entropyMetadata->fseMetadata.fseTablesSize = + ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr, + &prevEntropy->fse, &nextEntropy->fse, + cctxParams, + &entropyMetadata->fseMetadata, + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize); + return 0; +} + +/** ZSTD_compressSubBlock_literal() : + * Compresses literals section for a sub-block. + * Compressed literal size needs to be less than uncompressed literal size. 
+ * ZSTD spec doesn't have this constraint. I will explain why I have this constraint here. + * Literals section header size ranges from 1 to 5 bytes, + * which is dictated by regenerated size and compressed size. + * In order to figure out the memory address to start writing compressed literal, + * it is necessary to figure out the literals section header size. + * The challenge is that compressed size is only known after compression. + * This is a chicken-and-egg problem. + * I am simplifying the problem by assuming that + * compressed size will always be less than or equal to regenerated size, + * and using regenerated size to calculate literals section header size. + * hufMetadata->hType has literals block type info. + * If it is set_basic, all sub-blocks' literals sections will be Raw_Literals_Block. + * If it is set_rle, all sub-blocks' literals sections will be RLE_Literals_Block. + * If it is set_compressed, first sub-block's literals section will be Compressed_Literals_Block + * and the following sub-blocks' literals sections will be Treeless_Literals_Block. + * @return : compressed size of literals section of a sub-block + * Or 0 if it is unable to compress. + * Or error code */ +static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + const BYTE* literals, size_t litSize, + void* dst, size_t dstSize, + const int bmi2, int writeEntropy) +{ + size_t const lhSize = 3 + (litSize >= 1 KB) + (litSize >= 16 KB); + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart + lhSize; + U32 singleStream = litSize < 256; + symbolEncodingType_e hType = writeEntropy ? set_compressed : set_repeat; + size_t cLitSize = 0; + + (void)bmi2; // TODO bmi2... + + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy); + + if (writeEntropy && litSize == 0) { + /* Literals section cannot be compressed mode when litSize == 0. + * (This seems to be a decoder constraint.) + * Entropy cannot be written if literals section is not compressed mode. + */ + return 0; + } + + if (litSize == 0 || hufMetadata->hType == set_basic) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } else if (hufMetadata->hType == set_rle) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal"); + return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize); + } + + if (lhSize == 3) singleStream = 1; + if (writeEntropy) { + memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize); + op += hufMetadata->hufDesSize; + cLitSize += hufMetadata->hufDesSize; + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize); + } + + // TODO bmi2 + { const size_t cSize = singleStream ?
HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable) + : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable); + op += cSize; + cLitSize += cSize; + if (cSize == 0 || ERR_isError(cSize)) { + return 0; + } + if (cLitSize > litSize) { + if (writeEntropy) return 0; + else return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize); + } + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18); + MEM_writeLE32(ostart, lhc); + break; + } + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + default: /* not possible : lhSize is {3,4,5} */ + assert(0); + } + return op-ostart; +} + +static size_t ZSTD_seqDecompressedSize(const seqDef* sequences, size_t nbSeq, size_t litSize) { + const seqDef* const sstart = sequences; + const seqDef* const send = sequences + nbSeq; + const seqDef* sp = sstart; + size_t matchLengthSum = 0; + while (send-sp > 0) { + matchLengthSum += sp->matchLength + MINMATCH; + sp++; + } + return matchLengthSum + litSize; +} + +/** ZSTD_compressSubBlock_sequences() : + * Compresses sequences section for a sub-block. + * fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have + * symbol compression modes for the super-block. + * First sub-block will have these in its header. The following sub-blocks + * will always have repeat mode. + * @return : compressed size of sequences section of a sub-block + * Or 0 if it is unable to compress + * Or error code. 
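+ * Note : a follow-up sub-block that emits all three fields in repeat mode writes the single + * Sequences_Section header byte (set_repeat<<6) + (set_repeat<<4) + (set_repeat<<2) = 0xFC, + * exactly as the seqHead computation below does.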
*/ +static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + const seqDef* sequences, size_t nbSeq, + const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const int bmi2, int writeEntropy) +{ + const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + BYTE* seqHead; + + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets); + + /* Sequences Header */ + RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, + dstSize_tooSmall); + if (nbSeq < 0x7F) + *op++ = (BYTE)nbSeq; + else if (nbSeq < LONGNBSEQ) + op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; + else + op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + if (writeEntropy && nbSeq == 0) { + return 0; + } + if (nbSeq==0) { + return op - ostart; + } + + /* seqHead : flags for FSE encoding type */ + seqHead = op++; + + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart)); + + if (writeEntropy) { + const U32 LLtype = fseMetadata->llType; + const U32 Offtype = fseMetadata->ofType; + const U32 MLtype = fseMetadata->mlType; + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize); + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize); + op += fseMetadata->fseTablesSize; + } else { + const U32 repeat = set_repeat; + *seqHead = (BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2)); + } + + { size_t const bitstreamSize = ZSTD_encodeSequences( + op, oend - op, + fseTables->matchlengthCTable, mlCode, + fseTables->offcodeCTable, ofCode, + fseTables->litlengthCTable, llCode, + sequences, nbSeq, + longOffsets, bmi2); + FORWARD_IF_ERROR(bitstreamSize); + op += bitstreamSize; + /* zstd versions <= 1.3.4 mistakenly report corruption when + * FSE_readNCount() receives a buffer < 4 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1146. + * This can happen when the last set_compressed table present is 2 + * bytes and the bitstream is only one byte. + * In this exceedingly rare case, we will simply emit an uncompressed + * block, since it isn't worth optimizing. + */ + if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) { + /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(fseMetadata->lastCountSize + bitstreamSize == 3); + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " + "emitting an uncompressed block."); + return 0; + } + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize); + } + + /* zstd versions <= 1.4.0 mistakenly report error when + * sequences section body size is less than 3 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1664. + * This can happen when the previous sequences section block is compressed + * with rle mode and the current block's sequences section is compressed + * with repeat mode where sequences section body size can be 1 byte. + */ + if (op-seqHead < 4) { + return 0; + } + + return op - ostart; +} + +/** ZSTD_compressSubBlock() : + * Compresses a single sub-block. + * @return : compressed size of the sub-block + * Or 0 if it failed to compress. 
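+ * Each sub-block is a standard zstd block : a 3-byte block header (lastBlock bit, + * 2-bit block type, 21-bit block size), followed by the literals and sequences sections. + * Worked example : a non-last compressed sub-block whose contents occupy 1300 bytes + * gets the header 0 + (bt_compressed<<1) + (1300<<3) = 0x28A4, written little-endian.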
*/ +static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + const seqDef* sequences, size_t nbSeq, + const BYTE* literals, size_t litSize, + const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const int bmi2, int writeEntropy, U32 lastBlock) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart + ZSTD_blockHeaderSize; + DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeEntropy=%d, lastBlock=%d)", + litSize, nbSeq, writeEntropy, lastBlock); + { size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable, + &entropyMetadata->hufMetadata, literals, litSize, + op, oend-op, bmi2, writeEntropy); + FORWARD_IF_ERROR(cLitSize); + if (cLitSize == 0) return 0; + op += cLitSize; + } + { size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse, + &entropyMetadata->fseMetadata, + sequences, nbSeq, + llCode, mlCode, ofCode, + cctxParams, + op, oend-op, + bmi2, writeEntropy); + FORWARD_IF_ERROR(cSeqSize); + if (cSeqSize == 0) return 0; + op += cSeqSize; + } + /* Write block header */ + { size_t cSize = (op-ostart)-ZSTD_blockHeaderSize; + U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(ostart, cBlockHeader24); + } + return op-ostart; +} + +static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize, + const ZSTD_hufCTables_t* huf, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + unsigned* const countWksp = (unsigned*)workspace; + unsigned maxSymbolValue = 255; + size_t literalSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + + if (hufMetadata->hType == set_basic) return litSize; + else if (hufMetadata->hType == set_rle) return 1; + else if (hufMetadata->hType == set_compressed) { + size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); + if (ZSTD_isError(largest)) return litSize; + { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); + if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; + return cLitSizeEstimate + literalSectionHeaderSize; + } } + assert(0); /* impossible */ + return 0; +} + +static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, + const BYTE* codeTable, unsigned maxCode, + size_t nbSeq, const FSE_CTable* fseCTable, + const U32* additionalBits, + short const* defaultNorm, U32 defaultNormLog, + void* workspace, size_t wkspSize) +{ + unsigned* const countWksp = (unsigned*)workspace; + const BYTE* ctp = codeTable; + const BYTE* const ctStart = ctp; + const BYTE* const ctEnd = ctStart + nbSeq; + size_t cSymbolTypeSizeEstimateInBits = 0; + unsigned max = maxCode; + + HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ + if (type == set_basic) { + cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max); + } else if (type == set_rle) { + cSymbolTypeSizeEstimateInBits = 0; + } else if (type == set_compressed || type == set_repeat) { + cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); + } + if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10; + while (ctp < ctEnd) { + if (additionalBits) cSymbolTypeSizeEstimateInBits += 
additionalBits[*ctp]; + else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ + ctp++; + } + return cSymbolTypeSizeEstimateInBits / 8; +} + +static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + size_t cSeqSizeEstimate = 0; + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff, + nbSeq, fseTables->offcodeCTable, NULL, + OF_defaultNorm, OF_defaultNormLog, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL, + nbSeq, fseTables->litlengthCTable, LL_bits, + LL_defaultNorm, LL_defaultNormLog, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML, + nbSeq, fseTables->matchlengthCTable, ML_bits, + ML_defaultNorm, ML_defaultNormLog, + workspace, wkspSize); + if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; + return cSeqSizeEstimate + sequencesSectionHeaderSize; +} + +static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, + const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) { + size_t cSizeEstimate = 0; + cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize, + &entropy->huf, &entropyMetadata->hufMetadata, + workspace, wkspSize, writeEntropy); + cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, + nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, + workspace, wkspSize, writeEntropy); + return cSizeEstimate + ZSTD_blockHeaderSize; +} + +/** ZSTD_compressSubBlock_multi() : + * Breaks super-block into multiple sub-blocks and compresses them. + * Entropy will be written to the first block. + * The following blocks will use repeat mode to compress. + * All sub-blocks are compressed blocks (no raw or rle blocks). + * @return : compressed size of the super block (which is multiple ZSTD blocks) + * Or 0 if it failed to compress. 
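+ * Splitting is greedy : sequences are accumulated one at a time until the running + * ZSTD_estimateSubBlockSize() estimate exceeds targetCBlockSize (or the last sequence is reached), + * at which point the accumulated range is flushed as one sub-block. + * Illustration (approximate) : a super-block compressing to ~40 KB with + * targetCBlockSize=2000 would be emitted as roughly 20 sub-blocks.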
*/ +static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const int bmi2, U32 lastBlock, + void* workspace, size_t wkspSize) +{ + const seqDef* const sstart = seqStorePtr->sequencesStart; + const seqDef* const send = seqStorePtr->sequences; + const seqDef* sp = sstart; + const BYTE* const lstart = seqStorePtr->litStart; + const BYTE* const lend = seqStorePtr->lit; + const BYTE* lp = lstart; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + const BYTE* llCodePtr = seqStorePtr->llCode; + const BYTE* mlCodePtr = seqStorePtr->mlCode; + const BYTE* ofCodePtr = seqStorePtr->ofCode; + size_t targetCBlockSize = cctxParams->targetCBlockSize; + size_t litSize, seqCount; + int writeEntropy = 1; + size_t remaining = ZSTD_seqDecompressedSize(sstart, send-sstart, lend-lstart); + size_t cBlockSizeEstimate = 0; + + DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)", + (unsigned)(lend-lp), (unsigned)(send-sstart)); + + litSize = 0; + seqCount = 0; + while (sp + seqCount < send) { + const seqDef* const sequence = sp + seqCount; + const U32 lastSequence = sequence+1 == send; + litSize = (sequence == send) ? (size_t)(lend-lp) : litSize + sequence->litLength; + seqCount++; + /* I think there is an optimization opportunity here. + * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful + * since it recalculates the estimate from scratch. + * For example, it would recount literal distribution and symbol codes every time. + */ + cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount, + entropy, entropyMetadata, + workspace, wkspSize, writeEntropy); + if (cBlockSizeEstimate > targetCBlockSize || lastSequence) { + const size_t decompressedSize = ZSTD_seqDecompressedSize(sp, seqCount, litSize); + const size_t cSize = ZSTD_compressSubBlock(entropy, entropyMetadata, + sp, seqCount, + lp, litSize, + llCodePtr, mlCodePtr, ofCodePtr, + cctxParams, + op, oend-op, + bmi2, writeEntropy, lastBlock && lastSequence); + FORWARD_IF_ERROR(cSize); + if (cSize > 0 && cSize < decompressedSize) { + assert(remaining >= decompressedSize); + remaining -= decompressedSize; + sp += seqCount; + lp += litSize; + op += cSize; + llCodePtr += seqCount; + mlCodePtr += seqCount; + ofCodePtr += seqCount; + litSize = 0; + seqCount = 0; + writeEntropy = 0; // Entropy only needs to be written once + } + } + } + if (remaining) { + DEBUGLOG(5, "ZSTD_compressSubBlock_multi failed to compress"); + return 0; + } + DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed"); + return op-ostart; +} + +size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + unsigned lastBlock) { + ZSTD_entropyCTablesMetadata_t entropyMetadata; + + FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, + &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + &entropyMetadata, + zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */)); + + return ZSTD_compressSubBlock_multi(&zc->seqStore, + &zc->blockState.nextCBlock->entropy, + &entropyMetadata, + &zc->appliedParams, + dst, dstCapacity, + zc->bmi2, lastBlock, + zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */); +} + +size_t ZSTD_noCompressSuperBlock(void* dst, size_t dstCapacity, + const void*
src, size_t srcSize, + size_t targetCBlockSize, + unsigned lastBlock) { + const BYTE* const istart = (const BYTE*)src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip = istart; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + DEBUGLOG(5, "ZSTD_noCompressSuperBlock (dstCapacity=%zu, srcSize=%zu, targetCBlockSize=%zu)", + dstCapacity, srcSize, targetCBlockSize); + while (ip < iend) { + size_t remaining = iend-ip; + unsigned lastSubBlock = remaining <= targetCBlockSize; + size_t blockSize = lastSubBlock ? remaining : targetCBlockSize; + size_t cSize = ZSTD_noCompressBlock(op, oend-op, ip, blockSize, lastSubBlock && lastBlock); + FORWARD_IF_ERROR(cSize); + ip += blockSize; + op += cSize; + } + return op-ostart; +} diff --git a/lib/compress/zstd_compress_superblock.h b/lib/compress/zstd_compress_superblock.h new file mode 100644 index 00000000..b1d72ec4 --- /dev/null +++ b/lib/compress/zstd_compress_superblock.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMPRESS_ADVANCED_H +#define ZSTD_COMPRESS_ADVANCED_H + +/*-************************************* +* Dependencies +***************************************/ + +#include "zstd.h" /* ZSTD_CCtx */ + +/*-************************************* +* Target Compressed Block Size +***************************************/ + +/* ZSTD_compressSuperBlock() : + * Used to compress a super block when targetCBlockSize is being used. + * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */ +size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + unsigned lastBlock); + +/* ZSTD_noCompressSuperBlock() : +* Used to break a super block into multiple uncompressed sub blocks +* when targetCBlockSize is being used. +* The given block will be broken into multiple uncompressed sub blocks that are +* around targetCBlockSize. */ +size_t ZSTD_noCompressSuperBlock(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + size_t targetCBlockSize, + unsigned lastBlock); + +#endif /* ZSTD_COMPRESS_ADVANCED_H */ diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index a661a485..2e657f7c 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -96,7 +96,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( dictCParams->hashLog : hBitsL; const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ? dictCParams->chainLog : hBitsS; - const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart); + const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic"); @@ -271,7 +271,7 @@ _match_stored: U32 const repIndex2 = current2 - offset_2; const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState && repIndex2 < prefixLowestIndex ? 
- dictBase - dictIndexDelta + repIndex2 : + dictBase + repIndex2 - dictIndexDelta : base + repIndex2; if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 9ad7e03b..dd82830e 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -660,7 +660,7 @@ ZSTD_compressBlock_lazy_generic( const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? prefixLowestIndex - (U32)(dictEnd - dictBase) : 0; - const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest); + const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); /* init */ ip += (dictAndPrefixLength == 0); diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index bc3062b5..d0542562 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -490,7 +490,7 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* /* Size the seq pool tables */ ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize)); /* Reset the window */ - ZSTD_window_clear(&serialState->ldmState.window); + ZSTD_window_init(&serialState->ldmState.window); serialState->ldmWindow = serialState->ldmState.window; /* Resize tables and output space if necessary. */ if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) { @@ -1076,7 +1076,7 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)", compressionLevel); mtctx->params.compressionLevel = compressionLevel; - { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, 0, 0); + { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0); cParams.windowLog = saved_wlog; mtctx->params.cParams = cParams; } @@ -1786,7 +1786,7 @@ static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range) BYTE const* const bufferStart = (BYTE const*)buffer.start; BYTE const* const bufferEnd = bufferStart + buffer.capacity; BYTE const* const rangeStart = (BYTE const*)range.start; - BYTE const* const rangeEnd = rangeStart + range.size; + BYTE const* const rangeEnd = range.size != 0 ? rangeStart + range.size : rangeStart; if (rangeStart == NULL || bufferStart == NULL) return 0; diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index bb2d0a96..732e1c93 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -181,17 +181,29 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize /* fill DTable */ { U32 n; - for (n=0; n<nbSymbols; n++) { - U32 const w = huffWeight[n]; - U32 const length = (1 << w) >> 1; - U32 u; + size_t const nEnd = nbSymbols; + for (n=0; n<nEnd; n++) { + size_t const w = huffWeight[n]; + size_t const length = (1 << w) >> 1; + size_t const uStart = rankVal[w]; + size_t const uEnd = uStart + length; + size_t u; HUF_DEltX1 D; - D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w); - for (u = rankVal[w]; u < rankVal[w] + length; u++) - dt[u] = D; - rankVal[w] += length; - } } - + D.byte = (BYTE)n; + D.nbBits = (BYTE)(tableLog + 1 - w); + rankVal[w] = (U32)uEnd; + if (length < 4) { + /* Use length in the loop bound so the compiler knows it is short. */ + for (u = 0; u < length; ++u) + dt[uStart + u] = D; + } else { + /* Unroll the loop 4 times, we know it is a power of 2.
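+ * (In this branch length >= 4, and since length = (1 << w) >> 1 is a power of two, + * it is a multiple of 4, so the stride-4 loop exactly covers [uStart, uEnd).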
*/ + for (u = uStart; u < uEnd; u += 4) { + dt[u + 0] = D; + dt[u + 1] = D; + dt[u + 2] = D; + dt[u + 3] = D; + } } } } return iSize; } @@ -282,6 +294,7 @@ HUF_decompress4X1_usingDTable_internal_body( { const BYTE* const istart = (const BYTE*) cSrc; BYTE* const ostart = (BYTE*) dst; BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - 3; const void* const dtPtr = DTable + 1; const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; @@ -306,9 +319,9 @@ HUF_decompress4X1_usingDTable_internal_body( BYTE* op2 = opStart2; BYTE* op3 = opStart3; BYTE* op4 = opStart4; - U32 endSignal = BIT_DStream_unfinished; DTableDesc const dtd = HUF_getDTableDesc(DTable); U32 const dtLog = dtd.tableLog; + U32 endSignal = 1; if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); @@ -317,8 +330,7 @@ HUF_decompress4X1_usingDTable_internal_body( CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); - while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) { + for ( ; (endSignal) & (op4 < olimit) ; ) { HUF_DECODE_SYMBOLX1_2(op1, &bitD1); HUF_DECODE_SYMBOLX1_2(op2, &bitD2); HUF_DECODE_SYMBOLX1_2(op3, &bitD3); @@ -335,10 +347,10 @@ HUF_decompress4X1_usingDTable_internal_body( HUF_DECODE_SYMBOLX1_0(op2, &bitD2); HUF_DECODE_SYMBOLX1_0(op3, &bitD3); HUF_DECODE_SYMBOLX1_0(op4, &bitD4); - BIT_reloadDStream(&bitD1); - BIT_reloadDStream(&bitD2); - BIT_reloadDStream(&bitD3); - BIT_reloadDStream(&bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; } /* check corruption */ @@ -757,7 +769,6 @@ HUF_decompress1X2_usingDTable_internal_body( return dstSize; } - FORCE_INLINE_TEMPLATE size_t HUF_decompress4X2_usingDTable_internal_body( void* dst, size_t dstSize, @@ -769,6 +780,7 @@ HUF_decompress4X2_usingDTable_internal_body( { const BYTE* const istart = (const BYTE*) cSrc; BYTE* const ostart = (BYTE*) dst; BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - (sizeof(size_t)-1); const void* const dtPtr = DTable+1; const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; @@ -793,7 +805,7 @@ HUF_decompress4X2_usingDTable_internal_body( BYTE* op2 = opStart2; BYTE* op3 = opStart3; BYTE* op4 = opStart4; - U32 endSignal; + U32 endSignal = 1; DTableDesc const dtd = HUF_getDTableDesc(DTable); U32 const dtLog = dtd.tableLog; @@ -804,8 +816,29 @@ HUF_decompress4X2_usingDTable_internal_body( CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); /* 16-32 symbols per loop (4-8 symbols per stream) */ - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); - for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) { + for ( ; (endSignal) & (op4 < olimit); ) { +#ifdef __clang__ + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + endSignal &= 
BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; +#else HUF_DECODE_SYMBOLX2_2(op1, &bitD1); HUF_DECODE_SYMBOLX2_2(op2, &bitD2); HUF_DECODE_SYMBOLX2_2(op3, &bitD3); @@ -822,8 +855,11 @@ HUF_decompress4X2_usingDTable_internal_body( HUF_DECODE_SYMBOLX2_0(op2, &bitD2); HUF_DECODE_SYMBOLX2_0(op3, &bitD3); HUF_DECODE_SYMBOLX2_0(op4, &bitD4); - - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; +#endif } /* check corruption */ diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index dd4591b7..d4d42e79 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -618,7 +618,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, { const BYTE* ip = (const BYTE*)(*srcPtr); BYTE* const ostart = (BYTE* const)dst; - BYTE* const oend = ostart + dstCapacity; + BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart; BYTE* op = ostart; size_t remainingSrcSize = *srcSizePtr; @@ -669,7 +669,9 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, if (ZSTD_isError(decodedSize)) return decodedSize; if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, op, decodedSize); - op += decodedSize; + if (decodedSize != 0) + op += decodedSize; + assert(ip != NULL); ip += cBlockSize; remainingSrcSize -= cBlockSize; if (blockProperties.lastBlock) break; @@ -776,7 +778,8 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, "error."); if (ZSTD_isError(res)) return res; assert(res <= dstCapacity); - dst = (BYTE*)dst + res; + if (res != 0) + dst = (BYTE*)dst + res; dstCapacity -= res; } moreThan1Frame = 1; @@ -1486,11 +1489,13 @@ MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input) { - const char* const istart = (const char*)(input->src) + input->pos; - const char* const iend = (const char*)(input->src) + input->size; + const char* const src = (const char*)input->src; + const char* const istart = input->pos != 0 ? src + input->pos : src; + const char* const iend = input->size != 0 ? src + input->size : src; const char* ip = istart; - char* const ostart = (char*)(output->dst) + output->pos; - char* const oend = (char*)(output->dst) + output->size; + char* const dst = (char*)output->dst; + char* const ostart = output->pos != 0 ? dst + output->pos : dst; + char* const oend = output->size != 0 ? 
dst + output->size : dst; char* op = ostart; U32 someMoreWork = 1; diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 767e5f9a..ce43c525 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -715,7 +715,7 @@ size_t ZSTD_execSequence(BYTE* op, /* Errors and uncommon cases handled here. */ assert(oLitEnd < oMatchEnd); - if (iLitEnd > litLimit || oMatchEnd > oend_w) + if (UNLIKELY(iLitEnd > litLimit || oMatchEnd > oend_w)) return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ @@ -729,7 +729,7 @@ size_t ZSTD_execSequence(BYTE* op, */ assert(WILDCOPY_OVERLENGTH >= 16); ZSTD_copy16(op, (*litPtr)); - if (sequence.litLength > 16) { + if (UNLIKELY(sequence.litLength > 16)) { ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap); } op = oLitEnd; @@ -738,7 +738,7 @@ size_t ZSTD_execSequence(BYTE* op, /* Copy Match */ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { /* offset beyond prefix -> go into extDict */ - RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected); + RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected); match = dictEnd + (match - prefixStart); if (match + sequence.matchLength <= dictEnd) { memmove(oLitEnd, match, sequence.matchLength); @@ -760,7 +760,7 @@ size_t ZSTD_execSequence(BYTE* op, /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy * without overlap checking. */ - if (sequence.offset >= WILDCOPY_VECLEN) { + if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) { /* We bet on a full wildcopy for matches, since we expect matches to be * longer than literals (in general). In silesia, ~10% of matches are longer * than 16 bytes. @@ -802,6 +802,14 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) DStatePtr->state = DInfo.nextState + lowBits; } +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo) +{ + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.nextState + lowBits; +} + /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) * bits before reloading. 
This value is the maximum number of bytes we read @@ -813,25 +821,26 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) : 0) typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; +typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e; -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG FORCE_INLINE_TEMPLATE seq_t -ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) +ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch) { seq_t seq; - U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits; - U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits; - U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits; - U32 const totalBits = llBits+mlBits+ofBits; - U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue; - U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue; - U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue; + ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state]; + ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state]; + ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state]; + U32 const llBase = llDInfo.baseValue; + U32 const mlBase = mlDInfo.baseValue; + U32 const ofBase = ofDInfo.baseValue; + BYTE const llBits = llDInfo.nbAdditionalBits; + BYTE const mlBits = mlDInfo.nbAdditionalBits; + BYTE const ofBits = ofDInfo.nbAdditionalBits; + BYTE const totalBits = llBits+mlBits+ofBits; /* sequence */ { size_t offset; - if (!ofBits) - offset = 0; - else { + if (ofBits > 1) { ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); assert(ofBits <= MaxOff); @@ -845,53 +854,89 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); } - } - - if (ofBits <= 1) { - offset += (llBase==0); - if (offset) { - size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; - temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ - if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset = temp; - } else { /* offset == 0 */ - offset = seqState->prevOffset[0]; - } - } else { seqState->prevOffset[2] = seqState->prevOffset[1]; seqState->prevOffset[1] = seqState->prevOffset[0]; seqState->prevOffset[0] = offset; - } + } else { + U32 const ll0 = (llBase == 0); + if (LIKELY((ofBits == 0))) { + if (LIKELY(!ll0)) + offset = seqState->prevOffset[0]; + else { + offset = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } + } else { + offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); + { size_t temp = (offset==3) ? 
seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } } } seq.offset = offset; } - seq.matchLength = mlBase - + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */ + seq.matchLength = mlBase; + if (mlBits > 0) + seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) BIT_reloadDStream(&seqState->DStream); - if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) BIT_reloadDStream(&seqState->DStream); /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); - seq.litLength = llBase - + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */ + seq.litLength = llBase; + if (llBits > 0) + seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); - /* ANS state update */ - ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ - ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + if (prefetch == ZSTD_p_prefetch) { + size_t const pos = seqState->pos + seq.litLength; + const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart; + seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. + * No consequence though : no memory access will occur, offset is only used for prefetching */ + seqState->pos = pos + seq.matchLength; + } + + /* ANS state update + * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo(). + * clang-9.2.0 does 7% worse with ZSTD_updateFseState(). + * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the + * better option, so it is the default for other compilers. But, if you + * measure that it is worse, please put up a pull request. 
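+ * Both helpers consume exactly the same bits; they differ only in whether the + * decode-table entry is re-read through DStatePtr->table[state] or passed in as the + * already-loaded DInfo.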
+ */ + { +#if defined(__GNUC__) && !defined(__clang__) + const int kUseUpdateFseState = 1; +#else + const int kUseUpdateFseState = 0; +#endif + if (kUseUpdateFseState) { + ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ + ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + } else { + ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */ + } + } return seq; } +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG FORCE_INLINE_TEMPLATE size_t DONT_VECTORIZE ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, @@ -914,6 +959,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, /* Regen sequences */ if (nbSeq) { seqState_t seqState; + size_t error = 0; dctx->fseEntropy = 1; { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; } RETURN_ERROR_IF( @@ -928,17 +974,25 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, BIT_DStream_endOfBuffer < BIT_DStream_completed && BIT_DStream_completed < BIT_DStream_overflow); - for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { - nbSeq--; - { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); - size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); - DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); - if (ZSTD_isError(oneSeqSize)) return oneSeqSize; - op += oneSeqSize; - } } + for ( ; ; ) { + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch); + size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + BIT_reloadDStream(&(seqState.DStream)); + /* gcc and clang both don't like early returns in this loop. + * gcc doesn't like early breaks either. + * Instead save an error and report it at the end. + * When there is an error, don't increment op, so we don't + * overwrite.
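+ * The saved error is inspected once, after the loop has consumed all nbSeq sequences.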
+ */ + if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize; + else op += oneSeqSize; + if (UNLIKELY(!--nbSeq)) break; + } /* check if reached exact end */ DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); + if (ZSTD_isError(error)) return error; RETURN_ERROR_IF(nbSeq, corruption_detected); RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected); /* save reps for next block */ @@ -965,87 +1019,7 @@ ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ - - #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT -FORCE_INLINE_TEMPLATE seq_t -ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets) -{ - seq_t seq; - U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits; - U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits; - U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits; - U32 const totalBits = llBits+mlBits+ofBits; - U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue; - U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue; - U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue; - - /* sequence */ - { size_t offset; - if (!ofBits) - offset = 0; - else { - ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); - ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); - assert(ofBits <= MaxOff); - if (MEM_32bits() && longOffsets) { - U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1); - offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); - if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream); - if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); - } else { - offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); - } - } - - if (ofBits <= 1) { - offset += (llBase==0); - if (offset) { - size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; - temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ - if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset = temp; - } else { - offset = seqState->prevOffset[0]; - } - } else { - seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; - } - seq.offset = offset; - } - - seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ - if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) - BIT_reloadDStream(&seqState->DStream); - if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) - BIT_reloadDStream(&seqState->DStream); - /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */ - ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); - - seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ - if (MEM_32bits()) - BIT_reloadDStream(&seqState->DStream); - - { size_t const pos = seqState->pos + seq.litLength; - const BYTE* const matchBase = (seq.offset > pos) ? 
seqState->dictEnd : seqState->prefixStart; - seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. - * No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */ - seqState->pos = pos + seq.matchLength; - } - - /* ANS state update */ - ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ - ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ - - return seq; -} - FORCE_INLINE_TEMPLATE size_t ZSTD_decompressSequencesLong_body( ZSTD_DCtx* dctx, @@ -1088,14 +1062,14 @@ ZSTD_decompressSequencesLong_body( /* prepare in advance */ for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNbcParams.windowLog); size_t cSize; if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */ @@ -731,7 +755,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, /* collect stats on all samples */ for (u=0; ust_mtim; + timebuf[1].tv_sec = statbuf->st_mtime; res += utimensat(AT_FDCWD, filename, timebuf, 0); } #endif @@ -126,21 +143,20 @@ int UTIL_setFileStat(const char *filename, stat_t *statbuf) res += chown(filename, statbuf->st_uid, statbuf->st_gid); /* Copy ownership */ #endif - res += chmod(filename, statbuf->st_mode & 07777); /* Copy file permissions */ + res += UTIL_chmod(filename, statbuf->st_mode & 07777); /* Copy file permissions */ errno = 0; return -res; /* number of errors is returned */ } -U32 UTIL_isDirectory(const char* infilename) +int UTIL_isDirectory(const char* infilename) { - int r; stat_t statbuf; #if defined(_MSC_VER) - r = _stat64(infilename, &statbuf); + int const r = _stat64(infilename, &statbuf); if (!r && (statbuf.st_mode & _S_IFDIR)) return 1; #else - r = stat(infilename, &statbuf); + int const r = stat(infilename, &statbuf); if (!r && S_ISDIR(statbuf.st_mode)) return 1; #endif return 0; @@ -170,28 +186,25 @@ int UTIL_isSameFile(const char* fName1, const char* fName2) #endif } -#ifndef _MSC_VER -/* Using this to distinguish named pipes */ -U32 UTIL_isFIFO(const char* infilename) +/* UTIL_isFIFO : distinguish named pipes */ +int UTIL_isFIFO(const char* infilename) { /* macro guards, as defined in : https://linux.die.net/man/2/lstat */ #if PLATFORM_POSIX_VERSION >= 200112L stat_t statbuf; - int r = UTIL_getFileStat(infilename, &statbuf); + int const r = UTIL_getFileStat(infilename, &statbuf); if (!r && S_ISFIFO(statbuf.st_mode)) return 1; #endif (void)infilename; return 0; } -#endif -U32 UTIL_isLink(const char* infilename) +int UTIL_isLink(const char* infilename) { /* macro guards, as defined in : https://linux.die.net/man/2/lstat */ #if PLATFORM_POSIX_VERSION >= 200112L - int r; stat_t statbuf; - r = lstat(infilename, &statbuf); + int const r = lstat(infilename, &statbuf); if (!r && S_ISLNK(statbuf.st_mode)) return 1; #endif (void)infilename; diff --git a/programs/util.h b/programs/util.h index 14e5b494..0065ec46 100644 --- a/programs/util.h +++ b/programs/util.h @@ -30,12 +30,12 @@ extern "C" { # include /* _chmod */ #else # include /* chown, stat */ -#if PLATFORM_POSIX_VERSION < 200809L -# include /* utime */ -#else -# include /* AT_FDCWD */ -# include /* utimensat */ -#endif +# if PLATFORM_POSIX_VERSION < 200809L +# include /* utime */ +# 
else +# include <fcntl.h> /* AT_FDCWD */ +# include <sys/stat.h> /* utimensat */ +# endif #endif #include <time.h> /* clock_t, clock, CLOCKS_PER_SEC, nanosleep */ #include "mem.h" /* U32, U64 */ @@ -85,12 +85,6 @@ extern "C" { #endif -/*-************************************* -* Constants -***************************************/ -#define LIST_SIZE_INCREASE (8*1024) -#define MAX_FILE_OF_FILE_NAMES_SIZE (1<<20)*50 - /*-**************************************** * Compiler specifics ******************************************/ @@ -120,8 +114,8 @@ extern int g_utilDisplayLevel; * File functions ******************************************/ #if defined(_MSC_VER) - #define chmod _chmod typedef struct __stat64 stat_t; + typedef int mode_t; #else typedef struct stat stat_t; #endif @@ -129,22 +123,20 @@ extern int g_utilDisplayLevel; int UTIL_fileExist(const char* filename); int UTIL_isRegularFile(const char* infilename); -int UTIL_setFileStat(const char* filename, stat_t* statbuf); -U32 UTIL_isDirectory(const char* infilename); -int UTIL_getFileStat(const char* infilename, stat_t* statbuf); +int UTIL_isDirectory(const char* infilename); int UTIL_isSameFile(const char* file1, const char* file2); -int UTIL_compareStr(const void *p1, const void *p2); int UTIL_isCompressedFile(const char* infilename, const char *extensionList[]); -const char* UTIL_getFileExtension(const char* infilename); +int UTIL_isLink(const char* infilename); +int UTIL_isFIFO(const char* infilename); -#ifndef _MSC_VER -U32 UTIL_isFIFO(const char* infilename); -#endif -U32 UTIL_isLink(const char* infilename); #define UTIL_FILESIZE_UNKNOWN ((U64)(-1)) U64 UTIL_getFileSize(const char* infilename); - -U64 UTIL_getTotalFileSize(const char* const * fileNamesTable, unsigned nbFiles); +U64 UTIL_getTotalFileSize(const char* const * const fileNamesTable, unsigned nbFiles); +int UTIL_getFileStat(const char* infilename, stat_t* statbuf); +int UTIL_setFileStat(const char* filename, stat_t* statbuf); +int UTIL_chmod(char const* filename, mode_t permissions); /*< like chmod, but avoid changing permission of /dev/null */ +int UTIL_compareStr(const void *p1, const void *p2); +const char* UTIL_getFileExtension(const char* infilename); /*-**************************************** diff --git a/programs/zstd.1.md b/programs/zstd.1.md index edb5ea0a..6a63e978 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -405,7 +405,7 @@ The list of available _options_: Bigger hash tables cause less collisions which usually makes compression faster, but requires more memory during compression. - The minimum _hlog_ is 6 (64 B) and the maximum is 26 (128 MiB). + The minimum _hlog_ is 6 (64 B) and the maximum is 30 (1 GiB). - `chainLog`=_clog_, `clog`=_clog_: Specify the maximum number of bits for a hash chain or a binary tree. @@ -416,7 +416,8 @@ The list of available _options_: compression. This option is ignored for the ZSTD_fast strategy. - The minimum _clog_ is 6 (64 B) and the maximum is 28 (256 MiB). + The minimum _clog_ is 6 (64 B) and the maximum is 29 (512 MiB) on 32-bit platforms + and 30 (1 GiB) on 64-bit platforms. - `searchLog`=_slog_, `slog`=_slog_: Specify the maximum number of searches in a hash chain or a binary tree @@ -425,7 +426,7 @@ The list of available _options_: More searches increases the chance to find a match which usually increases compression ratio but decreases compression speed. - The minimum _slog_ is 1 and the maximum is 26. + The minimum _slog_ is 1 and the maximum is 'windowLog' - 1.
- `minMatch`=_mml_, `mml`=_mml_: Specify the minimum searched length of a match in a hash table. @@ -450,7 +451,7 @@ The list of available _options_: For all other strategies, this field has no impact. - The minimum _tlen_ is 0 and the maximum is 999. + The minimum _tlen_ is 0 and the maximum is 128 KiB. - `overlapLog`=_ovlog_, `ovlog`=_ovlog_: Determine `overlapSize`, amount of data reloaded from previous job. @@ -473,7 +474,7 @@ The list of available _options_: Bigger hash tables usually improve compression ratio at the expense of more memory during compression and a decrease in compression speed. - The minimum _lhlog_ is 6 and the maximum is 26 (default: 20). + The minimum _lhlog_ is 6 and the maximum is 30 (default: 20). - `ldmMinMatch`=_lmml_, `lmml`=_lmml_: Specify the minimum searched length of a match for long distance matching. @@ -493,7 +494,7 @@ The list of available _options_: Larger bucket sizes improve collision resolution but decrease compression speed. - The minimum _lblog_ is 0 and the maximum is 8 (default: 3). + The minimum _lblog_ is 1 and the maximum is 8 (default: 3). - `ldmHashRateLog`=_lhrlog_, `lhrlog`=_lhrlog_: Specify the frequency of inserting entries into the long distance matching diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 57dbaa82..cf2f3682 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -1005,12 +1005,10 @@ int main(int argCount, const char* argv[]) unsigned u, fileNamesNb; unsigned const nbFilenames = (unsigned)filenames->tableSize; for (u=0, fileNamesNb=0; u<nbFilenames; u++) { - if (UTIL_isLink(filenames->fileNames[u]) -#ifndef _MSC_VER - && !UTIL_isFIFO(filenames->fileNames[u]) -#endif /* _MSC_VER */ + if ( UTIL_isLink(filenames->fileNames[u]) + && !UTIL_isFIFO(filenames->fileNames[u]) ) { - DISPLAYLEVEL(2, "Warning : %s is a symbolic link, ignoring\n", filenames->fileNames[u]); + DISPLAYLEVEL(2, "Warning : %s is a symbolic link, ignoring \n", filenames->fileNames[u]); } else { filenames->fileNames[fileNamesNb++] = filenames->fileNames[u]; } } diff --git a/tests/fuzz/.gitignore b/tests/fuzz/.gitignore index 9409cf83..e2563097 100644 --- a/tests/fuzz/.gitignore +++ b/tests/fuzz/.gitignore @@ -2,7 +2,13 @@ corpora block_decompress block_round_trip +dictionary_decompress +dictionary_loader +dictionary_round_trip +simple_compress simple_decompress simple_round_trip stream_decompress stream_round_trip +zstd_frame_info +fuzz-*.log \ No newline at end of file diff --git a/tests/fuzz/dictionary_round_trip.c b/tests/fuzz/dictionary_round_trip.c index 9411b50a..4a32b82b 100644 --- a/tests/fuzz/dictionary_round_trip.c +++ b/tests/fuzz/dictionary_round_trip.c @@ -73,7 +73,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) size_t const rBufSize = size; void* rBuf = malloc(rBufSize); - size_t cBufSize = ZSTD_compressBound(size); + size_t cBufSize = ZSTD_compressBound(size) * 2; void *cBuf; /* Half of the time fuzz with a 1 byte smaller output size.
diff --git a/tests/fuzz/simple_round_trip.c b/tests/fuzz/simple_round_trip.c
index 2d1d0598..6a65b190 100644
--- a/tests/fuzz/simple_round_trip.c
+++ b/tests/fuzz/simple_round_trip.c
@@ -48,7 +48,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
 {
     size_t const rBufSize = size;
     void* rBuf = malloc(rBufSize);
-    size_t cBufSize = ZSTD_compressBound(size);
+    size_t cBufSize = ZSTD_compressBound(size) * 2;
     void* cBuf;
 
     /* Give a random portion of src data to the producer, to use for
diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c
index 90bf1a15..313b463f 100644
--- a/tests/fuzz/zstd_helpers.c
+++ b/tests/fuzz/zstd_helpers.c
@@ -96,6 +96,9 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer
     if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
         setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer);
     }
+    if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
+        setRand(cctx, ZSTD_c_targetCBlockSize, ZSTD_TARGETCBLOCKSIZE_MIN, ZSTD_TARGETCBLOCKSIZE_MAX, producer);
+    }
 }
 
 FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, FUZZ_dataProducer_t *producer)
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index 88f3b83f..a5991644 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -490,6 +490,19 @@ static int basicUnitTests(U32 const seed, double compressibility)
     }
     DISPLAYLEVEL(3, "OK \n");
 
+    DISPLAYLEVEL(3, "test%3i : compress a NULL input with each level : ", testNb++);
+    {   int level = -1;
+        ZSTD_CCtx* cctx = ZSTD_createCCtx();
+        if (!cctx) goto _output_error;
+        for (level = -1; level <= ZSTD_maxCLevel(); ++level) {
+            CHECK_Z( ZSTD_compress(compressedBuffer, compressedBufferSize, NULL, 0, level) );
+            CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level) );
+            CHECK_Z( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, NULL, 0) );
+        }
+        ZSTD_freeCCtx(cctx);
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
     DISPLAYLEVEL(3, "test%3d : check CCtx size after compressing empty input : ", testNb++);
     {   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
         size_t const r = ZSTD_compressCCtx(cctx, compressedBuffer, compressedBufferSize, NULL, 0, 19);
@@ -884,6 +897,28 @@ static int basicUnitTests(U32 const seed, double compressibility)
         ZSTDMT_freeCCtx(mtctx);
     }
 
+    DISPLAYLEVEL(3, "test%3u : compress empty string and decompress with small window log : ", testNb++);
+    {   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+        ZSTD_DCtx* const dctx = ZSTD_createDCtx();
+        char out[32];
+        if (cctx == NULL || dctx == NULL) goto _output_error;
+        CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0) );
+        CHECK_VAR(cSize, ZSTD_compress2(cctx, out, sizeof(out), NULL, 0) );
+        DISPLAYLEVEL(3, "OK (%u bytes)\n", (unsigned)cSize);
+
+        CHECK( ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 10) );
+        {   char const* outPtr = out;
+            ZSTD_inBuffer inBuffer = { outPtr, cSize, 0 };
+            ZSTD_outBuffer outBuffer = { NULL, 0, 0 };
+            size_t dSize;
+            CHECK_VAR(dSize, ZSTD_decompressStream(dctx, &outBuffer, &inBuffer) );
+            if (dSize != 0) goto _output_error;
+        }
+
+        ZSTD_freeDCtx(dctx);
+        ZSTD_freeCCtx(cctx);
+    }
+
     DISPLAYLEVEL(3, "test%3i : compress -T2 with/without literals compression : ", testNb++)
     {   ZSTD_CCtx* cctx = ZSTD_createCCtx();
         size_t cSize1, cSize2;
@@ -1137,6 +1172,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
         size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t));
         size_t dictSize;
         U32 dictID;
+        size_t dictHeaderSize;
         if (dictBuffer==NULL || samplesSizes==NULL) {
             free(dictBuffer);
@@ -1226,6 +1262,29 @@
         if (dictID==0) goto _output_error;
         DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictID);
 
+        DISPLAYLEVEL(3, "test%3i : check dict header size no error : ", testNb++);
+        dictHeaderSize = ZDICT_getDictHeaderSize(dictBuffer, dictSize);
+        if (dictHeaderSize==0) goto _output_error;
+        DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictHeaderSize);
+
+        DISPLAYLEVEL(3, "test%3i : check dict header size correctness : ", testNb++);
+        {   unsigned char const dictBufferFixed[144] = { 0x37, 0xa4, 0x30, 0xec, 0x63, 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x1f,
+                                                         0x0f, 0x00, 0x28, 0xe5, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+                                                         0x00, 0x80, 0x0f, 0x9e, 0x0f, 0x00, 0x00, 0x24, 0x40, 0x80, 0x00, 0x01,
+                                                         0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0xde, 0x08,
+                                                         0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+                                                         0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+                                                         0x08, 0x08, 0x08, 0x08, 0xbc, 0xe1, 0x4b, 0x92, 0x0e, 0xb4, 0x7b, 0x18,
+                                                         0x86, 0x61, 0x18, 0xc6, 0x18, 0x63, 0x8c, 0x31, 0xc6, 0x18, 0x63, 0x8c,
+                                                         0x31, 0x66, 0x66, 0x66, 0x66, 0xb6, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x04,
+                                                         0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x73, 0x6f, 0x64, 0x61,
+                                                         0x6c, 0x65, 0x73, 0x20, 0x74, 0x6f, 0x72, 0x74, 0x6f, 0x72, 0x20, 0x65,
+                                                         0x6c, 0x65, 0x69, 0x66, 0x65, 0x6e, 0x64, 0x2e, 0x20, 0x41, 0x6c, 0x69 };
+            dictHeaderSize = ZDICT_getDictHeaderSize(dictBufferFixed, 144);
+            if (dictHeaderSize != 115) goto _output_error;
+        }
+        DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictHeaderSize);
+
         DISPLAYLEVEL(3, "test%3i : compress with dictionary : ", testNb++);
         cSize = ZSTD_compress_usingDict(cctx, compressedBuffer, compressedBufferSize,
                                         CNBuffer, CNBuffSize,
@@ -1999,14 +2058,14 @@ static int basicUnitTests(U32 const seed, double compressibility)
 
     /* long rle test */
     {   size_t sampleSize = 0;
+        size_t expectedCompressedSize = 39; /* block 1, 2: compressed, block 3: RLE, zstd 1.4.4 */
         DISPLAYLEVEL(3, "test%3i : Long RLE test : ", testNb++);
-        RDG_genBuffer(CNBuffer, sampleSize, compressibility, 0., seed+1);
         memset((char*)CNBuffer+sampleSize, 'B', 256 KB - 1);
         sampleSize += 256 KB - 1;
-        RDG_genBuffer((char*)CNBuffer+sampleSize, 96 KB, compressibility, 0., seed+2);
+        memset((char*)CNBuffer+sampleSize, 'A', 96 KB);
         sampleSize += 96 KB;
         cSize = ZSTD_compress(compressedBuffer, ZSTD_compressBound(sampleSize), CNBuffer, sampleSize, 1);
-        if (ZSTD_isError(cSize)) goto _output_error;
+        if (ZSTD_isError(cSize) || cSize > expectedCompressedSize) goto _output_error;
         { CHECK_NEWV(regenSize, ZSTD_decompress(decodedBuffer, sampleSize, compressedBuffer, cSize));
           if (regenSize!=sampleSize) goto _output_error; }
         DISPLAYLEVEL(3, "OK \n");
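The tests above exercise `ZDICT_getDictHeaderSize()`, which returns the size of the entropy-table header at the front of a dictionary (115 bytes of the fixed 144-byte sample). A hedged usage sketch; the `ZDICT_STATIC_LINKING_ONLY` guard is an assumption about how the experimental declaration is exposed in `zdict.h` at this point:

```c
#define ZDICT_STATIC_LINKING_ONLY   /* assumed: ZDICT_getDictHeaderSize is still experimental */
#include <stdio.h>
#include <zdict.h>

/* Split a trained dictionary into header (entropy tables) and raw content. */
static void reportDict(const void* dict, size_t dictSize)
{
    size_t const headerSize = ZDICT_getDictHeaderSize(dict, dictSize);
    if (ZDICT_isError(headerSize)) {
        printf("invalid dictionary : %s\n", ZDICT_getErrorName(headerSize));
        return;
    }
    printf("header = %u bytes, content = %u bytes\n",
           (unsigned)headerSize, (unsigned)(dictSize - headerSize));
}
```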
diff --git a/tests/playTests.sh b/tests/playTests.sh
index 05951ab2..38582f07 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -92,6 +92,11 @@ case "$UNAME" in
     *) MD5SUM="md5sum" ;;
 esac
 
+MTIME="stat -c %Y"
+case "$UNAME" in
+    Darwin | FreeBSD | OpenBSD) MTIME="stat -f %m" ;;
+esac
+
 DIFF="diff"
 case "$UNAME" in
     SunOS) DIFF="gdiff" ;;
@@ -215,20 +220,19 @@ $ZSTD tmp -c --compress-literals -19 | $ZSTD -t
 $ZSTD -b --fast=1 -i0e1 tmp --compress-literals
 $ZSTD -b --fast=1 -i0e1 tmp --no-compress-literals
 
-println "test: --exclude-compressed flag"
+println "\n===> --exclude-compressed flag"
 rm -rf precompressedFilterTestDir
 mkdir -p precompressedFilterTestDir
 ./datagen $size > precompressedFilterTestDir/input.5
 ./datagen $size > precompressedFilterTestDir/input.6
 $ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir
-sleep 5
 ./datagen $size > precompressedFilterTestDir/input.7
 ./datagen $size > precompressedFilterTestDir/input.8
 $ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir
 test ! -f precompressedFilterTestDir/input.5.zst.zst
 test ! -f precompressedFilterTestDir/input.6.zst.zst
-file1timestamp=`date -r precompressedFilterTestDir/input.5.zst +%s`
-file2timestamp=`date -r precompressedFilterTestDir/input.7.zst +%s`
+file1timestamp=`$MTIME precompressedFilterTestDir/input.5.zst`
+file2timestamp=`$MTIME precompressedFilterTestDir/input.7.zst`
 if [[ $file2timestamp -ge $file1timestamp ]]; then
     println "Test is successful. input.5.zst is precompressed and therefore not compressed/modified again."
 else
@@ -246,7 +250,7 @@ test -f precompressedFilterTestDir/input.5.zst.zst
 test -f precompressedFilterTestDir/input.6.zst.zst
 println "Test completed"
 
-println "test : file removal"
+println "\n===> file removal"
 $ZSTD -f --rm tmp
 test ! -f tmp # tmp should no longer be present
 $ZSTD -f -d --rm tmp.zst
@@ -274,14 +278,17 @@ test -f tmp.zst   # destination file should still be present
 rm -rf tmp*  # may also erase tmp* directory from previous failed run
 
-println "\n===> decompression only tests "
-head -c 1048576 /dev/zero > tmp
+println "\n===> decompression only tests "
+# the following test verifies that the decoder is compatible with RLE as the first block
+# older versions of the zstd cli are not able to decode such a corner case.
+# As a consequence, the zstd cli does not generate them, to maintain compatibility with older versions.
+dd bs=1048576 count=1 if=/dev/zero of=tmp
 $ZSTD -d -o tmp1 "$TESTDIR/golden-decompression/rle-first-block.zst"
 $DIFF -s tmp1 tmp
 rm tmp*
 
-println "test : compress multiple files"
+println "\n===> compress multiple files"
 println hello > tmp1
 println world > tmp2
 $ZSTD tmp1 tmp2 -o "$INTOVOID" -f
@@ -328,7 +335,22 @@ $ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
 rm tmp*
 
-println "test : compress multiple files into an output directory, --output-dir-flat"
+if [ -n "$DEVNULLRIGHTS" ]
+then
+    # these tests require sudo rights, which is uncommon.
+    # they are only triggered if the DEVNULLRIGHTS environment variable is set.
+    println "\n===> checking /dev/null permissions are unaltered "
+    ./datagen > tmp
+    sudo $ZSTD tmp -o $INTOVOID   # sudo rights could modify /dev/null permissions
+    sudo $ZSTD tmp -c > $INTOVOID
+    $ZSTD tmp -f -o tmp.zst
+    sudo $ZSTD -d tmp.zst -c > $INTOVOID
+    sudo $ZSTD -d tmp.zst -o $INTOVOID
+    ls -las $INTOVOID | grep "rw-rw-rw-"
+fi
+
+
+println "\n===> compress multiple files into an output directory, --output-dir-flat"
 println henlo > tmp1
 mkdir tmpInputTestDir
 mkdir tmpInputTestDir/we
@@ -436,7 +458,6 @@ $ZSTD -dcf tmp1
 
 println "\n===> frame concatenation "
-
 println "hello " > hello.tmp
 println "world!" > world.tmp
 cat hello.tmp world.tmp > helloworld.tmp
@@ -1241,9 +1262,9 @@ rm -f tmp* dictionary
 if [ "$isWindows" = false ] ; then
 
 println "\n===> zstd fifo named pipe test "
-head -c 10 /dev/zero > tmp_original
+dd bs=1 count=10 if=/dev/zero of=tmp_original
 mkfifo named_pipe
-head -c 10 /dev/zero > named_pipe &
+dd bs=1 count=10 if=/dev/zero of=named_pipe &
 $ZSTD named_pipe -o tmp_compressed
 $ZSTD -d -o tmp_decompressed tmp_compressed
 $DIFF -s tmp_original tmp_decompressed
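The `DEVNULLRIGHTS` block above verifies that running `zstd` under sudo leaves `/dev/null` with its usual `rw-rw-rw-` permissions. The mechanism is the `UTIL_chmod()` wrapper declared in the `programs/util.h` hunk earlier; below is a minimal sketch of the idea, not the actual implementation (which presumably lives in `programs/util.c`):

```c
#include <string.h>
#include <sys/stat.h>   /* chmod, mode_t (POSIX-only sketch) */

/* chmod() wrapper in the spirit of UTIL_chmod() : leave /dev/null alone,
 * so that compressing to the void under sudo cannot alter its permissions. */
static int chmod_butNotDevNull(const char* filename, mode_t permissions)
{
    if (!strcmp(filename, "/dev/null"))
        return 0;   /* pretend success, do not touch the device */
    return chmod(filename, permissions);
}
```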
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 2047d4bd..549a4981 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -509,6 +509,33 @@ static int basicUnitTests(U32 seed, double compressibility)
     }
     DISPLAYLEVEL(3, "OK \n");
 
+    DISPLAYLEVEL(3, "test%3i : NULL buffers : ", testNb++);
+    inBuff.src = NULL;
+    inBuff.size = 0;
+    inBuff.pos = 0;
+    outBuff.dst = NULL;
+    outBuff.size = 0;
+    outBuff.pos = 0;
+    CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) );
+    CHECK(inBuff.pos != inBuff.size, "Entire input should be consumed");
+    CHECK_Z( ZSTD_endStream(zc, &outBuff) );
+    outBuff.dst = (char*)(compressedBuffer);
+    outBuff.size = compressedBufferSize;
+    outBuff.pos = 0;
+    {   size_t const r = ZSTD_endStream(zc, &outBuff);
+        CHECK(r != 0, "Error or some data not flushed (ret=%zu)", r);
+    }
+    inBuff.src = outBuff.dst;
+    inBuff.size = outBuff.pos;
+    inBuff.pos = 0;
+    outBuff.dst = NULL;
+    outBuff.size = 0;
+    outBuff.pos = 0;
+    CHECK_Z( ZSTD_initDStream(zd) );
+    {   size_t const ret = ZSTD_decompressStream(zd, &outBuff, &inBuff);
+        if (ret != 0) goto _output_error;
+    }
+    DISPLAYLEVEL(3, "OK\n");
+
     /* _srcSize compression test */
     DISPLAYLEVEL(3, "test%3i : compress_srcSize %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
     CHECK_Z( ZSTD_initCStream_srcSize(zc, 1, CNBufferSize) );
diff --git a/zlibWrapper/Makefile b/zlibWrapper/Makefile
index 6addb743..feed5b84 100644
--- a/zlibWrapper/Makefile
+++ b/zlibWrapper/Makefile
@@ -20,8 +20,8 @@ TEST_FILE = ../doc/zstd_compression_format.md
 CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ -I$(ZLIB_PATH) -I$(PROGRAMS_PATH) \
             -I$(ZSTDLIBDIR) -I$(ZSTDLIBDIR)/common -I$(ZLIBWRAPPER_PATH)
 
-STDFLAGS = -std=c90 -pedantic -Wno-long-long -Wno-variadic-macros -Wc++-compat \
-           -DNO_snprintf -DNO_vsnprintf  # strict ISO C90 is missing these prototypes
+STDFLAGS = -std=c89 -pedantic -Wno-long-long -Wno-variadic-macros -Wc++-compat \
+           -DNO_snprintf -DNO_vsnprintf  # strict ANSI C89 is missing these prototypes
 DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wswitch-enum \
             -Wdeclaration-after-statement -Wstrict-prototypes -Wundef \
             -Wstrict-aliasing=1
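The new `NULL buffers` case in `zstreamtest.c` pins down a useful contract: a `ZSTD_inBuffer` or `ZSTD_outBuffer` with a NULL pointer and size 0 is legal on both the compression and the decompression side. A standalone sketch of the same contract, using the public streaming API (`ZSTD_compressStream2()` here, where the test uses the older `ZSTD_compressStream()`/`ZSTD_endStream()` pair):

```c
#include <stdio.h>
#include <zstd.h>

/* Compress zero bytes through the streaming API, starting with NULL buffers. */
int main(void)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    char frame[64];
    ZSTD_inBuffer in = { NULL, 0, 0 };     /* NULL input, size 0 : legal */
    ZSTD_outBuffer out = { NULL, 0, 0 };   /* NULL output, size 0 : legal */
    size_t ret;
    if (cctx == NULL) return 1;

    /* Feeding a NULL/empty input must succeed (nothing to consume). */
    ret = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_continue);
    if (ZSTD_isError(ret)) return 1;

    /* Flush the (empty) frame into a real buffer; 0 means fully flushed. */
    out.dst = frame; out.size = sizeof(frame); out.pos = 0;
    ret = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
    if (ZSTD_isError(ret) || ret != 0) return 1;

    printf("empty frame : %u bytes\n", (unsigned)out.pos);
    ZSTD_freeCCtx(cctx);
    return 0;
}
```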