diff --git a/CHANGELOG b/CHANGELOG index ae54896a..44600267 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,8 +1,32 @@ +v1.4.4 +perf: Improved decompression speed, by > 10%, by @terrelln +perf: Better compression speed when re-using a context, by @felixhandte +perf: Fix compression ratio when compressing large files with small dictionary, by @senhuang42 +perf: zstd reference encoder can generate RLE blocks, by @bimbashrestha +perf: minor generic speed optimization, by @davidbolvansky +api: new ability to extract sequences from the parser for analysis, by @bimbashrestha +api: fixed decoding of magic-less frames, by @terrelln +api: fixed ZSTD_initCStream_advanced() performance with fast modes, reported by @QrczakMK +cli: Named pipes support, by @bimbashrestha +cli: short tar's extension support, by @stokito +cli: command --output-dir-flat= , generates target files into requested directory, by @senhuang42 +cli: commands --stream-size=# and --size-hint=#, by @nmagerko +cli: faster `-t` test mode +cli: improved some error messages, by @vangyzen +cli: rare deadlock condition within dictionary builder, by @terrelln +build: single-file decoder with emscripten compilation script, by @cwoffenden +build: fixed zlibWrapper compilation on Visual Studio, reported by @bluenlive +build: fixed deprecation warning for certain gcc version, reported by @jasonma163 +build: fix compilation on old gcc versions, by @cemeyer +build: improved installation directories for cmake script, by Dmitri Shubin +pack: modified pkgconfig, for better integration into openwrt, requested by @neheb +misc: Improved documentation : ZSTD_CLEVEL, DYNAMIC_BMI2, ZSTD_CDict, function deprecation, zstd format +misc: fixed educational decoder : accept larger literals section, and removed UNALIGNED() macro + v1.4.3 bug: Fix Dictionary Compression Ratio Regression by @cyan4973 (#1709) -bug: Fix Buffer Overflow in v0.3 Decompression by @felixhandte (#1722) +bug: Fix Buffer Overflow in legacy v0.3 decompression by 
@felixhandte (#1722) build: Add support for IAR C/C++ Compiler for Arm by @joseph0918 (#1705) -misc: Add NULL pointer check in util.c by @leeyoung624 (#1706) v1.4.2 bug: Fix bug in zstd-0.5 decoder by @terrelln (#1696) diff --git a/build/cmake/README.md b/build/cmake/README.md index 854389ad..73b30dc7 100644 --- a/build/cmake/README.md +++ b/build/cmake/README.md @@ -5,9 +5,9 @@ use case sensitivity that matches modern (ie. cmake version 2.6 and above) conventions of using lower-case for commands, and upper-case for variables. -# How to build +## How to build -As cmake doesn't support command like `cmake clean`, it's recommanded to perform a "out of source build". +As cmake doesn't support a command like `cmake clean`, it's recommended to perform an "out of source build". To do this, you can create a new directory and build in it: ```sh cd build/cmake @@ -16,7 +16,7 @@ cd builddir cmake .. make ``` -Then you can clean all cmake caches by simpily delete the new directory: +Then you can clean all cmake caches by simply deleting the new directory: ```sh rm -rf build/cmake/builddir ``` @@ -34,19 +34,19 @@ cd build/cmake/builddir cmake -LH .. ``` -Bool options can be set to ON/OFF with -D\[option\]=\[ON/OFF\]. You can configure cmake options like this: +Bool options can be set to `ON/OFF` with `-D[option]=[ON/OFF]`. You can configure cmake options like this: ```sh cd build/cmake/builddir cmake -DZSTD_BUILD_TESTS=ON -DZSTD_LEGACY_SUPPORT=ON .. make ``` -## referring +### referring [Looking for a 'cmake clean' command to clear up CMake output](https://stackoverflow.com/questions/9680420/looking-for-a-cmake-clean-command-to-clear-up-cmake-output) -# CMake Style Recommendations +## CMake Style Recommendations -## Indent all code correctly, i.e. the body of +### Indent all code correctly, i.e. the body of * if/else/endif * foreach/endforeach @@ -57,7 +57,7 @@ make Use spaces for indenting, 2, 3 or 4 spaces preferably. 
Use the same amount of spaces for indenting as is used in the rest of the file. Do not use tabs. -## Upper/lower casing +### Upper/lower casing Most important: use consistent upper- or lowercasing within one file ! @@ -77,7 +77,7 @@ Add_Executable(hello hello.c) aDd_ExEcUtAbLe(blub blub.c) ``` -## End commands +### End commands To make the code easier to read, use empty commands for endforeach(), endif(), endfunction(), endmacro() and endwhile(). Also, use empty else() commands. @@ -99,6 +99,6 @@ if(BARVAR) endif(BARVAR) ``` -## Other resources for best practices +### Other resources for best practices -`https://cmake.org/cmake/help/latest/manual/cmake-developer.7.html#modules` +https://cmake.org/cmake/help/latest/manual/cmake-developer.7.html#modules diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt index 77b389ca..7adca875 100644 --- a/build/cmake/lib/CMakeLists.txt +++ b/build/cmake/lib/CMakeLists.txt @@ -134,11 +134,10 @@ if (UNIX) # pkg-config set(PREFIX "${CMAKE_INSTALL_PREFIX}") set(LIBDIR "${CMAKE_INSTALL_FULL_LIBDIR}") - set(INCLUDEDIR "${CMAKE_INSTALL_FULL_INCLUDEDIR}") set(VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}.${zstd_VERSION_PATCH}") add_custom_target(libzstd.pc ALL ${CMAKE_COMMAND} -DIN="${LIBRARY_DIR}/libzstd.pc.in" -DOUT="libzstd.pc" - -DPREFIX="${PREFIX}" -DLIBDIR="${LIBDIR}" -DINCLUDEDIR="${INCLUDEDIR}" -DVERSION="${VERSION}" + -DPREFIX="${PREFIX}" -DVERSION="${VERSION}" -P "${CMAKE_CURRENT_SOURCE_DIR}/pkgconfig.cmake" COMMENT "Creating pkg-config file") diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index 21ba000c..7fa1a8d1 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -692,12 +692,17 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, int compressionLevel); -When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once. - ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost. +
When compressing multiple messages or blocks using the same dictionary, + it's recommended to digest the dictionary only once, since it's a costly operation. + ZSTD_createCDict() will create a state from digesting a dictionary. + The resulting state can be used for future compression operations with very limited startup cost. ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. - `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict. - Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content. - Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. + @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. + Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. + Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, + in which case the only thing that it transports is the @compressionLevel. + This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, + expecting a ZSTD_CDict parameter with any data, including those without a known dictionary.
size_t ZSTD_freeCDict(ZSTD_CDict* CDict); @@ -969,6 +974,12 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); * This method is effective when the dictionary sizes are very small relative * to the input size, and the input size is fairly large to begin with. * + * - The CDict's tables are not used at all, and instead we use the working + * context alone to reload the dictionary and use params based on the source + * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). + * This method is effective when the dictionary sizes are very small relative + * to the input size, and the input size is fairly large to begin with. + * * Zstd has a simple internal heuristic that selects which strategy to use * at the beginning of a compression. However, if experimentation shows that * Zstd is making poor choices, it is possible to override that choice with diff --git a/lib/Makefile b/lib/Makefile index 87a396c5..273ceb90 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -244,8 +244,6 @@ libzstd.pc: libzstd.pc: libzstd.pc.in @echo creating pkgconfig @sed -e 's|@PREFIX@|$(PREFIX)|' \ - -e 's|@LIBDIR@|$(LIBDIR)|' \ - -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \ -e 's|@VERSION@|$(VERSION)|' \ $< >$@ diff --git a/lib/libzstd.pc.in b/lib/libzstd.pc.in index 1d07b91f..e7880be4 100644 --- a/lib/libzstd.pc.in +++ b/lib/libzstd.pc.in @@ -3,8 +3,9 @@ # BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) prefix=@PREFIX@ -libdir=@LIBDIR@ -includedir=@INCLUDEDIR@ +exec_prefix=${prefix} +includedir=${prefix}/include +libdir=${exec_prefix}/lib Name: zstd Description: fast lossless compression algorithm library diff --git a/lib/zstd.h b/lib/zstd.h index 24d23ff8..72080ea8 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -808,12 +808,17 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, typedef struct ZSTD_CDict_s ZSTD_CDict; /*! 
ZSTD_createCDict() : - * When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once. - * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost. + * When compressing multiple messages or blocks using the same dictionary, + * it's recommended to digest the dictionary only once, since it's a costly operation. + * ZSTD_createCDict() will create a state from digesting a dictionary. + * The resulting state can be used for future compression operations with very limited startup cost. * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. - * `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict. - * Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content. - * Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. */ + * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. + * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. + * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, + * in which case the only thing that it transports is the @compressionLevel. + * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, + * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, int compressionLevel); @@ -1167,7 +1172,7 @@ typedef enum { * tables. However, this model incurs no start-up cost (as long as the * working context's tables can be reused). For small inputs, this can be * faster than copying the CDict's tables. 
- * + * * - The CDict's tables are not used at all, and instead we use the working * context alone to reload the dictionary and use params based on the source * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). diff --git a/programs/fileio.c b/programs/fileio.c index 0365f144..828878c6 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1496,17 +1496,17 @@ FIO_determineCompressedName(const char* srcFileName, const char* outDirName, con static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */ char* outDirFilename = NULL; size_t sfnSize = strlen(srcFileName); - size_t const suffixSize = strlen(suffix); + size_t const srcSuffixLen = strlen(suffix); if (outDirName) { - outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, suffixSize); + outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, srcSuffixLen); sfnSize = strlen(outDirFilename); assert(outDirFilename != NULL); } - if (dfnbCapacity <= sfnSize+suffixSize+1) { + if (dfnbCapacity <= sfnSize+srcSuffixLen+1) { /* resize buffer for dstName */ free(dstFileNameBuffer); - dfnbCapacity = sfnSize + suffixSize + 30; + dfnbCapacity = sfnSize + srcSuffixLen + 30; dstFileNameBuffer = (char*)malloc(dfnbCapacity); if (!dstFileNameBuffer) { EXM_THROW(30, "zstd: %s", strerror(errno)); @@ -1520,7 +1520,7 @@ FIO_determineCompressedName(const char* srcFileName, const char* outDirName, con } else { memcpy(dstFileNameBuffer, srcFileName, sfnSize); } - memcpy(dstFileNameBuffer+sfnSize, suffix, suffixSize+1 /* Include terminating null */); + memcpy(dstFileNameBuffer+sfnSize, suffix, srcSuffixLen+1 /* Include terminating null */); return dstFileNameBuffer; } @@ -2287,6 +2287,37 @@ int FIO_decompressFilename(FIO_prefs_t* const prefs, return decodingError; } +static const char *suffixList[] = { + ZSTD_EXTENSION, + TZSTD_EXTENSION, +#ifdef ZSTD_GZDECOMPRESS + GZ_EXTENSION, + TGZ_EXTENSION, +#endif +#ifdef 
ZSTD_LZMADECOMPRESS + LZMA_EXTENSION, + XZ_EXTENSION, + TXZ_EXTENSION, +#endif +#ifdef ZSTD_LZ4DECOMPRESS + LZ4_EXTENSION, + TLZ4_EXTENSION, +#endif + NULL +}; + +static const char *suffixListStr = + ZSTD_EXTENSION "/" TZSTD_EXTENSION +#ifdef ZSTD_GZDECOMPRESS + "/" GZ_EXTENSION "/" TGZ_EXTENSION +#endif +#ifdef ZSTD_LZMADECOMPRESS + "/" LZMA_EXTENSION "/" XZ_EXTENSION "/" TXZ_EXTENSION +#endif +#ifdef ZSTD_LZ4DECOMPRESS + "/" LZ4_EXTENSION "/" TLZ4_EXTENSION +#endif +; /* FIO_determineDstName() : * create a destination filename from a srcFileName. @@ -2297,71 +2328,78 @@ FIO_determineDstName(const char* srcFileName, const char* outDirName) { static size_t dfnbCapacity = 0; static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */ + size_t dstFileNameEndPos; char* outDirFilename = NULL; + const char* dstSuffix = ""; + size_t dstSuffixLen = 0; + size_t sfnSize = strlen(srcFileName); - size_t suffixSize; - const char* const suffixPtr = strrchr(srcFileName, '.'); - if (suffixPtr == NULL) { - DISPLAYLEVEL(1, "zstd: %s: unknown suffix -- ignored \n", - srcFileName); + size_t srcSuffixLen; + const char* const srcSuffix = strrchr(srcFileName, '.'); + if (srcSuffix == NULL) { + DISPLAYLEVEL(1, + "zstd: %s: unknown suffix (%s expected). " + "Can't derive the output file name. " + "Specify it with -o dstFileName. 
Ignoring.\n", + srcFileName, suffixListStr); return NULL; } - suffixSize = strlen(suffixPtr); + srcSuffixLen = strlen(srcSuffix); - /* check suffix is authorized */ - if (sfnSize <= suffixSize - || ( strcmp(suffixPtr, ZSTD_EXTENSION) - #ifdef ZSTD_GZDECOMPRESS - && strcmp(suffixPtr, GZ_EXTENSION) - #endif - #ifdef ZSTD_LZMADECOMPRESS - && strcmp(suffixPtr, XZ_EXTENSION) - && strcmp(suffixPtr, LZMA_EXTENSION) - #endif - #ifdef ZSTD_LZ4DECOMPRESS - && strcmp(suffixPtr, LZ4_EXTENSION) - #endif - ) ) { - const char* suffixlist = ZSTD_EXTENSION - #ifdef ZSTD_GZDECOMPRESS - "/" GZ_EXTENSION - #endif - #ifdef ZSTD_LZMADECOMPRESS - "/" XZ_EXTENSION "/" LZMA_EXTENSION - #endif - #ifdef ZSTD_LZ4DECOMPRESS - "/" LZ4_EXTENSION - #endif - ; - DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s expected) -- ignored \n", - srcFileName, suffixlist); - return NULL; + { + const char** matchedSuffixPtr; + for (matchedSuffixPtr = suffixList; *matchedSuffixPtr != NULL; matchedSuffixPtr++) { + if (!strcmp(*matchedSuffixPtr, srcSuffix)) { + break; + } + } + + /* check suffix is authorized */ + if (sfnSize <= srcSuffixLen || *matchedSuffixPtr == NULL) { + DISPLAYLEVEL(1, + "zstd: %s: unknown suffix (%s expected). " + "Can't derive the output file name. " + "Specify it with -o dstFileName. 
Ignoring.\n", + srcFileName, suffixListStr); + return NULL; + } + + if ((*matchedSuffixPtr)[1] == 't') { + dstSuffix = ".tar"; + dstSuffixLen = strlen(dstSuffix); + } } + if (outDirName) { outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, 0); sfnSize = strlen(outDirFilename); assert(outDirFilename != NULL); } - if (dfnbCapacity+suffixSize <= sfnSize+1) { + if (dfnbCapacity+srcSuffixLen <= sfnSize+1+dstSuffixLen) { /* allocate enough space to write dstFilename into it */ free(dstFileNameBuffer); dfnbCapacity = sfnSize + 20; dstFileNameBuffer = (char*)malloc(dfnbCapacity); if (dstFileNameBuffer==NULL) - EXM_THROW(74, "%s : not enough memory for dstFileName", strerror(errno)); + EXM_THROW(74, "%s : not enough memory for dstFileName", + strerror(errno)); } /* return dst name == src name truncated from suffix */ assert(dstFileNameBuffer != NULL); + dstFileNameEndPos = sfnSize - srcSuffixLen; if (outDirFilename) { - memcpy(dstFileNameBuffer, outDirFilename, sfnSize - suffixSize); + memcpy(dstFileNameBuffer, outDirFilename, dstFileNameEndPos); free(outDirFilename); } else { - memcpy(dstFileNameBuffer, srcFileName, sfnSize - suffixSize); + memcpy(dstFileNameBuffer, srcFileName, dstFileNameEndPos); } - dstFileNameBuffer[sfnSize-suffixSize] = '\0'; + + /* The short tar extensions tzst, tgz, txz and tlz4 files should have "tar" + * extension on decompression. Also writes terminating null. */ + strcpy(dstFileNameBuffer + dstFileNameEndPos, dstSuffix); return dstFileNameBuffer; /* note : dstFileNameBuffer memory is not going to be free */ diff --git a/programs/fileio.h b/programs/fileio.h index 4b0143be..af2c5d9d 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -30,11 +30,23 @@ extern "C" { #else # define nulmark "/dev/null" #endif + +/** + * We test whether the extension we found starts with 't', and if so, we append + * ".tar" to the end of the output name. 
+ */ #define LZMA_EXTENSION ".lzma" #define XZ_EXTENSION ".xz" +#define TXZ_EXTENSION ".txz" + #define GZ_EXTENSION ".gz" +#define TGZ_EXTENSION ".tgz" + #define ZSTD_EXTENSION ".zst" +#define TZSTD_EXTENSION ".tzst" + #define LZ4_EXTENSION ".lz4" +#define TLZ4_EXTENSION ".tlz4" /*-************************************* diff --git a/tests/playTests.sh b/tests/playTests.sh index 8adf97df..cd4d5045 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -896,6 +896,46 @@ if [ $LZ4MODE -ne 1 ]; then grep ".lz4" tmplg > $INTOVOID && die "Unsupported suffix listed" fi +println "\n===> tar extension tests " + +rm -f tmp tmp.tar tmp.tzst tmp.tgz tmp.txz tmp.tlz4 + +./datagen > tmp +tar cf tmp.tar tmp +$ZSTD tmp.tar -o tmp.tzst +rm tmp.tar +$ZSTD -d tmp.tzst +[ -e tmp.tar ] || die ".tzst failed to decompress to .tar!" +rm -f tmp.tar tmp.tzst + +if [ $GZIPMODE -eq 1 ]; then + tar czf tmp.tgz tmp + $ZSTD -d tmp.tgz + [ -e tmp.tar ] || die ".tgz failed to decompress to .tar!" + rm -f tmp.tar tmp.tgz +fi + +if [ $LZMAMODE -eq 1 ]; then + tar c tmp | $ZSTD --format=xz > tmp.txz + $ZSTD -d tmp.txz + [ -e tmp.tar ] || die ".txz failed to decompress to .tar!" + rm -f tmp.tar tmp.txz +fi + +if [ $LZ4MODE -eq 1 ]; then + tar c tmp | $ZSTD --format=lz4 > tmp.tlz4 + $ZSTD -d tmp.tlz4 + [ -e tmp.tar ] || die ".tlz4 failed to decompress to .tar!" + rm -f tmp.tar tmp.tlz4 +fi + +touch tmp.t tmp.tz tmp.tzs +! $ZSTD -d tmp.t +! $ZSTD -d tmp.tz +! $ZSTD -d tmp.tzs + +exit + println "\n===> zstd round-trip tests " roundTripTest diff --git a/zlibWrapper/gzread.c b/zlibWrapper/gzread.c index bcac9700..359d1788 100644 --- a/zlibWrapper/gzread.c +++ b/zlibWrapper/gzread.c @@ -8,6 +8,14 @@ #include "gzguts.h" +/* fix for Visual Studio, which doesn't support ssize_t type. 
+ * see https://github.com/facebook/zstd/issues/1800#issuecomment-545945050 */ +#if defined(_MSC_VER) && !defined(ssize_t) +# include <BaseTsd.h> + typedef SSIZE_T ssize_t; +#endif + + /* Local functions */ local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *)); local int gz_avail OF((gz_statep));