diff --git a/CHANGELOG b/CHANGELOG index ae54896a..44600267 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,8 +1,32 @@ +v1.4.4 +perf: Improved decompression speed, by > 10%, by @terrelln +perf: Better compression speed when re-using a context, by @felixhandte +perf: Fix compression ratio when compressing large files with small dictionary, by @senhuang42 +perf: zstd reference encoder can generate RLE blocks, by @bimbashrestha +perf: minor generic speed optimization, by @davidbolvansky +api: new ability to extract sequences from the parser for analysis, by @bimbashrestha +api: fixed decoding of magic-less frames, by @terrelln +api: fixed ZSTD_initCStream_advanced() performance with fast modes, reported by @QrczakMK +cli: Named pipes support, by @bimbashrestha +cli: short tar's extension support, by @stokito +cli: command --output-dir-flat= , generates target files into requested directory, by @senhuang42 +cli: commands --stream-size=# and --size-hint=#, by @nmagerko +cli: faster `-t` test mode +cli: improved some error messages, by @vangyzen +cli: rare deadlock condition within dictionary builder, by @terrelln +build: single-file decoder with emscripten compilation script, by @cwoffenden +build: fixed zlibWrapper compilation on Visual Studio, reported by @bluenlive +build: fixed deprecation warning for certain gcc version, reported by @jasonma163 +build: fix compilation on old gcc versions, by @cemeyer +build: improved installation directories for cmake script, by Dmitri Shubin +pack: modified pkgconfig, for better integration into openwrt, requested by @neheb +misc: Improved documentation : ZSTD_CLEVEL, DYNAMIC_BMI2, ZSTD_CDict, function deprecation, zstd format +misc: fixed educational decoder : accept larger literals section, and removed UNALIGNED() macro + v1.4.3 bug: Fix Dictionary Compression Ratio Regression by @cyan4973 (#1709) -bug: Fix Buffer Overflow in v0.3 Decompression by @felixhandte (#1722) +bug: Fix Buffer Overflow in legacy v0.3 decompression by @felixhandte (#1722) build: Add support for IAR C/C++ Compiler for Arm by @joseph0918 (#1705) -misc: Add NULL pointer check in util.c by @leeyoung624 (#1706) v1.4.2 bug: Fix bug in zstd-0.5 decoder by @terrelln (#1696) diff --git a/build/cmake/README.md b/build/cmake/README.md index 854389ad..73b30dc7 100644 --- a/build/cmake/README.md +++ b/build/cmake/README.md @@ -5,9 +5,9 @@ use case sensitivity that matches modern (ie. cmake version 2.6 and above) conventions of using lower-case for commands, and upper-case for variables. -# How to build +## How to build -As cmake doesn't support command like `cmake clean`, it's recommanded to perform a "out of source build". +As cmake doesn't support command like `cmake clean`, it's recommended to perform a "out of source build". To do this, you can create a new directory and build in it: ```sh cd build/cmake @@ -16,7 +16,7 @@ cd builddir cmake .. make ``` -Then you can clean all cmake caches by simpily delete the new directory: +Then you can clean all cmake caches by simply delete the new directory: ```sh rm -rf build/cmake/builddir ``` @@ -34,19 +34,19 @@ cd build/cmake/builddir cmake -LH .. ``` -Bool options can be set to ON/OFF with -D\[option\]=\[ON/OFF\]. You can configure cmake options like this: +Bool options can be set to `ON/OFF` with `-D[option]=[ON/OFF]`. You can configure cmake options like this: ```sh cd build/cmake/builddir cmake -DZSTD_BUILD_TESTS=ON -DZSTD_LEGACY_SUPPORT=ON .. make ``` -## referring +### referring [Looking for a 'cmake clean' command to clear up CMake output](https://stackoverflow.com/questions/9680420/looking-for-a-cmake-clean-command-to-clear-up-cmake-output) -# CMake Style Recommendations +## CMake Style Recommendations -## Indent all code correctly, i.e. the body of +### Indent all code correctly, i.e. the body of * if/else/endif * foreach/endforeach @@ -57,7 +57,7 @@ make Use spaces for indenting, 2, 3 or 4 spaces preferably. Use the same amount of spaces for indenting as is used in the rest of the file. Do not use tabs. -## Upper/lower casing +### Upper/lower casing Most important: use consistent upper- or lowercasing within one file ! @@ -77,7 +77,7 @@ Add_Executable(hello hello.c) aDd_ExEcUtAbLe(blub blub.c) ``` -## End commands +### End commands To make the code easier to read, use empty commands for endforeach(), endif(), endfunction(), endmacro() and endwhile(). Also, use empty else() commands. @@ -99,6 +99,6 @@ if(BARVAR) endif(BARVAR) ``` -## Other resources for best practices +### Other resources for best practices -`https://cmake.org/cmake/help/latest/manual/cmake-developer.7.html#modules` +https://cmake.org/cmake/help/latest/manual/cmake-developer.7.html#modules diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt index 77b389ca..7adca875 100644 --- a/build/cmake/lib/CMakeLists.txt +++ b/build/cmake/lib/CMakeLists.txt @@ -134,11 +134,10 @@ if (UNIX) # pkg-config set(PREFIX "${CMAKE_INSTALL_PREFIX}") set(LIBDIR "${CMAKE_INSTALL_FULL_LIBDIR}") - set(INCLUDEDIR "${CMAKE_INSTALL_FULL_INCLUDEDIR}") set(VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}.${zstd_VERSION_PATCH}") add_custom_target(libzstd.pc ALL ${CMAKE_COMMAND} -DIN="${LIBRARY_DIR}/libzstd.pc.in" -DOUT="libzstd.pc" - -DPREFIX="${PREFIX}" -DLIBDIR="${LIBDIR}" -DINCLUDEDIR="${INCLUDEDIR}" -DVERSION="${VERSION}" + -DPREFIX="${PREFIX}" -DVERSION="${VERSION}" -P "${CMAKE_CURRENT_SOURCE_DIR}/pkgconfig.cmake" COMMENT "Creating pkg-config file") diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index 21ba000c..7fa1a8d1 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -692,12 +692,17 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
                              int compressionLevel);
-

When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once. - ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost. +

When compressing multiple messages or blocks using the same dictionary, + it's recommended to digest the dictionary only once, since it's a costly operation. + ZSTD_createCDict() will create a state from digesting a dictionary. + The resulting state can be used for future compression operations with very limited startup cost. ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. - `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict. - Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content. - Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. + @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. + Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. + Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, + in which case the only thing that it transports is the @compressionLevel. + This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, + expecting a ZSTD_CDict parameter with any data, including those without a known dictionary.


size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
@@ -969,6 +974,12 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
      *   This method is effective when the dictionary sizes are very small relative
      *   to the input size, and the input size is fairly large to begin with.
      *
+     * - The CDict's tables are not used at all, and instead we use the working
+     *   context alone to reload the dictionary and use params based on the source
+     *   size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
+     *   This method is effective when the dictionary sizes are very small relative
+     *   to the input size, and the input size is fairly large to begin with.
+     *
      * Zstd has a simple internal heuristic that selects which strategy to use
      * at the beginning of a compression. However, if experimentation shows that
      * Zstd is making poor choices, it is possible to override that choice with
diff --git a/lib/Makefile b/lib/Makefile
index 87a396c5..273ceb90 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -244,8 +244,6 @@ libzstd.pc:
 libzstd.pc: libzstd.pc.in
 	@echo creating pkgconfig
 	@sed -e 's|@PREFIX@|$(PREFIX)|' \
-             -e 's|@LIBDIR@|$(LIBDIR)|' \
-             -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \
              -e 's|@VERSION@|$(VERSION)|' \
              $< >$@
 
diff --git a/lib/libzstd.pc.in b/lib/libzstd.pc.in
index 1d07b91f..e7880be4 100644
--- a/lib/libzstd.pc.in
+++ b/lib/libzstd.pc.in
@@ -3,8 +3,9 @@
 #   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
 prefix=@PREFIX@
-libdir=@LIBDIR@
-includedir=@INCLUDEDIR@
+exec_prefix=${prefix}
+includedir=${prefix}/include
+libdir=${exec_prefix}/lib
 
 Name: zstd
 Description: fast lossless compression algorithm library
diff --git a/lib/zstd.h b/lib/zstd.h
index 24d23ff8..72080ea8 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -808,12 +808,17 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
 typedef struct ZSTD_CDict_s ZSTD_CDict;
 
 /*! ZSTD_createCDict() :
- *  When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once.
- *  ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost.
+ *  When compressing multiple messages or blocks using the same dictionary,
+ *  it's recommended to digest the dictionary only once, since it's a costly operation.
+ *  ZSTD_createCDict() will create a state from digesting a dictionary.
+ *  The resulting state can be used for future compression operations with very limited startup cost.
  *  ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
- * `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict.
- *  Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content.
- *  Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. */
+ * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
+ *  Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
+ *  Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
+ *      in which case the only thing that it transports is the @compressionLevel.
+ *      This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
+ *      expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */
 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
                                          int compressionLevel);
 
@@ -1167,7 +1172,7 @@ typedef enum {
      *   tables. However, this model incurs no start-up cost (as long as the
      *   working context's tables can be reused). For small inputs, this can be
      *   faster than copying the CDict's tables.
-     * 
+     *
      * - The CDict's tables are not used at all, and instead we use the working
      *   context alone to reload the dictionary and use params based on the source
      *   size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
diff --git a/programs/fileio.c b/programs/fileio.c
index 0365f144..828878c6 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -1496,17 +1496,17 @@ FIO_determineCompressedName(const char* srcFileName, const char* outDirName, con
     static char* dstFileNameBuffer = NULL;   /* using static allocation : this function cannot be multi-threaded */
     char* outDirFilename = NULL;
     size_t sfnSize = strlen(srcFileName);
-    size_t const suffixSize = strlen(suffix);
+    size_t const srcSuffixLen = strlen(suffix);
     if (outDirName) {
-        outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, suffixSize);
+        outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, srcSuffixLen);
         sfnSize = strlen(outDirFilename);
         assert(outDirFilename != NULL);
     }
 
-    if (dfnbCapacity <= sfnSize+suffixSize+1) {
+    if (dfnbCapacity <= sfnSize+srcSuffixLen+1) {
         /* resize buffer for dstName */
         free(dstFileNameBuffer);
-        dfnbCapacity = sfnSize + suffixSize + 30;
+        dfnbCapacity = sfnSize + srcSuffixLen + 30;
         dstFileNameBuffer = (char*)malloc(dfnbCapacity);
         if (!dstFileNameBuffer) {
             EXM_THROW(30, "zstd: %s", strerror(errno));
@@ -1520,7 +1520,7 @@ FIO_determineCompressedName(const char* srcFileName, const char* outDirName, con
     } else {
         memcpy(dstFileNameBuffer, srcFileName, sfnSize);
     }
-    memcpy(dstFileNameBuffer+sfnSize, suffix, suffixSize+1 /* Include terminating null */);
+    memcpy(dstFileNameBuffer+sfnSize, suffix, srcSuffixLen+1 /* Include terminating null */);
     return dstFileNameBuffer;
 }
 
@@ -2287,6 +2287,37 @@ int FIO_decompressFilename(FIO_prefs_t* const prefs,
     return decodingError;
 }
 
+static const char *suffixList[] = {
+    ZSTD_EXTENSION,
+    TZSTD_EXTENSION,
+#ifdef ZSTD_GZDECOMPRESS
+    GZ_EXTENSION,
+    TGZ_EXTENSION,
+#endif
+#ifdef ZSTD_LZMADECOMPRESS
+    LZMA_EXTENSION,
+    XZ_EXTENSION,
+    TXZ_EXTENSION,
+#endif
+#ifdef ZSTD_LZ4DECOMPRESS
+    LZ4_EXTENSION,
+    TLZ4_EXTENSION,
+#endif
+    NULL
+};
+
+static const char *suffixListStr =
+    ZSTD_EXTENSION "/" TZSTD_EXTENSION
+#ifdef ZSTD_GZDECOMPRESS
+    "/" GZ_EXTENSION "/" TGZ_EXTENSION
+#endif
+#ifdef ZSTD_LZMADECOMPRESS
+    "/" LZMA_EXTENSION "/" XZ_EXTENSION "/" TXZ_EXTENSION
+#endif
+#ifdef ZSTD_LZ4DECOMPRESS
+    "/" LZ4_EXTENSION "/" TLZ4_EXTENSION
+#endif
+;
 
 /* FIO_determineDstName() :
  * create a destination filename from a srcFileName.
@@ -2297,71 +2328,78 @@ FIO_determineDstName(const char* srcFileName, const char* outDirName)
 {
     static size_t dfnbCapacity = 0;
     static char* dstFileNameBuffer = NULL;   /* using static allocation : this function cannot be multi-threaded */
+    size_t dstFileNameEndPos;
     char* outDirFilename = NULL;
+    const char* dstSuffix = "";
+    size_t dstSuffixLen = 0;
+
     size_t sfnSize = strlen(srcFileName);
-    size_t suffixSize;
 
-    const char* const suffixPtr = strrchr(srcFileName, '.');
-    if (suffixPtr == NULL) {
-        DISPLAYLEVEL(1, "zstd: %s: unknown suffix -- ignored \n",
-                        srcFileName);
+    size_t srcSuffixLen;
+    const char* const srcSuffix = strrchr(srcFileName, '.');
+    if (srcSuffix == NULL) {
+        DISPLAYLEVEL(1,
+            "zstd: %s: unknown suffix (%s expected). "
+            "Can't derive the output file name. "
+            "Specify it with -o dstFileName. Ignoring.\n",
+            srcFileName, suffixListStr);
         return NULL;
     }
-    suffixSize = strlen(suffixPtr);
+    srcSuffixLen = strlen(srcSuffix);
 
-    /* check suffix is authorized */
-    if (sfnSize <= suffixSize
-        || (   strcmp(suffixPtr, ZSTD_EXTENSION)
-        #ifdef ZSTD_GZDECOMPRESS
-            && strcmp(suffixPtr, GZ_EXTENSION)
-        #endif
-        #ifdef ZSTD_LZMADECOMPRESS
-            && strcmp(suffixPtr, XZ_EXTENSION)
-            && strcmp(suffixPtr, LZMA_EXTENSION)
-        #endif
-        #ifdef ZSTD_LZ4DECOMPRESS
-            && strcmp(suffixPtr, LZ4_EXTENSION)
-        #endif
-            ) ) {
-        const char* suffixlist = ZSTD_EXTENSION
-        #ifdef ZSTD_GZDECOMPRESS
-            "/" GZ_EXTENSION
-        #endif
-        #ifdef ZSTD_LZMADECOMPRESS
-            "/" XZ_EXTENSION "/" LZMA_EXTENSION
-        #endif
-        #ifdef ZSTD_LZ4DECOMPRESS
-            "/" LZ4_EXTENSION
-        #endif
-        ;
-        DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s expected) -- ignored \n",
-                     srcFileName, suffixlist);
-        return NULL;
+    {
+        const char** matchedSuffixPtr;
+        for (matchedSuffixPtr = suffixList; *matchedSuffixPtr != NULL; matchedSuffixPtr++) {
+            if (!strcmp(*matchedSuffixPtr, srcSuffix)) {
+                break;
+            }
+        }
+
+        /* check suffix is authorized */
+        if (sfnSize <= srcSuffixLen || *matchedSuffixPtr == NULL) {
+            DISPLAYLEVEL(1,
+                "zstd: %s: unknown suffix (%s expected). "
+                "Can't derive the output file name. "
+                "Specify it with -o dstFileName. Ignoring.\n",
+                srcFileName, suffixListStr);
+            return NULL;
+        }
+
+        if ((*matchedSuffixPtr)[1] == 't') {
+            dstSuffix = ".tar";
+            dstSuffixLen = strlen(dstSuffix);
+        }
     }
+
     if (outDirName) {
         outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, 0);
         sfnSize = strlen(outDirFilename);
         assert(outDirFilename != NULL);
     }
 
-    if (dfnbCapacity+suffixSize <= sfnSize+1) {
+    if (dfnbCapacity+srcSuffixLen <= sfnSize+1+dstSuffixLen) {
         /* allocate enough space to write dstFilename into it */
         free(dstFileNameBuffer);
         dfnbCapacity = sfnSize + 20;
         dstFileNameBuffer = (char*)malloc(dfnbCapacity);
         if (dstFileNameBuffer==NULL)
-            EXM_THROW(74, "%s : not enough memory for dstFileName", strerror(errno));
+            EXM_THROW(74, "%s : not enough memory for dstFileName",
+                      strerror(errno));
     }
 
     /* return dst name == src name truncated from suffix */
     assert(dstFileNameBuffer != NULL);
+    dstFileNameEndPos = sfnSize - srcSuffixLen;
     if (outDirFilename) {
-        memcpy(dstFileNameBuffer, outDirFilename, sfnSize - suffixSize);
+        memcpy(dstFileNameBuffer, outDirFilename, dstFileNameEndPos);
         free(outDirFilename);
     } else {
-        memcpy(dstFileNameBuffer, srcFileName, sfnSize - suffixSize);
+        memcpy(dstFileNameBuffer, srcFileName, dstFileNameEndPos);
     }
-    dstFileNameBuffer[sfnSize-suffixSize] = '\0';
+
+    /* The short tar extensions tzst, tgz, txz and tlz4 files should have "tar"
+     * extension on decompression. Also writes terminating null. */
+    strcpy(dstFileNameBuffer + dstFileNameEndPos, dstSuffix);
     return dstFileNameBuffer;
 
     /* note : dstFileNameBuffer memory is not going to be free */
diff --git a/programs/fileio.h b/programs/fileio.h
index 4b0143be..af2c5d9d 100644
--- a/programs/fileio.h
+++ b/programs/fileio.h
@@ -30,11 +30,23 @@ extern "C" {
 #else
 #  define nulmark "/dev/null"
 #endif
+
+/**
+ * We test whether the extension we found starts with 't', and if so, we append
+ * ".tar" to the end of the output name.
+ */
 #define LZMA_EXTENSION  ".lzma"
 #define XZ_EXTENSION    ".xz"
+#define TXZ_EXTENSION   ".txz"
+
 #define GZ_EXTENSION    ".gz"
+#define TGZ_EXTENSION   ".tgz"
+
 #define ZSTD_EXTENSION  ".zst"
+#define TZSTD_EXTENSION ".tzst"
+
 #define LZ4_EXTENSION   ".lz4"
+#define TLZ4_EXTENSION  ".tlz4"
 
 
 /*-*************************************
diff --git a/tests/playTests.sh b/tests/playTests.sh
index 8adf97df..cd4d5045 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -896,6 +896,46 @@ if [ $LZ4MODE -ne 1 ]; then
     grep ".lz4" tmplg > $INTOVOID && die "Unsupported suffix listed"
 fi
 
+println "\n===>  tar extension tests "
+
+rm -f tmp tmp.tar tmp.tzst tmp.tgz tmp.txz tmp.tlz4
+
+./datagen > tmp
+tar cf tmp.tar tmp
+$ZSTD tmp.tar -o tmp.tzst
+rm tmp.tar
+$ZSTD -d tmp.tzst
+[ -e tmp.tar ] || die ".tzst failed to decompress to .tar!"
+rm -f tmp.tar tmp.tzst
+
+if [ $GZIPMODE -eq 1 ]; then
+    tar czf tmp.tgz tmp
+    $ZSTD -d tmp.tgz
+    [ -e tmp.tar ] || die ".tgz failed to decompress to .tar!"
+    rm -f tmp.tar tmp.tgz
+fi
+
+if [ $LZMAMODE -eq 1 ]; then
+    tar c tmp | $ZSTD --format=xz > tmp.txz
+    $ZSTD -d tmp.txz
+    [ -e tmp.tar ] || die ".txz failed to decompress to .tar!"
+    rm -f tmp.tar tmp.txz
+fi
+
+if [ $LZ4MODE -eq 1 ]; then
+    tar c tmp | $ZSTD --format=lz4 > tmp.tlz4
+    $ZSTD -d tmp.tlz4
+    [ -e tmp.tar ] || die ".tlz4 failed to decompress to .tar!"
+    rm -f tmp.tar tmp.tlz4
+fi
+
+touch tmp.t tmp.tz tmp.tzs
+! $ZSTD -d tmp.t
+! $ZSTD -d tmp.tz
+! $ZSTD -d tmp.tzs
+
+exit
+
 println "\n===>  zstd round-trip tests "
 
 roundTripTest
diff --git a/zlibWrapper/gzread.c b/zlibWrapper/gzread.c
index bcac9700..359d1788 100644
--- a/zlibWrapper/gzread.c
+++ b/zlibWrapper/gzread.c
@@ -8,6 +8,14 @@
 
 #include "gzguts.h"
 
+/* fix for Visual Studio, which doesn't support ssize_t type.
+ * see https://github.com/facebook/zstd/issues/1800#issuecomment-545945050 */
+#if defined(_MSC_VER) && !defined(ssize_t)
+#  include 
+   typedef SSIZE_T ssize_t;
+#endif
+
+
 /* Local functions */
 local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
 local int gz_avail OF((gz_statep));