From d76d1a9ef08e9baaec9beec40bd354b2b2e1e893 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 22 Dec 2016 20:18:43 +0100 Subject: [PATCH 1/6] added ZDICT_finalizeDictionary() --- NEWS | 3 ++- lib/dictBuilder/zdict.c | 49 +++++++++++++++++++++++++++++++++++ lib/dictBuilder/zdict.h | 57 +++++++++++++++++++++++++++++++++-------- 3 files changed, 97 insertions(+), 12 deletions(-) diff --git a/NEWS b/NEWS index 5e927181..6d5dffc6 100644 --- a/NEWS +++ b/NEWS @@ -1,7 +1,8 @@ v1.1.3 -cli : new : commands for advanced parameters, by Przemyslaw Skibinski +cli : new : advanced commands for detailed parameters, by Przemyslaw Skibinski API : fix : all symbols properly exposed in libzstd, by Nick Terrell API : new : ZSTD_create?Dict_byReference(), requested by Bartosz Taudul +API : new : ZDICT_finalizeDictionary() v1.1.2 API : streaming : decompression : changed : automatic implicit reset when chain-decoding new frames without init diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index ac22e870..c3b227d7 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -824,6 +824,55 @@ _cleanup: } + +size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity, + const void* customDictContent, size_t dictContentSize, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_params_t params) +{ + size_t hSize; +#define HBUFFSIZE 256 + BYTE header[HBUFFSIZE]; + int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel; + U32 const notificationLevel = params.notificationLevel; + + /* check conditions */ + if (dictBufferCapacity <= dictContentSize) return ERROR(dstSize_tooSmall); + if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong); + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall); + + /* dictionary header */ + MEM_writeLE32(header, ZSTD_DICT_MAGIC); + { U64 const randomID = XXH64(customDictContent, dictContentSize, 0); + U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768; + U32 const dictID = params.dictID ? params.dictID : compliantID; + MEM_writeLE32(header+4, dictID); + } + hSize = 8; + + /* entropy tables */ + DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ + DISPLAYLEVEL(2, "statistics ... \n"); + { size_t const eSize = ZDICT_analyzeEntropy(header+hSize, HBUFFSIZE-hSize, + compressionLevel, + samplesBuffer, samplesSizes, nbSamples, + customDictContent, dictContentSize, + notificationLevel); + if (ZDICT_isError(eSize)) return eSize; + hSize += eSize; + } + + /* copy elements in final buffer ; note : src and dst buffer can overlap */ + if (hSize + dictContentSize < dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize; + { size_t const dictSize = hSize + dictContentSize; + char* dictEnd = (char*)dictBuffer + dictSize; + memmove(dictEnd - dictContentSize, customDictContent, dictContentSize); + memcpy(dictBuffer, header, hSize); + return dictSize; + } +} + + size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, ZDICT_params_t params) diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h index d6cf1839..8dabfd5e 100644 --- a/lib/dictBuilder/zdict.h +++ b/lib/dictBuilder/zdict.h @@ -87,22 +87,57 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dict ZDICT_params_t parameters); -/*! ZDICT_addEntropyTablesFromBuffer() : +/*! ZDICT_finalizeDictionary() : + + Given a custom content as a basis for dictionary, and a set of samples, + finalize dictionary by adding headers and statistics. - Given a content-only dictionary (built using any 3rd party algorithm), - add entropy tables computed from an array of samples. Samples must be stored concatenated in a flat buffer `samplesBuffer`, supplied with an array of sizes `samplesSizes`, providing the size of each sample in order. - The input dictionary content must be stored *at the end* of `dictBuffer`. - Its size is `dictContentSize`. - The resulting dictionary with added entropy tables will be *written back to `dictBuffer`*, - starting from its beginning. - @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`). -*/ -ZDICTLIB_API size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, - const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); + dictContentSize must be > ZDICT_CONTENTSIZE_MIN bytes. + maxDictSize must be > dictContentSize, and must be > ZDICT_DICTSIZE_MIN bytes. + @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`), + or an error code, which can be tested by ZDICT_isError(). + note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0. + note 2 : dictBuffer and customDictContent can overlap +*/ +#define ZDICT_CONTENTSIZE_MIN 256 +#define ZDICT_DICTSIZE_MIN 512 +ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity, + const void* customDictContent, size_t dictContentSize, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_params_t parameters); + + + +/* Deprecation warnings */ +/* It is generally possible to disable deprecation warnings from compiler, + for example with -Wno-deprecated-declarations for gcc + or _CRT_SECURE_NO_WARNINGS in Visual. + Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */ +#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS +# define ZDICT_DEPRECATED(message) /* disable deprecation warnings */ +#else +# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ +# define ZDICT_DEPRECATED(message) [[deprecated(message)]] +# elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__) +# define ZDICT_DEPRECATED(message) __attribute__((deprecated(message))) +# elif (ZDICT_GCC_VERSION >= 301) +# define ZDICT_DEPRECATED(message) __attribute__((deprecated)) +# elif defined(_MSC_VER) +# define ZDICT_DEPRECATED(message) __declspec(deprecated(message)) +# else +# pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler") +# define ZDICT_DEPRECATED(message) +# endif +#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */ + +ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead") +size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); #endif /* ZDICT_STATIC_LINKING_ONLY */ From fce374a10015a88ac7fbbad647aa3fdd747d5558 Mon Sep 17 00:00:00 2001 From: Andrew Janke Date: Thu, 22 Dec 2016 17:40:10 -0500 Subject: [PATCH 2/6] zstdless: add shebang and quote $@ --- programs/zstdless | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/programs/zstdless b/programs/zstdless index ab021405..893799e7 100755 --- a/programs/zstdless +++ b/programs/zstdless @@ -1 +1,2 @@ -zstdcat $@ | less +#!/bin/sh +zstdcat "$@" | less From 78a0072d5aae129c6f09d9fa31442c30bdefd5d9 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Thu, 22 Dec 2016 17:28:49 -0800 Subject: [PATCH 3/6] Fix failing test due to deprecation warning --- lib/dictBuilder/zdict.h | 12 ++++++------ tests/symbols.c | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h index 8dabfd5e..c7a0f575 100644 --- a/lib/dictBuilder/zdict.h +++ b/lib/dictBuilder/zdict.h @@ -118,20 +118,20 @@ ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBuffer or _CRT_SECURE_NO_WARNINGS in Visual. Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */ #ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS -# define ZDICT_DEPRECATED(message) /* disable deprecation warnings */ +# define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */ #else # define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ -# define ZDICT_DEPRECATED(message) [[deprecated(message)]] +# define ZDICT_DEPRECATED(message) ZDICTLIB_API [[deprecated(message)]] # elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__) -# define ZDICT_DEPRECATED(message) __attribute__((deprecated(message))) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message))) # elif (ZDICT_GCC_VERSION >= 301) -# define ZDICT_DEPRECATED(message) __attribute__((deprecated)) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated)) # elif defined(_MSC_VER) -# define ZDICT_DEPRECATED(message) __declspec(deprecated(message)) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message)) # else # pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler") -# define ZDICT_DEPRECATED(message) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API # endif #endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */ diff --git a/tests/symbols.c b/tests/symbols.c index 8d03df2f..e007148f 100644 --- a/tests/symbols.c +++ b/tests/symbols.c @@ -5,6 +5,7 @@ #define ZBUFF_DISABLE_DEPRECATE_WARNINGS #define ZBUFF_STATIC_LINKING_ONLY #include "zbuff.h" +#define ZDICT_DISABLE_DEPRECATE_WARNINGS #define ZDICT_STATIC_LINKING_ONLY #include "zdict.h" From bcbe77e9944694f71c8a54d687dea297cd3f7465 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Thu, 22 Dec 2016 18:01:14 -0800 Subject: [PATCH 4/6] ZDICT_finalizeDictionary() flipped comparison `ZDICT_finalizeDictionary()` had a flipped comparison. I also allowed `dictBufferCapacity == dictContentSize`. It might be the case that the user wants to fill the dictionary completely up, and then let zstd take exactly the space it needs for the entropy tables. --- lib/dictBuilder/zdict.c | 4 ++-- lib/dictBuilder/zdict.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index c3b227d7..c5cf6f80 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -837,7 +837,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity, U32 const notificationLevel = params.notificationLevel; /* check conditions */ - if (dictBufferCapacity <= dictContentSize) return ERROR(dstSize_tooSmall); + if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall); if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong); if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall); @@ -863,7 +863,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity, } /* copy elements in final buffer ; note : src and dst buffer can overlap */ - if (hSize + dictContentSize < dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize; + if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize; { size_t const dictSize = hSize + dictContentSize; char* dictEnd = (char*)dictBuffer + dictSize; memmove(dictEnd - dictContentSize, customDictContent, dictContentSize); diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h index 8dabfd5e..0641e36f 100644 --- a/lib/dictBuilder/zdict.h +++ b/lib/dictBuilder/zdict.h @@ -96,7 +96,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dict supplied with an array of sizes `samplesSizes`, providing the size of each sample in order. dictContentSize must be > ZDICT_CONTENTSIZE_MIN bytes. - maxDictSize must be > dictContentSize, and must be > ZDICT_DICTSIZE_MIN bytes. + maxDictSize must be >= dictContentSize, and must be > ZDICT_DICTSIZE_MIN bytes. @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`), or an error code, which can be tested by ZDICT_isError(). From aca113f4f58791ace39e1d9e733f78ed60449eb7 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 23 Dec 2016 22:25:03 +0100 Subject: [PATCH 5/6] fixed ZSTD_sizeof_?Dict() --- NEWS | 1 + lib/compress/zstd_compress.c | 5 ++--- lib/decompress/zstd_decompress.c | 2 +- lib/dictBuilder/zdict.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/NEWS b/NEWS index 6d5dffc6..1b132ca9 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,6 @@ v1.1.3 cli : new : advanced commands for detailed parameters, by Przemyslaw Skibinski +cli : fix zstdless on Mac OS-X, by Andrew Janke API : fix : all symbols properly exposed in libzstd, by Nick Terrell API : new : ZSTD_create?Dict_byReference(), requested by Bartosz Taudul API : new : ZDICT_finalizeDictionary() diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index afac869c..7626b33a 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -51,8 +51,7 @@ static void ZSTD_resetSeqStore(seqStore_t* ssPtr) /*-************************************* * Context memory management ***************************************/ -struct ZSTD_CCtx_s -{ +struct ZSTD_CCtx_s { const BYTE* nextSrc; /* next block here to continue on current prefix */ const BYTE* base; /* All regular indexes relative to this position */ const BYTE* dictBase; /* extDict indexes relative to this position */ @@ -2742,7 +2741,7 @@ struct ZSTD_CDict_s { size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) { if (cdict==NULL) return 0; /* support sizeof on NULL */ - return ZSTD_sizeof_CCtx(cdict->refContext) + cdict->dictContentSize; + return ZSTD_sizeof_CCtx(cdict->refContext) + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict); } ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, unsigned byReference, diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index e976cd26..02f3bf45 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1792,7 +1792,7 @@ size_t ZSTD_freeDDict(ZSTD_DDict* ddict) size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) { if (ddict==NULL) return 0; /* support sizeof on NULL */ - return sizeof(*ddict) + sizeof(ddict->refContext) + ddict->dictSize; + return sizeof(*ddict) + ZSTD_sizeof_DCtx(ddict->refContext) + (ddict->dictBuffer ? ddict->dictSize : 0) ; } /*! ZSTD_getDictID_fromDict() : diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index c5cf6f80..0757dbbb 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -60,7 +60,7 @@ #define NOISELENGTH 32 #define MINRATIO 4 -static const int g_compressionLevel_default = 5; +static const int g_compressionLevel_default = 6; static const U32 g_selectivity_default = 9; static const size_t g_provision_entropySize = 200; static const size_t g_min_fast_dictContent = 192; From 37a2fb4ce128f0faffbbddf9b392e20a1b1b7226 Mon Sep 17 00:00:00 2001 From: Chocobo1 Date: Mon, 26 Dec 2016 23:04:59 +0800 Subject: [PATCH 6/6] Move -std=c++11 cxxflag to PZSTD_CXXFLAGS Fixes the problem that the compiler doesn't enable c++11 mode by default and the package build system has its own CXXFLAGS --- contrib/pzstd/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/pzstd/Makefile b/contrib/pzstd/Makefile index 99d955e9..f148bfd8 100644 --- a/contrib/pzstd/Makefile +++ b/contrib/pzstd/Makefile @@ -26,7 +26,7 @@ POSTCOMPILE = mv -f $*.Td $*.d # CFLAGS, CXXFLAGS, CPPFLAGS, and LDFLAGS are for the users to override CFLAGS ?= -O3 -Wall -Wextra -CXXFLAGS ?= -O3 -Wall -Wextra -pedantic -std=c++11 +CXXFLAGS ?= -O3 -Wall -Wextra -pedantic CPPFLAGS ?= LDFLAGS ?= @@ -37,7 +37,7 @@ GTEST_INC = -isystem googletest/googletest/include PZSTD_CPPFLAGS = $(PZSTD_INC) PZSTD_CCXXFLAGS = PZSTD_CFLAGS = $(PZSTD_CCXXFLAGS) -PZSTD_CXXFLAGS = $(PZSTD_CCXXFLAGS) +PZSTD_CXXFLAGS = $(PZSTD_CCXXFLAGS) -std=c++11 PZSTD_LDFLAGS = EXTRA_FLAGS = ALL_CFLAGS = $(EXTRA_FLAGS) $(CPPFLAGS) $(PZSTD_CPPFLAGS) $(CFLAGS) $(PZSTD_CFLAGS)