diff --git a/NEWS b/NEWS index 5e927181..1b132ca9 100644 --- a/NEWS +++ b/NEWS @@ -1,7 +1,9 @@ v1.1.3 -cli : new : commands for advanced parameters, by Przemyslaw Skibinski +cli : new : advanced commands for detailed parameters, by Przemyslaw Skibinski +cli : fix zstdless on Mac OS-X, by Andrew Janke API : fix : all symbols properly exposed in libzstd, by Nick Terrell API : new : ZSTD_create?Dict_byReference(), requested by Bartosz Taudul +API : new : ZDICT_finalizeDictionary() v1.1.2 API : streaming : decompression : changed : automatic implicit reset when chain-decoding new frames without init diff --git a/contrib/pzstd/Makefile b/contrib/pzstd/Makefile index 99d955e9..f148bfd8 100644 --- a/contrib/pzstd/Makefile +++ b/contrib/pzstd/Makefile @@ -26,7 +26,7 @@ POSTCOMPILE = mv -f $*.Td $*.d # CFLAGS, CXXFLAGS, CPPFLAGS, and LDFLAGS are for the users to override CFLAGS ?= -O3 -Wall -Wextra -CXXFLAGS ?= -O3 -Wall -Wextra -pedantic -std=c++11 +CXXFLAGS ?= -O3 -Wall -Wextra -pedantic CPPFLAGS ?= LDFLAGS ?= @@ -37,7 +37,7 @@ GTEST_INC = -isystem googletest/googletest/include PZSTD_CPPFLAGS = $(PZSTD_INC) PZSTD_CCXXFLAGS = PZSTD_CFLAGS = $(PZSTD_CCXXFLAGS) -PZSTD_CXXFLAGS = $(PZSTD_CCXXFLAGS) +PZSTD_CXXFLAGS = $(PZSTD_CCXXFLAGS) -std=c++11 PZSTD_LDFLAGS = EXTRA_FLAGS = ALL_CFLAGS = $(EXTRA_FLAGS) $(CPPFLAGS) $(PZSTD_CPPFLAGS) $(CFLAGS) $(PZSTD_CFLAGS) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index afac869c..7626b33a 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -51,8 +51,7 @@ static void ZSTD_resetSeqStore(seqStore_t* ssPtr) /*-************************************* * Context memory management ***************************************/ -struct ZSTD_CCtx_s -{ +struct ZSTD_CCtx_s { const BYTE* nextSrc; /* next block here to continue on current prefix */ const BYTE* base; /* All regular indexes relative to this position */ const BYTE* dictBase; /* extDict indexes relative to this position */ @@ -2742,7 +2741,7 
@@ struct ZSTD_CDict_s { size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) { if (cdict==NULL) return 0; /* support sizeof on NULL */ - return ZSTD_sizeof_CCtx(cdict->refContext) + cdict->dictContentSize; + return ZSTD_sizeof_CCtx(cdict->refContext) + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict); } ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, unsigned byReference, diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index e976cd26..02f3bf45 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1792,7 +1792,7 @@ size_t ZSTD_freeDDict(ZSTD_DDict* ddict) size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) { if (ddict==NULL) return 0; /* support sizeof on NULL */ - return sizeof(*ddict) + sizeof(ddict->refContext) + ddict->dictSize; + return sizeof(*ddict) + ZSTD_sizeof_DCtx(ddict->refContext) + (ddict->dictBuffer ? ddict->dictSize : 0) ; } /*! ZSTD_getDictID_fromDict() : diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index ac22e870..0757dbbb 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -60,7 +60,7 @@ #define NOISELENGTH 32 #define MINRATIO 4 -static const int g_compressionLevel_default = 5; +static const int g_compressionLevel_default = 6; static const U32 g_selectivity_default = 9; static const size_t g_provision_entropySize = 200; static const size_t g_min_fast_dictContent = 192; @@ -824,6 +824,55 @@ _cleanup: } + +size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity, + const void* customDictContent, size_t dictContentSize, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_params_t params) +{ + size_t hSize; +#define HBUFFSIZE 256 + BYTE header[HBUFFSIZE]; + int const compressionLevel = (params.compressionLevel <= 0) ? 
g_compressionLevel_default : params.compressionLevel; + U32 const notificationLevel = params.notificationLevel; + + /* check conditions */ + if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall); + if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong); + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall); + + /* dictionary header */ + MEM_writeLE32(header, ZSTD_DICT_MAGIC); + { U64 const randomID = XXH64(customDictContent, dictContentSize, 0); + U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768; + U32 const dictID = params.dictID ? params.dictID : compliantID; + MEM_writeLE32(header+4, dictID); + } + hSize = 8; + + /* entropy tables */ + DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ + DISPLAYLEVEL(2, "statistics ... \n"); + { size_t const eSize = ZDICT_analyzeEntropy(header+hSize, HBUFFSIZE-hSize, + compressionLevel, + samplesBuffer, samplesSizes, nbSamples, + customDictContent, dictContentSize, + notificationLevel); + if (ZDICT_isError(eSize)) return eSize; + hSize += eSize; + } + + /* copy elements in final buffer ; note : src and dst buffer can overlap */ + if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize; + { size_t const dictSize = hSize + dictContentSize; + char* dictEnd = (char*)dictBuffer + dictSize; + memmove(dictEnd - dictContentSize, customDictContent, dictContentSize); + memcpy(dictBuffer, header, hSize); + return dictSize; + } +} + + size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, ZDICT_params_t params) diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h index d6cf1839..63b8f072 100644 --- a/lib/dictBuilder/zdict.h +++ b/lib/dictBuilder/zdict.h @@ -87,22 +87,57 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dict ZDICT_params_t 
parameters); -/*! ZDICT_addEntropyTablesFromBuffer() : +/*! ZDICT_finalizeDictionary() : + + Given a custom content as a basis for dictionary, and a set of samples, + finalize dictionary by adding headers and statistics. - Given a content-only dictionary (built using any 3rd party algorithm), - add entropy tables computed from an array of samples. Samples must be stored concatenated in a flat buffer `samplesBuffer`, supplied with an array of sizes `samplesSizes`, providing the size of each sample in order. - The input dictionary content must be stored *at the end* of `dictBuffer`. - Its size is `dictContentSize`. - The resulting dictionary with added entropy tables will be *written back to `dictBuffer`*, - starting from its beginning. - @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`). -*/ -ZDICTLIB_API size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, - const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); + dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes. + dictBufferCapacity must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes. + @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`), + or an error code, which can be tested by ZDICT_isError(). + note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0. 
+ note 2 : dictBuffer and customDictContent can overlap +*/ +#define ZDICT_CONTENTSIZE_MIN 256 +#define ZDICT_DICTSIZE_MIN 512 +ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity, + const void* customDictContent, size_t dictContentSize, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_params_t parameters); + + + +/* Deprecation warnings */ +/* It is generally possible to disable deprecation warnings from compiler, + for example with -Wno-deprecated-declarations for gcc + or _CRT_SECURE_NO_WARNINGS in Visual. + Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */ +#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS +# define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */ +#else +# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ +# define ZDICT_DEPRECATED(message) ZDICTLIB_API [[deprecated(message)]] +# elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message))) +# elif (ZDICT_GCC_VERSION >= 301) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated)) +# elif defined(_MSC_VER) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message)) +# else +# pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler") +# define ZDICT_DEPRECATED(message) ZDICTLIB_API +# endif +#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */ + +ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead") +size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); #endif /* ZDICT_STATIC_LINKING_ONLY */ diff --git a/programs/zstdless b/programs/zstdless index ab021405..893799e7 100755 --- a/programs/zstdless +++ 
b/programs/zstdless @@ -1 +1,2 @@ -zstdcat $@ | less +#!/bin/sh +zstdcat "$@" | less diff --git a/tests/symbols.c b/tests/symbols.c index 8d03df2f..e007148f 100644 --- a/tests/symbols.c +++ b/tests/symbols.c @@ -5,6 +5,7 @@ #define ZBUFF_DISABLE_DEPRECATE_WARNINGS #define ZBUFF_STATIC_LINKING_ONLY #include "zbuff.h" +#define ZDICT_DISABLE_DEPRECATE_WARNINGS #define ZDICT_STATIC_LINKING_ONLY #include "zdict.h"