From 2d76defbfeb0cb2a7e10723f1983c803deca8f0d Mon Sep 17 00:00:00 2001
From: Yann Collet Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx())
Introduction
@@ -56,7 +57,9 @@
unsigned ZSTD_versionNumber(void); /**< useful to check dll version */
-Simple API
+Default constant
+
+Simple API
size_t ZSTD_compress( void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
@@ -117,7 +120,7 @@ unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` fun
const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */
int ZSTD_maxCLevel(void); /*!< maximum compression level available */
-Explicit context
+Explicit context
Compression context
When compressing many times,
it is recommended to allocate a context just once, and re-use it for each successive compression operation.
@@ -149,7 +152,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
-Simple dictionary API
+Simple dictionary API
size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
void* dst, size_t dstCapacity,
@@ -171,7 +174,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
Note : When `dict == NULL || dictSize < 8` no dictionary is used.
ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, int compressionLevel); @@ -212,7 +215,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); Faster startup than ZSTD_decompress_usingDict(), recommended when same dictionary is used multiple times.
typedef struct ZSTD_inBuffer_s { const void* src; /**< start of input buffer */ @@ -226,7 +229,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ } ZSTD_outBuffer;
+Streaming compression - HowTo
A ZSTD_CStream object is required to track streaming operation. Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. ZSTD_CStream objects can be reused multiple times on consecutive compression operations. @@ -285,7 +288,7 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */
-Streaming decompression - HowTo
+Streaming decompression - HowTo
A ZSTD_DStream object is required to track streaming operations. Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. ZSTD_DStream objects can be re-used multiple times. @@ -318,14 +321,14 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */
-START OF ADVANCED AND EXPERIMENTAL FUNCTIONS
The definitions in this section are considered experimental. +START OF ADVANCED AND EXPERIMENTAL FUNCTIONS
The definitions in this section are considered experimental. They should never be used with a dynamic library, as prototypes may change in the future. They are provided for advanced scenarios. Use them only in association with static linking.-Advanced types
+Advanced types
typedef enum { ZSTD_fast=1, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt, ZSTD_btultra } ZSTD_strategy; /* from faster to stronger */ @@ -362,7 +365,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB ZSTD_dlm_byRef, /**< Reference dictionary content -- the dictionary buffer must outlive its users. */ } ZSTD_dictLoadMethod_e;
-Frame size functions
+Frame size functions
size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);`src` should point to the start of a ZSTD encoded frame or skippable frame @@ -395,12 +398,12 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB however it does mean that all frame data must be present and valid.
-ZSTD_frameHeaderSize() :
srcSize must be >= ZSTD_frameHeaderSize_prefix. +ZSTD_frameHeaderSize() :
srcSize must be >= ZSTD_frameHeaderSize_prefix. @return : size of the Frame Header, or an error code (if srcSize is too small)-Memory management
+Memory management
size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); @@ -490,7 +493,7 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< t
-Advanced compression functions
+Advanced compression functions
ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);Create a digested dictionary for compression @@ -532,7 +535,7 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< t
Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters
-Advanced decompression functions
+Advanced decompression functions
unsigned ZSTD_isFrame(const void* buffer, size_t size);Tells if the content of `buffer` starts with a valid Frame Identifier. @@ -572,7 +575,7 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< t When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code.
-Advanced streaming functions
+Advanced streaming functions
Advanced Streaming compression functions
size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize);/**< pledgedSrcSize must be correct. If it is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, "0" also disables frame content size field. It may be enabled in the future. */ size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< creates of an internal CDict (incompatible with static CCtx), except if dict == NULL or dictSize < 8, in which case no dict is used. Note: dict is loaded with ZSTD_dm_auto (treated as a full zstd dictionary if it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.*/ @@ -604,14 +607,14 @@ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t di size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict is referenced, it must outlive decompression session */ size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */
-Buffer-less and synchronous inner streaming functions
+Buffer-less and synchronous inner streaming functions
This is an advanced API, giving full control over buffer management, for users which need direct control over memory. But it's also a complex one, with several restrictions, documented below. Prefer normal streaming API for an easier experience.-Buffer-less streaming compression (synchronous mode)
+Buffer-less streaming compression (synchronous mode)
A ZSTD_CCtx object is required to track streaming operations. Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. ZSTD_CCtx object can be re-used multiple times within successive compression operations. @@ -647,7 +650,7 @@ size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */
-Buffer-less streaming decompression (synchronous mode)
+Buffer-less streaming decompression (synchronous mode)
A ZSTD_DCtx object is required to track streaming operations. Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. A ZSTD_DCtx object can be re-used multiple times. @@ -738,7 +741,7 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
-New advanced API (experimental)
+New advanced API (experimental)
typedef enum { /* Opened question : should we have a format ZSTD_f_auto ? @@ -764,7 +767,7 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long /* compression parameters */ ZSTD_p_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table * Default level is ZSTD_CLEVEL_DEFAULT==3. - * Special: value 0 means "do not change cLevel". + * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. * Note 1 : it's possible to pass a negative compression level by casting it to unsigned type. * Note 2 : setting a level sets all default values of other compression parameters. * Note 3 : setting compressionLevel automatically updates ZSTD_p_compressLiterals. */ @@ -1146,7 +1149,7 @@ size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx,
-ZSTD_getFrameHeader_advanced() :
same as ZSTD_getFrameHeader(), +ZSTD_getFrameHeader_advanced() :
same as ZSTD_getFrameHeader(), with added capability to select a format (like ZSTD_f_zstd1_magicless)@@ -1182,7 +1185,7 @@ size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx,
-Block level API
+Block level API
Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes). User will have to take in charge required information to regenerate data, such as compressed and content sizes. diff --git a/lib/common/fse.h b/lib/common/fse.h index 8d703369..a5a6b6d4 100644 --- a/lib/common/fse.h +++ b/lib/common/fse.h @@ -72,6 +72,7 @@ extern "C" { #define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE) FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */ + /*-**************************************** * FSE simple functions ******************************************/ @@ -129,7 +130,7 @@ FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, ******************************************/ /*! FSE_compress() does the following: -1. count symbol occurrence from source[] into table count[] +1. count symbol occurrence from source[] into table count[] (see hist.h) 2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) 3. save normalized counters to memory buffer using writeNCount() 4. build encoding table 'CTable' from normalized counters @@ -147,15 +148,6 @@ or to save and provide normalized distribution using external method. /* *** COMPRESSION *** */ -/*! FSE_count(): - Provides the precise count of each byte within a table 'count'. - 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). - *maxSymbolValuePtr will be updated if detected smaller than initial value. - @return : the count of the most frequent symbol (which is not identified). - if return == srcSize, there is only one symbol. - Can also return an error code, which can be tested with FSE_isError(). */ -FSE_PUBLIC_API size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); - /*! FSE_optimalTableLog(): dynamically downsize 'tableLog' when conditions are met. It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. @@ -167,7 +159,8 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). @return : tableLog, or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue); +FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t srcSize, unsigned maxSymbolValue); /*! FSE_NCountWriteBound(): Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. @@ -178,8 +171,9 @@ FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tab Compactly save 'normalizedCounter' into 'buffer'. @return : size of the compressed table, or an errorCode, which can be tested using FSE_isError(). */ -FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); - +FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, + const short* normalizedCounter, + unsigned maxSymbolValue, unsigned tableLog); /*! Constructor and Destructor of FSE_CTable. Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ @@ -250,7 +244,9 @@ If there is an error, the function will return an ErrorCode (which can be tested @return : size read from 'rBuffer', or an errorCode, which can be tested using FSE_isError(). maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ -FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize); +FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize); /*! Constructor and Destructor of FSE_DTable. Note that its size depends on 'tableLog' */ @@ -325,33 +321,8 @@ If there is an error, the function will return an error code, which can be teste /* ***************************************** -* FSE advanced API -*******************************************/ -/* FSE_count_wksp() : - * Same as FSE_count(), but using an externally provided scratch buffer. - * `workSpace` size must be table of >= `1024` unsigned - */ -size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize, unsigned* workSpace); - -/** FSE_countFast() : - * same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr - */ -size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); - -/* FSE_countFast_wksp() : - * Same as FSE_countFast(), but using an externally provided scratch buffer. - * `workSpace` must be a table of minimum `1024` unsigned - */ -size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace); - -/*! FSE_count_simple() : - * Same as FSE_countFast(), but does not use any additional memory (not even on stack). - * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`). -*/ -size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); - - + * FSE advanced API + ***************************************** */ unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); /**< same as FSE_optimalTableLog(), which used `minus==2` */ diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c index eeff3f2c..07b3ab89 100644 --- a/lib/compress/fse_compress.c +++ b/lib/compress/fse_compress.c @@ -1,6 +1,6 @@ /* ****************************************************************** FSE : Finite State Entropy encoder - Copyright (C) 2013-2015, Yann Collet. + Copyright (C) 2013-present, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -37,9 +37,11 @@ ****************************************************************/ #include
/* malloc, free, qsort */ #include /* memcpy, memset */ -#include /* printf (debug) */ -#include "bitstream.h" #include "compiler.h" +#include "mem.h" /* U32, U16, etc. */ +#include "debug.h" /* assert, DEBUGLOG */ +#include "hist.h" /* HIST_count_wksp */ +#include "bitstream.h" #define FSE_STATIC_LINKING_ONLY #include "fse.h" #include "error_private.h" @@ -49,7 +51,6 @@ * Error Management ****************************************************************/ #define FSE_isError ERR_isError -#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ /* ************************************************************** @@ -190,8 +191,9 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned #ifndef FSE_COMMONDEFS_ONLY + /*-************************************************************** -* FSE NCount encoding-decoding +* FSE NCount encoding ****************************************************************/ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) { @@ -299,159 +301,6 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalized } - -/*-************************************************************** -* Counting histogram -****************************************************************/ -/*! FSE_count_simple - This function counts byte values within `src`, and store the histogram into table `count`. - It doesn't use any additional memory. - But this function is unsafe : it doesn't check that all values within `src` can fit into `count`. - For this reason, prefer using a table `count` with 256 elements. - @return : count of most numerous element. -*/ -size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize) -{ - const BYTE* ip = (const BYTE*)src; - const BYTE* const end = ip + srcSize; - unsigned maxSymbolValue = *maxSymbolValuePtr; - unsigned max=0; - - memset(count, 0, (maxSymbolValue+1)*sizeof(*count)); - if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } - - while (ip max) max = count[s]; } - - return (size_t)max; -} - - -/* FSE_count_parallel_wksp() : - * Same as FSE_count_parallel(), but using an externally provided scratch buffer. - * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`. - * @return : largest histogram frequency, or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */ -static size_t FSE_count_parallel_wksp( - unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize, - unsigned checkMax, unsigned* const workSpace) -{ - const BYTE* ip = (const BYTE*)source; - const BYTE* const iend = ip+sourceSize; - unsigned maxSymbolValue = *maxSymbolValuePtr; - unsigned max=0; - U32* const Counting1 = workSpace; - U32* const Counting2 = Counting1 + 256; - U32* const Counting3 = Counting2 + 256; - U32* const Counting4 = Counting3 + 256; - - memset(workSpace, 0, 4*256*sizeof(unsigned)); - - /* safety checks */ - if (!sourceSize) { - memset(count, 0, maxSymbolValue + 1); - *maxSymbolValuePtr = 0; - return 0; - } - if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ - - /* by stripes of 16 bytes */ - { U32 cached = MEM_read32(ip); ip += 4; - while (ip < iend-15) { - U32 c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - } - ip-=4; - } - - /* finish last symbols */ - while (ip maxSymbolValue; s--) { - Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; - if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); - } } - - { U32 s; - if (maxSymbolValue > 255) maxSymbolValue = 255; - for (s=0; s<=maxSymbolValue; s++) { - count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; - if (count[s] > max) max = count[s]; - } } - - while (!count[maxSymbolValue]) maxSymbolValue--; - *maxSymbolValuePtr = maxSymbolValue; - return (size_t)max; -} - -/* FSE_countFast_wksp() : - * Same as FSE_countFast(), but using an externally provided scratch buffer. - * `workSpace` size must be table of >= `1024` unsigned */ -size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize, - unsigned* workSpace) -{ - if (sourceSize < 1500) /* heuristic threshold */ - return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize); - return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace); -} - -/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ -size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize) -{ - unsigned tmpCounters[1024]; - return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters); -} - -/* FSE_count_wksp() : - * Same as FSE_count(), but using an externally provided scratch buffer. - * `workSpace` size must be table of >= `1024` unsigned */ -size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize, unsigned* workSpace) -{ - if (*maxSymbolValuePtr < 255) - return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace); - *maxSymbolValuePtr = 255; - return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace); -} - -size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize) -{ - unsigned tmpCounters[1024]; - return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters); -} - - - /*-************************************************************** * FSE Compression Code ****************************************************************/ @@ -645,11 +494,11 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, U32 s; U32 nTotal = 0; for (s=0; s<=maxSymbolValue; s++) - printf("%3i: %4i \n", s, normalizedCounter[s]); + RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]); for (s=0; s<=maxSymbolValue; s++) nTotal += abs(normalizedCounter[s]); if (nTotal != (1U< not compressible */ if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ @@ -851,7 +700,7 @@ typedef struct { size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) { fseWkspMax_t scratchBuffer; - FSE_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ + DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer)); } diff --git a/lib/compress/hist.c b/lib/compress/hist.c new file mode 100644 index 00000000..da3b749d --- /dev/null +++ b/lib/compress/hist.c @@ -0,0 +1,200 @@ +/* ****************************************************************** + hist : Histogram functions + part of Finite State Entropy project + Copyright (C) 2013-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +/* --- dependencies --- */ +#include "mem.h" /* U32, BYTE, etc. */ +#include "debug.h" /* assert, DEBUGLOG */ +#include "error_private.h" /* ERROR */ +#include "hist.h" + + +/* --- Error management --- */ +unsigned HIST_isError(size_t code) { return ERR_isError(code); } + +/*-************************************************************** + * Histogram functions + ****************************************************************/ +/*! HIST_count_simple : + Counts byte values within `src`, storing histogram into table `count`. + Doesn't use any additional memory, very limited stack usage. + But unsafe : doesn't check that all values within `src` fit into `count`. + For this reason, prefer using a table `count` of size 256. + @return : count of most numerous element. + this function doesn't produce any error (i.e. it must succeed). +*/ +unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + const BYTE* const end = ip + srcSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned largestCount=0; + + memset(count, 0, (maxSymbolValue+1) * sizeof(*count)); + if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } + + while (ip largestCount) largestCount = count[s]; + } + + return largestCount; +} + + +/* HIST_count_parallel_wksp() : + * Same as HIST_count_parallel(), but using an externally provided scratch buffer. + * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32. + * @return : largest histogram frequency, + * or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */ +static size_t HIST_count_parallel_wksp( + unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + unsigned checkMax, + unsigned* const workSpace) +{ + const BYTE* ip = (const BYTE*)source; + const BYTE* const iend = ip+sourceSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned max=0; + U32* const Counting1 = workSpace; + U32* const Counting2 = Counting1 + 256; + U32* const Counting3 = Counting2 + 256; + U32* const Counting4 = Counting3 + 256; + + memset(workSpace, 0, 4*256*sizeof(unsigned)); + + /* safety checks */ + if (!sourceSize) { + memset(count, 0, maxSymbolValue + 1); + *maxSymbolValuePtr = 0; + return 0; + } + if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ + + /* by stripes of 16 bytes */ + { U32 cached = MEM_read32(ip); ip += 4; + while (ip < iend-15) { + U32 c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + } + ip-=4; + } + + /* finish last symbols */ + while (ip maxSymbolValue; s--) { + Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; + if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); + } } + + { U32 s; + if (maxSymbolValue > 255) maxSymbolValue = 255; + for (s=0; s<=maxSymbolValue; s++) { + count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; + if (count[s] > max) max = count[s]; + } } + + while (!count[maxSymbolValue]) maxSymbolValue--; + *maxSymbolValuePtr = maxSymbolValue; + return (size_t)max; +} + +/* HIST_countFast_wksp() : + * Same as HIST_countFast(), but using an externally provided scratch buffer. + * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ +size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + unsigned* workSpace) +{ + if (sourceSize < 1500) /* heuristic threshold */ + return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize); + return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace); +} + +/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ +size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize) +{ + unsigned tmpCounters[HIST_WKSP_SIZE_U32]; + return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters); +} + +/* HIST_count_wksp() : + * Same as HIST_count(), but using an externally provided scratch buffer. + * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ +size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, unsigned* workSpace) +{ + if (*maxSymbolValuePtr < 255) + return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace); + *maxSymbolValuePtr = 255; + return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace); +} + +size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize) +{ + unsigned tmpCounters[HIST_WKSP_SIZE_U32]; + return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters); +} diff --git a/lib/compress/hist.h b/lib/compress/hist.h new file mode 100644 index 00000000..293b188b --- /dev/null +++ b/lib/compress/hist.h @@ -0,0 +1,90 @@ +/* ****************************************************************** + hist : Histogram functions + part of Finite State Entropy project + Copyright (C) 2013-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +/* --- dependencies --- */ +#include /* size_t */ + + +/* --- simple histogram functions --- */ + +/*! HIST_count(): + * Provides the precise count of each byte within a table 'count'. + * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). + * Updates *maxSymbolValuePtr with actual largest symbol value detected. + * @return : count of the most frequent symbol (which isn't identified). + * or an error code, which can be tested using HIST_isError(). + * note : if return == srcSize, there is only one symbol. + */ +size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); + +unsigned HIST_isError(size_t code); /*< tells if a return value is an error code */ + + +/* --- advanced histogram functions --- */ + +#define HIST_WKSP_SIZE_U32 1024 +/** HIST_count_wksp() : + * Same as HIST_count(), but using an externally provided scratch buffer. + * Benefit is this function will use very little stack space. + * `workSpace` must be a table of unsigned of size >= HIST_WKSP_SIZE_U32 + */ +size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize, + unsigned* workSpace); + +/** HIST_countFast() : + * same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr. + * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` + */ +size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); + +/** HIST_countFast_wksp() : + * Same as HIST_countFast(), but using an externally provided scratch buffer. + * `workSpace` must be a table of unsigned of size >= HIST_WKSP_SIZE_U32 + */ +size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize, + unsigned* workSpace); + +/*! HIST_count_simple() : + * Same as HIST_countFast(), but does not use any additional memory (not even on stack). + * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr`. + * It is also a bit slower for large inputs. + * This function doesn't produce any error (i.e. it must succeed). + */ +unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index b927efcc..2e531918 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -45,8 +45,9 @@ ****************************************************************/ #include /* memcpy, memset */ #include /* printf (debug) */ -#include "bitstream.h" #include "compiler.h" +#include "bitstream.h" +#include "hist.h" #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ #include "fse.h" /* header compression */ #define HUF_STATIC_LINKING_ONLY @@ -100,9 +101,9 @@ size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, if (wtSize <= 1) return 0; /* Not compressible */ /* Scan input and build symbol stats */ - { CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize) ); + { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */ if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */ - if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ + if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ } tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); @@ -667,7 +668,7 @@ static size_t HUF_compress_internal ( } /* Scan input and build symbol stats */ - { CHECK_V_F(largest, FSE_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) ); + { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) ); if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ if (largest <= (srcSize >> 7)+1) return 0; /* heuristic : probably not compressible enough */ } diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index d79e9c74..9c4e54c2 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -14,6 +14,7 @@ #include /* memset */ #include "cpu.h" #include "mem.h" +#include "hist.h" /* HIST_countFast_wksp */ #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ #include "fse.h" #define HUF_STATIC_LINKING_ONLY @@ -2109,7 +2110,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, ZSTD_seqToCodes(seqStorePtr); /* build CTable for Literal Lengths */ { U32 max = MaxLL; - size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace); + size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace); /* can't fail */ DEBUGLOG(5, "Building LL table"); nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode; LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, count, max, mostFrequent, nbSeq, LLFSELog, prevEntropy->fse.litlengthCTable, LL_defaultNorm, LL_defaultNormLog, ZSTD_defaultAllowed, strategy); @@ -2126,7 +2127,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, } } /* build CTable for Offsets */ { U32 max = MaxOff; - size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace); + size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace); /* can't fail */ /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; DEBUGLOG(5, "Building OF table"); @@ -2144,7 +2145,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, } } /* build CTable for MatchLengths */ { U32 max = MaxML; - size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace); + size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace); /* can't fail */ DEBUGLOG(5, "Building ML table"); nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode; MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, count, max, mostFrequent, nbSeq, MLFSELog, prevEntropy->fse.matchlengthCTable, ML_defaultNorm, ML_defaultNormLog, ZSTD_defaultAllowed, strategy); diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 46a5395a..91ae4ae2 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -9,6 +9,7 @@ */ #include "zstd_compress_internal.h" +#include "hist.h" #include "zstd_opt.h" @@ -142,7 +143,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr, assert(optPtr->litFreq != NULL); { unsigned lit = MaxLit; - FSE_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ + HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ } optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c index 40765311..936d307f 100644 --- a/tests/decodecorpus.c +++ b/tests/decodecorpus.c @@ -437,7 +437,8 @@ static size_t writeHufHeader(U32* seed, HUF_CElt* hufTable, void* dst, size_t ds U32 count[HUF_SYMBOLVALUE_MAX+1]; /* Scan input and build symbol stats */ - { size_t const largest = FSE_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, WKSP); + { size_t const largest = HIST_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, WKSP); + assert(!HIST_isError(largest)); if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 0; } /* single symbol, rle */ if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */ } @@ -834,7 +835,8 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr, /* CTable for Literal Lengths */ { U32 max = MaxLL; - size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP); + size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP); /* cannot fail */ + assert(!HIST_isError(mostFrequent)); if (mostFrequent == nbSeq) { /* do RLE if we have the chance */ *op++ = llCodeTable[0]; @@ -865,7 +867,8 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr, /* CTable for Offsets */ /* see Literal Lengths for descriptions of mode choices */ { U32 max = MaxOff; - size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP); + size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP); /* cannot fail */ + assert(!HIST_isError(mostFrequent)); if (mostFrequent == nbSeq) { *op++ = ofCodeTable[0]; FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); @@ -892,7 +895,8 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr, /* CTable for MatchLengths */ /* see Literal Lengths for descriptions of mode choices */ { U32 max = MaxML; - size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP); + size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP); /* cannot fail */ + assert(!HIST_isError(mostFrequent)); if (mostFrequent == nbSeq) { *op++ = *mlCodeTable; FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);