grouped all histogram functions into hist.c
renamed functions with HIST_* prefix
This commit is contained in:
parent
fa41bcc2c2
commit
2d76defbfe
@ -34,6 +34,7 @@ SET(Sources
|
||||
${LIBRARY_DIR}/common/zstd_common.c
|
||||
${LIBRARY_DIR}/common/error_private.c
|
||||
${LIBRARY_DIR}/common/xxhash.c
|
||||
${LIBRARY_DIR}/compress/hist.c
|
||||
${LIBRARY_DIR}/compress/fse_compress.c
|
||||
${LIBRARY_DIR}/compress/huf_compress.c
|
||||
${LIBRARY_DIR}/compress/zstd_compress.c
|
||||
@ -63,6 +64,7 @@ SET(Headers
|
||||
${LIBRARY_DIR}/common/huf.h
|
||||
${LIBRARY_DIR}/common/mem.h
|
||||
${LIBRARY_DIR}/common/zstd_internal.h
|
||||
${LIBRARY_DIR}/compress/hist.h
|
||||
${LIBRARY_DIR}/compress/zstd_compress_internal.h
|
||||
${LIBRARY_DIR}/compress/zstd_fast.h
|
||||
${LIBRARY_DIR}/compress/zstd_double_fast.h
|
||||
|
@ -1,6 +1,6 @@
|
||||
# ################################################################
|
||||
# zstd - Makefile
|
||||
# Copyright (C) Yann Collet 2014-2016
|
||||
# Copyright (C) Yann Collet 2014-present
|
||||
# All rights reserved.
|
||||
#
|
||||
# BSD license
|
||||
@ -47,7 +47,7 @@ ADD_EXECUTABLE(fuzzer ${PROGRAMS_DIR}/datagen.c ${TESTS_DIR}/fuzzer.c)
|
||||
TARGET_LINK_LIBRARIES(fuzzer libzstd_static)
|
||||
|
||||
IF (UNIX)
|
||||
ADD_EXECUTABLE(paramgrill ${PROGRAMS_DIR}/datagen.c ${TESTS_DIR}/paramgrill.c)
|
||||
ADD_EXECUTABLE(paramgrill ${PROGRAMS_DIR}/bench.c ${PROGRAMS_DIR}/datagen.c ${TESTS_DIR}/paramgrill.c)
|
||||
TARGET_LINK_LIBRARIES(paramgrill libzstd_static m) #m is math library
|
||||
|
||||
ADD_EXECUTABLE(datagen ${PROGRAMS_DIR}/datagen.c ${TESTS_DIR}/datagencli.c)
|
||||
|
@ -10,27 +10,28 @@
|
||||
<ol>
|
||||
<li><a href="#Chapter1">Introduction</a></li>
|
||||
<li><a href="#Chapter2">Version</a></li>
|
||||
<li><a href="#Chapter3">Simple API</a></li>
|
||||
<li><a href="#Chapter4">Explicit context</a></li>
|
||||
<li><a href="#Chapter5">Simple dictionary API</a></li>
|
||||
<li><a href="#Chapter6">Bulk processing dictionary API</a></li>
|
||||
<li><a href="#Chapter7">Streaming</a></li>
|
||||
<li><a href="#Chapter8">Streaming compression - HowTo</a></li>
|
||||
<li><a href="#Chapter9">Streaming decompression - HowTo</a></li>
|
||||
<li><a href="#Chapter10">START OF ADVANCED AND EXPERIMENTAL FUNCTIONS</a></li>
|
||||
<li><a href="#Chapter11">Advanced types</a></li>
|
||||
<li><a href="#Chapter12">Frame size functions</a></li>
|
||||
<li><a href="#Chapter13">ZSTD_frameHeaderSize() :</a></li>
|
||||
<li><a href="#Chapter14">Memory management</a></li>
|
||||
<li><a href="#Chapter15">Advanced compression functions</a></li>
|
||||
<li><a href="#Chapter16">Advanced decompression functions</a></li>
|
||||
<li><a href="#Chapter17">Advanced streaming functions</a></li>
|
||||
<li><a href="#Chapter18">Buffer-less and synchronous inner streaming functions</a></li>
|
||||
<li><a href="#Chapter19">Buffer-less streaming compression (synchronous mode)</a></li>
|
||||
<li><a href="#Chapter20">Buffer-less streaming decompression (synchronous mode)</a></li>
|
||||
<li><a href="#Chapter21">New advanced API (experimental)</a></li>
|
||||
<li><a href="#Chapter22">ZSTD_getFrameHeader_advanced() :</a></li>
|
||||
<li><a href="#Chapter23">Block level API</a></li>
|
||||
<li><a href="#Chapter3">Default constant</a></li>
|
||||
<li><a href="#Chapter4">Simple API</a></li>
|
||||
<li><a href="#Chapter5">Explicit context</a></li>
|
||||
<li><a href="#Chapter6">Simple dictionary API</a></li>
|
||||
<li><a href="#Chapter7">Bulk processing dictionary API</a></li>
|
||||
<li><a href="#Chapter8">Streaming</a></li>
|
||||
<li><a href="#Chapter9">Streaming compression - HowTo</a></li>
|
||||
<li><a href="#Chapter10">Streaming decompression - HowTo</a></li>
|
||||
<li><a href="#Chapter11">START OF ADVANCED AND EXPERIMENTAL FUNCTIONS</a></li>
|
||||
<li><a href="#Chapter12">Advanced types</a></li>
|
||||
<li><a href="#Chapter13">Frame size functions</a></li>
|
||||
<li><a href="#Chapter14">ZSTD_frameHeaderSize() :</a></li>
|
||||
<li><a href="#Chapter15">Memory management</a></li>
|
||||
<li><a href="#Chapter16">Advanced compression functions</a></li>
|
||||
<li><a href="#Chapter17">Advanced decompression functions</a></li>
|
||||
<li><a href="#Chapter18">Advanced streaming functions</a></li>
|
||||
<li><a href="#Chapter19">Buffer-less and synchronous inner streaming functions</a></li>
|
||||
<li><a href="#Chapter20">Buffer-less streaming compression (synchronous mode)</a></li>
|
||||
<li><a href="#Chapter21">Buffer-less streaming decompression (synchronous mode)</a></li>
|
||||
<li><a href="#Chapter22">New advanced API (experimental)</a></li>
|
||||
<li><a href="#Chapter23">ZSTD_getFrameHeader_advanced() :</a></li>
|
||||
<li><a href="#Chapter24">Block level API</a></li>
|
||||
</ol>
|
||||
<hr>
|
||||
<a name="Chapter1"></a><h2>Introduction</h2><pre>
|
||||
@ -56,7 +57,9 @@
|
||||
|
||||
<pre><b>unsigned ZSTD_versionNumber(void); </b>/**< useful to check dll version */<b>
|
||||
</b></pre><BR>
|
||||
<a name="Chapter3"></a><h2>Simple API</h2><pre></pre>
|
||||
<a name="Chapter3"></a><h2>Default constant</h2><pre></pre>
|
||||
|
||||
<a name="Chapter4"></a><h2>Simple API</h2><pre></pre>
|
||||
|
||||
<pre><b>size_t ZSTD_compress( void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
@ -117,7 +120,7 @@ unsigned ZSTD_isError(size_t code); </b>/*!< tells if a `size_t` fun
|
||||
const char* ZSTD_getErrorName(size_t code); </b>/*!< provides readable string from an error code */<b>
|
||||
int ZSTD_maxCLevel(void); </b>/*!< maximum compression level available */<b>
|
||||
</pre></b><BR>
|
||||
<a name="Chapter4"></a><h2>Explicit context</h2><pre></pre>
|
||||
<a name="Chapter5"></a><h2>Explicit context</h2><pre></pre>
|
||||
|
||||
<h3>Compression context</h3><pre> When compressing many times,
|
||||
it is recommended to allocate a context just once, and re-use it for each successive compression operation.
|
||||
@ -149,7 +152,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
|
||||
</b><p> Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx())
|
||||
</p></pre><BR>
|
||||
|
||||
<a name="Chapter5"></a><h2>Simple dictionary API</h2><pre></pre>
|
||||
<a name="Chapter6"></a><h2>Simple dictionary API</h2><pre></pre>
|
||||
|
||||
<pre><b>size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
@ -171,7 +174,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
|
||||
Note : When `dict == NULL || dictSize < 8` no dictionary is used.
|
||||
</p></pre><BR>
|
||||
|
||||
<a name="Chapter6"></a><h2>Bulk processing dictionary API</h2><pre></pre>
|
||||
<a name="Chapter7"></a><h2>Bulk processing dictionary API</h2><pre></pre>
|
||||
|
||||
<pre><b>ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
|
||||
int compressionLevel);
|
||||
@ -212,7 +215,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
|
||||
Faster startup than ZSTD_decompress_usingDict(), recommended when same dictionary is used multiple times.
|
||||
</p></pre><BR>
|
||||
|
||||
<a name="Chapter7"></a><h2>Streaming</h2><pre></pre>
|
||||
<a name="Chapter8"></a><h2>Streaming</h2><pre></pre>
|
||||
|
||||
<pre><b>typedef struct ZSTD_inBuffer_s {
|
||||
const void* src; </b>/**< start of input buffer */<b>
|
||||
@ -226,7 +229,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
|
||||
size_t pos; </b>/**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */<b>
|
||||
} ZSTD_outBuffer;
|
||||
</b></pre><BR>
|
||||
<a name="Chapter8"></a><h2>Streaming compression - HowTo</h2><pre>
|
||||
<a name="Chapter9"></a><h2>Streaming compression - HowTo</h2><pre>
|
||||
A ZSTD_CStream object is required to track streaming operation.
|
||||
Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
|
||||
ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
|
||||
@ -285,7 +288,7 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
|
||||
</b></pre><BR>
|
||||
<pre><b>size_t ZSTD_CStreamOutSize(void); </b>/**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */<b>
|
||||
</b></pre><BR>
|
||||
<a name="Chapter9"></a><h2>Streaming decompression - HowTo</h2><pre>
|
||||
<a name="Chapter10"></a><h2>Streaming decompression - HowTo</h2><pre>
|
||||
A ZSTD_DStream object is required to track streaming operations.
|
||||
Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
|
||||
ZSTD_DStream objects can be re-used multiple times.
|
||||
@ -318,14 +321,14 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
</b></pre><BR>
|
||||
<pre><b>size_t ZSTD_DStreamOutSize(void); </b>/*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */<b>
|
||||
</b></pre><BR>
|
||||
<a name="Chapter10"></a><h2>START OF ADVANCED AND EXPERIMENTAL FUNCTIONS</h2><pre> The definitions in this section are considered experimental.
|
||||
<a name="Chapter11"></a><h2>START OF ADVANCED AND EXPERIMENTAL FUNCTIONS</h2><pre> The definitions in this section are considered experimental.
|
||||
They should never be used with a dynamic library, as prototypes may change in the future.
|
||||
They are provided for advanced scenarios.
|
||||
Use them only in association with static linking.
|
||||
|
||||
<BR></pre>
|
||||
|
||||
<a name="Chapter11"></a><h2>Advanced types</h2><pre></pre>
|
||||
<a name="Chapter12"></a><h2>Advanced types</h2><pre></pre>
|
||||
|
||||
<pre><b>typedef enum { ZSTD_fast=1, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2,
|
||||
ZSTD_btlazy2, ZSTD_btopt, ZSTD_btultra } ZSTD_strategy; </b>/* from faster to stronger */<b>
|
||||
@ -362,7 +365,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
ZSTD_dlm_byRef, </b>/**< Reference dictionary content -- the dictionary buffer must outlive its users. */<b>
|
||||
} ZSTD_dictLoadMethod_e;
|
||||
</b></pre><BR>
|
||||
<a name="Chapter12"></a><h2>Frame size functions</h2><pre></pre>
|
||||
<a name="Chapter13"></a><h2>Frame size functions</h2><pre></pre>
|
||||
|
||||
<pre><b>size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
|
||||
</b><p> `src` should point to the start of a ZSTD encoded frame or skippable frame
|
||||
@ -395,12 +398,12 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
||||
however it does mean that all frame data must be present and valid.
|
||||
</p></pre><BR>
|
||||
|
||||
<a name="Chapter13"></a><h2>ZSTD_frameHeaderSize() :</h2><pre> srcSize must be >= ZSTD_frameHeaderSize_prefix.
|
||||
<a name="Chapter14"></a><h2>ZSTD_frameHeaderSize() :</h2><pre> srcSize must be >= ZSTD_frameHeaderSize_prefix.
|
||||
@return : size of the Frame Header,
|
||||
or an error code (if srcSize is too small)
|
||||
<BR></pre>
|
||||
|
||||
<a name="Chapter14"></a><h2>Memory management</h2><pre></pre>
|
||||
<a name="Chapter15"></a><h2>Memory management</h2><pre></pre>
|
||||
|
||||
<pre><b>size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
|
||||
size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
|
||||
@ -490,7 +493,7 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; </b>/**< t
|
||||
|
||||
</p></pre><BR>
|
||||
|
||||
<a name="Chapter15"></a><h2>Advanced compression functions</h2><pre></pre>
|
||||
<a name="Chapter16"></a><h2>Advanced compression functions</h2><pre></pre>
|
||||
|
||||
<pre><b>ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
|
||||
</b><p> Create a digested dictionary for compression
|
||||
@ -532,7 +535,7 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; </b>/**< t
|
||||
</b><p> Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters
|
||||
</p></pre><BR>
|
||||
|
||||
<a name="Chapter16"></a><h2>Advanced decompression functions</h2><pre></pre>
|
||||
<a name="Chapter17"></a><h2>Advanced decompression functions</h2><pre></pre>
|
||||
|
||||
<pre><b>unsigned ZSTD_isFrame(const void* buffer, size_t size);
|
||||
</b><p> Tells if the content of `buffer` starts with a valid Frame Identifier.
|
||||
@ -572,7 +575,7 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; </b>/**< t
|
||||
When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code.
|
||||
</p></pre><BR>
|
||||
|
||||
<a name="Chapter17"></a><h2>Advanced streaming functions</h2><pre></pre>
|
||||
<a name="Chapter18"></a><h2>Advanced streaming functions</h2><pre></pre>
|
||||
|
||||
<h3>Advanced Streaming compression functions</h3><pre></pre><b><pre>size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); </b>/**< pledgedSrcSize must be correct. If it is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, "0" also disables frame content size field. It may be enabled in the future. */<b>
|
||||
size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); </b>/**< creates of an internal CDict (incompatible with static CCtx), except if dict == NULL or dictSize < 8, in which case no dict is used. Note: dict is loaded with ZSTD_dm_auto (treated as a full zstd dictionary if it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.*/<b>
|
||||
@ -604,14 +607,14 @@ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t di
|
||||
size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); </b>/**< note : ddict is referenced, it must outlive decompression session */<b>
|
||||
size_t ZSTD_resetDStream(ZSTD_DStream* zds); </b>/**< re-use decompression parameters from previous init; saves dictionary loading */<b>
|
||||
</pre></b><BR>
|
||||
<a name="Chapter18"></a><h2>Buffer-less and synchronous inner streaming functions</h2><pre>
|
||||
<a name="Chapter19"></a><h2>Buffer-less and synchronous inner streaming functions</h2><pre>
|
||||
This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
|
||||
But it's also a complex one, with several restrictions, documented below.
|
||||
Prefer normal streaming API for an easier experience.
|
||||
|
||||
<BR></pre>
|
||||
|
||||
<a name="Chapter19"></a><h2>Buffer-less streaming compression (synchronous mode)</h2><pre>
|
||||
<a name="Chapter20"></a><h2>Buffer-less streaming compression (synchronous mode)</h2><pre>
|
||||
A ZSTD_CCtx object is required to track streaming operations.
|
||||
Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
|
||||
ZSTD_CCtx object can be re-used multiple times within successive compression operations.
|
||||
@ -647,7 +650,7 @@ size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
|
||||
size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); </b>/* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */<b>
|
||||
size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); </b>/**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */<b>
|
||||
</pre></b><BR>
|
||||
<a name="Chapter20"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre>
|
||||
<a name="Chapter21"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre>
|
||||
A ZSTD_DCtx object is required to track streaming operations.
|
||||
Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
|
||||
A ZSTD_DCtx object can be re-used multiple times.
|
||||
@ -738,7 +741,7 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
|
||||
</pre></b><BR>
|
||||
<pre><b>typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
|
||||
</b></pre><BR>
|
||||
<a name="Chapter21"></a><h2>New advanced API (experimental)</h2><pre></pre>
|
||||
<a name="Chapter22"></a><h2>New advanced API (experimental)</h2><pre></pre>
|
||||
|
||||
<pre><b>typedef enum {
|
||||
</b>/* Opened question : should we have a format ZSTD_f_auto ?<b>
|
||||
@ -764,7 +767,7 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
|
||||
</b>/* compression parameters */<b>
|
||||
ZSTD_p_compressionLevel=100, </b>/* Update all compression parameters according to pre-defined cLevel table<b>
|
||||
* Default level is ZSTD_CLEVEL_DEFAULT==3.
|
||||
* Special: value 0 means "do not change cLevel".
|
||||
* Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
|
||||
* Note 1 : it's possible to pass a negative compression level by casting it to unsigned type.
|
||||
* Note 2 : setting a level sets all default values of other compression parameters.
|
||||
* Note 3 : setting compressionLevel automatically updates ZSTD_p_compressLiterals. */
|
||||
@ -1146,7 +1149,7 @@ size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx,
|
||||
|
||||
</p></pre><BR>
|
||||
|
||||
<a name="Chapter22"></a><h2>ZSTD_getFrameHeader_advanced() :</h2><pre> same as ZSTD_getFrameHeader(),
|
||||
<a name="Chapter23"></a><h2>ZSTD_getFrameHeader_advanced() :</h2><pre> same as ZSTD_getFrameHeader(),
|
||||
with added capability to select a format (like ZSTD_f_zstd1_magicless)
|
||||
<BR></pre>
|
||||
|
||||
@ -1182,7 +1185,7 @@ size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx,
|
||||
|
||||
</p></pre><BR>
|
||||
|
||||
<a name="Chapter23"></a><h2>Block level API</h2><pre></pre>
|
||||
<a name="Chapter24"></a><h2>Block level API</h2><pre></pre>
|
||||
|
||||
<pre><b></b><p> Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
|
||||
User will have to take in charge required information to regenerate data, such as compressed and content sizes.
|
||||
|
@ -72,6 +72,7 @@ extern "C" {
|
||||
#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
|
||||
FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
|
||||
|
||||
|
||||
/*-****************************************
|
||||
* FSE simple functions
|
||||
******************************************/
|
||||
@ -129,7 +130,7 @@ FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src,
|
||||
******************************************/
|
||||
/*!
|
||||
FSE_compress() does the following:
|
||||
1. count symbol occurrence from source[] into table count[]
|
||||
1. count symbol occurrence from source[] into table count[] (see hist.h)
|
||||
2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
|
||||
3. save normalized counters to memory buffer using writeNCount()
|
||||
4. build encoding table 'CTable' from normalized counters
|
||||
@ -147,15 +148,6 @@ or to save and provide normalized distribution using external method.
|
||||
|
||||
/* *** COMPRESSION *** */
|
||||
|
||||
/*! FSE_count():
|
||||
Provides the precise count of each byte within a table 'count'.
|
||||
'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
|
||||
*maxSymbolValuePtr will be updated if detected smaller than initial value.
|
||||
@return : the count of the most frequent symbol (which is not identified).
|
||||
if return == srcSize, there is only one symbol.
|
||||
Can also return an error code, which can be tested with FSE_isError(). */
|
||||
FSE_PUBLIC_API size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
|
||||
|
||||
/*! FSE_optimalTableLog():
|
||||
dynamically downsize 'tableLog' when conditions are met.
|
||||
It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
|
||||
@ -167,7 +159,8 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize
|
||||
'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
|
||||
@return : tableLog,
|
||||
or an errorCode, which can be tested using FSE_isError() */
|
||||
FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
|
||||
FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
|
||||
const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
|
||||
|
||||
/*! FSE_NCountWriteBound():
|
||||
Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
|
||||
@ -178,8 +171,9 @@ FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tab
|
||||
Compactly save 'normalizedCounter' into 'buffer'.
|
||||
@return : size of the compressed table,
|
||||
or an errorCode, which can be tested using FSE_isError(). */
|
||||
FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
|
||||
|
||||
FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
|
||||
const short* normalizedCounter,
|
||||
unsigned maxSymbolValue, unsigned tableLog);
|
||||
|
||||
/*! Constructor and Destructor of FSE_CTable.
|
||||
Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
|
||||
@ -250,7 +244,9 @@ If there is an error, the function will return an ErrorCode (which can be tested
|
||||
@return : size read from 'rBuffer',
|
||||
or an errorCode, which can be tested using FSE_isError().
|
||||
maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
|
||||
FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
|
||||
FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
|
||||
unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
|
||||
const void* rBuffer, size_t rBuffSize);
|
||||
|
||||
/*! Constructor and Destructor of FSE_DTable.
|
||||
Note that its size depends on 'tableLog' */
|
||||
@ -325,33 +321,8 @@ If there is an error, the function will return an error code, which can be teste
|
||||
|
||||
|
||||
/* *****************************************
|
||||
* FSE advanced API
|
||||
*******************************************/
|
||||
/* FSE_count_wksp() :
|
||||
* Same as FSE_count(), but using an externally provided scratch buffer.
|
||||
* `workSpace` size must be table of >= `1024` unsigned
|
||||
*/
|
||||
size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* source, size_t sourceSize, unsigned* workSpace);
|
||||
|
||||
/** FSE_countFast() :
|
||||
* same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr
|
||||
*/
|
||||
size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
|
||||
|
||||
/* FSE_countFast_wksp() :
|
||||
* Same as FSE_countFast(), but using an externally provided scratch buffer.
|
||||
* `workSpace` must be a table of minimum `1024` unsigned
|
||||
*/
|
||||
size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace);
|
||||
|
||||
/*! FSE_count_simple() :
|
||||
* Same as FSE_countFast(), but does not use any additional memory (not even on stack).
|
||||
* This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`).
|
||||
*/
|
||||
size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
|
||||
|
||||
|
||||
* FSE advanced API
|
||||
***************************************** */
|
||||
|
||||
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
|
||||
/**< same as FSE_optimalTableLog(), which used `minus==2` */
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* ******************************************************************
|
||||
FSE : Finite State Entropy encoder
|
||||
Copyright (C) 2013-2015, Yann Collet.
|
||||
Copyright (C) 2013-present, Yann Collet.
|
||||
|
||||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
||||
|
||||
@ -37,9 +37,11 @@
|
||||
****************************************************************/
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* memcpy, memset */
|
||||
#include <stdio.h> /* printf (debug) */
|
||||
#include "bitstream.h"
|
||||
#include "compiler.h"
|
||||
#include "mem.h" /* U32, U16, etc. */
|
||||
#include "debug.h" /* assert, DEBUGLOG */
|
||||
#include "hist.h" /* HIST_count_wksp */
|
||||
#include "bitstream.h"
|
||||
#define FSE_STATIC_LINKING_ONLY
|
||||
#include "fse.h"
|
||||
#include "error_private.h"
|
||||
@ -49,7 +51,6 @@
|
||||
* Error Management
|
||||
****************************************************************/
|
||||
#define FSE_isError ERR_isError
|
||||
#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
|
||||
|
||||
|
||||
/* **************************************************************
|
||||
@ -190,8 +191,9 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
|
||||
|
||||
#ifndef FSE_COMMONDEFS_ONLY
|
||||
|
||||
|
||||
/*-**************************************************************
|
||||
* FSE NCount encoding-decoding
|
||||
* FSE NCount encoding
|
||||
****************************************************************/
|
||||
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
|
||||
{
|
||||
@ -299,159 +301,6 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalized
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*-**************************************************************
|
||||
* Counting histogram
|
||||
****************************************************************/
|
||||
/*! FSE_count_simple
|
||||
This function counts byte values within `src`, and store the histogram into table `count`.
|
||||
It doesn't use any additional memory.
|
||||
But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
|
||||
For this reason, prefer using a table `count` with 256 elements.
|
||||
@return : count of most numerous element.
|
||||
*/
|
||||
size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* src, size_t srcSize)
|
||||
{
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
const BYTE* const end = ip + srcSize;
|
||||
unsigned maxSymbolValue = *maxSymbolValuePtr;
|
||||
unsigned max=0;
|
||||
|
||||
memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
|
||||
if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
|
||||
|
||||
while (ip<end) {
|
||||
assert(*ip <= maxSymbolValue);
|
||||
count[*ip++]++;
|
||||
}
|
||||
|
||||
while (!count[maxSymbolValue]) maxSymbolValue--;
|
||||
*maxSymbolValuePtr = maxSymbolValue;
|
||||
|
||||
{ U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; }
|
||||
|
||||
return (size_t)max;
|
||||
}
|
||||
|
||||
|
||||
/* FSE_count_parallel_wksp() :
|
||||
* Same as FSE_count_parallel(), but using an externally provided scratch buffer.
|
||||
* `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`.
|
||||
* @return : largest histogram frequency, or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
|
||||
static size_t FSE_count_parallel_wksp(
|
||||
unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* source, size_t sourceSize,
|
||||
unsigned checkMax, unsigned* const workSpace)
|
||||
{
|
||||
const BYTE* ip = (const BYTE*)source;
|
||||
const BYTE* const iend = ip+sourceSize;
|
||||
unsigned maxSymbolValue = *maxSymbolValuePtr;
|
||||
unsigned max=0;
|
||||
U32* const Counting1 = workSpace;
|
||||
U32* const Counting2 = Counting1 + 256;
|
||||
U32* const Counting3 = Counting2 + 256;
|
||||
U32* const Counting4 = Counting3 + 256;
|
||||
|
||||
memset(workSpace, 0, 4*256*sizeof(unsigned));
|
||||
|
||||
/* safety checks */
|
||||
if (!sourceSize) {
|
||||
memset(count, 0, maxSymbolValue + 1);
|
||||
*maxSymbolValuePtr = 0;
|
||||
return 0;
|
||||
}
|
||||
if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
|
||||
|
||||
/* by stripes of 16 bytes */
|
||||
{ U32 cached = MEM_read32(ip); ip += 4;
|
||||
while (ip < iend-15) {
|
||||
U32 c = cached; cached = MEM_read32(ip); ip += 4;
|
||||
Counting1[(BYTE) c ]++;
|
||||
Counting2[(BYTE)(c>>8) ]++;
|
||||
Counting3[(BYTE)(c>>16)]++;
|
||||
Counting4[ c>>24 ]++;
|
||||
c = cached; cached = MEM_read32(ip); ip += 4;
|
||||
Counting1[(BYTE) c ]++;
|
||||
Counting2[(BYTE)(c>>8) ]++;
|
||||
Counting3[(BYTE)(c>>16)]++;
|
||||
Counting4[ c>>24 ]++;
|
||||
c = cached; cached = MEM_read32(ip); ip += 4;
|
||||
Counting1[(BYTE) c ]++;
|
||||
Counting2[(BYTE)(c>>8) ]++;
|
||||
Counting3[(BYTE)(c>>16)]++;
|
||||
Counting4[ c>>24 ]++;
|
||||
c = cached; cached = MEM_read32(ip); ip += 4;
|
||||
Counting1[(BYTE) c ]++;
|
||||
Counting2[(BYTE)(c>>8) ]++;
|
||||
Counting3[(BYTE)(c>>16)]++;
|
||||
Counting4[ c>>24 ]++;
|
||||
}
|
||||
ip-=4;
|
||||
}
|
||||
|
||||
/* finish last symbols */
|
||||
while (ip<iend) Counting1[*ip++]++;
|
||||
|
||||
if (checkMax) { /* verify stats will fit into destination table */
|
||||
U32 s; for (s=255; s>maxSymbolValue; s--) {
|
||||
Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
|
||||
if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
|
||||
} }
|
||||
|
||||
{ U32 s;
|
||||
if (maxSymbolValue > 255) maxSymbolValue = 255;
|
||||
for (s=0; s<=maxSymbolValue; s++) {
|
||||
count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
|
||||
if (count[s] > max) max = count[s];
|
||||
} }
|
||||
|
||||
while (!count[maxSymbolValue]) maxSymbolValue--;
|
||||
*maxSymbolValuePtr = maxSymbolValue;
|
||||
return (size_t)max;
|
||||
}
|
||||
|
||||
/* FSE_countFast_wksp() :
|
||||
* Same as FSE_countFast(), but using an externally provided scratch buffer.
|
||||
* `workSpace` size must be table of >= `1024` unsigned */
|
||||
size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* source, size_t sourceSize,
|
||||
unsigned* workSpace)
|
||||
{
|
||||
if (sourceSize < 1500) /* heuristic threshold */
|
||||
return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
|
||||
return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
|
||||
}
|
||||
|
||||
/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
|
||||
size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* source, size_t sourceSize)
|
||||
{
|
||||
unsigned tmpCounters[1024];
|
||||
return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
|
||||
}
|
||||
|
||||
/* FSE_count_wksp() :
|
||||
* Same as FSE_count(), but using an externally provided scratch buffer.
|
||||
* `workSpace` size must be table of >= `1024` unsigned */
|
||||
size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* source, size_t sourceSize, unsigned* workSpace)
|
||||
{
|
||||
if (*maxSymbolValuePtr < 255)
|
||||
return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
|
||||
*maxSymbolValuePtr = 255;
|
||||
return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
|
||||
}
|
||||
|
||||
size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* src, size_t srcSize)
|
||||
{
|
||||
unsigned tmpCounters[1024];
|
||||
return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*-**************************************************************
|
||||
* FSE Compression Code
|
||||
****************************************************************/
|
||||
@ -645,11 +494,11 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
|
||||
U32 s;
|
||||
U32 nTotal = 0;
|
||||
for (s=0; s<=maxSymbolValue; s++)
|
||||
printf("%3i: %4i \n", s, normalizedCounter[s]);
|
||||
RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
|
||||
for (s=0; s<=maxSymbolValue; s++)
|
||||
nTotal += abs(normalizedCounter[s]);
|
||||
if (nTotal != (1U<<tableLog))
|
||||
printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
|
||||
RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
|
||||
getchar();
|
||||
}
|
||||
#endif
|
||||
@ -816,7 +665,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
|
||||
if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
|
||||
|
||||
/* Scan input and build symbol stats */
|
||||
{ CHECK_V_F(maxCount, FSE_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
|
||||
{ CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
|
||||
if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
|
||||
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
|
||||
if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
|
||||
@ -851,7 +700,7 @@ typedef struct {
|
||||
size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
|
||||
{
|
||||
fseWkspMax_t scratchBuffer;
|
||||
FSE_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
|
||||
DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
|
||||
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
|
||||
return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
|
||||
}
|
||||
|
200
lib/compress/hist.c
Normal file
200
lib/compress/hist.c
Normal file
@ -0,0 +1,200 @@
|
||||
/* ******************************************************************
|
||||
hist : Histogram functions
|
||||
part of Finite State Entropy project
|
||||
Copyright (C) 2013-present, Yann Collet.
|
||||
|
||||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
You can contact the author at :
|
||||
- FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
||||
- Public forum : https://groups.google.com/forum/#!forum/lz4c
|
||||
****************************************************************** */
|
||||
|
||||
/* --- dependencies --- */
|
||||
#include "mem.h" /* U32, BYTE, etc. */
|
||||
#include "debug.h" /* assert, DEBUGLOG */
|
||||
#include "error_private.h" /* ERROR */
|
||||
#include "hist.h"
|
||||
|
||||
|
||||
/* --- Error management --- */
|
||||
unsigned HIST_isError(size_t code) { return ERR_isError(code); }
|
||||
|
||||
/*-**************************************************************
|
||||
* Histogram functions
|
||||
****************************************************************/
|
||||
/*! HIST_count_simple :
|
||||
Counts byte values within `src`, storing histogram into table `count`.
|
||||
Doesn't use any additional memory, very limited stack usage.
|
||||
But unsafe : doesn't check that all values within `src` fit into `count`.
|
||||
For this reason, prefer using a table `count` of size 256.
|
||||
@return : count of most numerous element.
|
||||
this function doesn't produce any error (i.e. it must succeed).
|
||||
*/
|
||||
unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* src, size_t srcSize)
|
||||
{
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
const BYTE* const end = ip + srcSize;
|
||||
unsigned maxSymbolValue = *maxSymbolValuePtr;
|
||||
unsigned largestCount=0;
|
||||
|
||||
memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
|
||||
if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
|
||||
|
||||
while (ip<end) {
|
||||
assert(*ip <= maxSymbolValue);
|
||||
count[*ip++]++;
|
||||
}
|
||||
|
||||
while (!count[maxSymbolValue]) maxSymbolValue--;
|
||||
*maxSymbolValuePtr = maxSymbolValue;
|
||||
|
||||
{ U32 s;
|
||||
for (s=0; s<=maxSymbolValue; s++)
|
||||
if (count[s] > largestCount) largestCount = count[s];
|
||||
}
|
||||
|
||||
return largestCount;
|
||||
}
|
||||
|
||||
|
||||
/* HIST_count_parallel_wksp() :
|
||||
* Same as HIST_count_parallel(), but using an externally provided scratch buffer.
|
||||
* `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32.
|
||||
* @return : largest histogram frequency,
|
||||
* or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
|
||||
static size_t HIST_count_parallel_wksp(
|
||||
unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* source, size_t sourceSize,
|
||||
unsigned checkMax,
|
||||
unsigned* const workSpace)
|
||||
{
|
||||
const BYTE* ip = (const BYTE*)source;
|
||||
const BYTE* const iend = ip+sourceSize;
|
||||
unsigned maxSymbolValue = *maxSymbolValuePtr;
|
||||
unsigned max=0;
|
||||
U32* const Counting1 = workSpace;
|
||||
U32* const Counting2 = Counting1 + 256;
|
||||
U32* const Counting3 = Counting2 + 256;
|
||||
U32* const Counting4 = Counting3 + 256;
|
||||
|
||||
memset(workSpace, 0, 4*256*sizeof(unsigned));
|
||||
|
||||
/* safety checks */
|
||||
if (!sourceSize) {
|
||||
memset(count, 0, maxSymbolValue + 1);
|
||||
*maxSymbolValuePtr = 0;
|
||||
return 0;
|
||||
}
|
||||
if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
|
||||
|
||||
/* by stripes of 16 bytes */
|
||||
{ U32 cached = MEM_read32(ip); ip += 4;
|
||||
while (ip < iend-15) {
|
||||
U32 c = cached; cached = MEM_read32(ip); ip += 4;
|
||||
Counting1[(BYTE) c ]++;
|
||||
Counting2[(BYTE)(c>>8) ]++;
|
||||
Counting3[(BYTE)(c>>16)]++;
|
||||
Counting4[ c>>24 ]++;
|
||||
c = cached; cached = MEM_read32(ip); ip += 4;
|
||||
Counting1[(BYTE) c ]++;
|
||||
Counting2[(BYTE)(c>>8) ]++;
|
||||
Counting3[(BYTE)(c>>16)]++;
|
||||
Counting4[ c>>24 ]++;
|
||||
c = cached; cached = MEM_read32(ip); ip += 4;
|
||||
Counting1[(BYTE) c ]++;
|
||||
Counting2[(BYTE)(c>>8) ]++;
|
||||
Counting3[(BYTE)(c>>16)]++;
|
||||
Counting4[ c>>24 ]++;
|
||||
c = cached; cached = MEM_read32(ip); ip += 4;
|
||||
Counting1[(BYTE) c ]++;
|
||||
Counting2[(BYTE)(c>>8) ]++;
|
||||
Counting3[(BYTE)(c>>16)]++;
|
||||
Counting4[ c>>24 ]++;
|
||||
}
|
||||
ip-=4;
|
||||
}
|
||||
|
||||
/* finish last symbols */
|
||||
while (ip<iend) Counting1[*ip++]++;
|
||||
|
||||
if (checkMax) { /* verify stats will fit into destination table */
|
||||
U32 s; for (s=255; s>maxSymbolValue; s--) {
|
||||
Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
|
||||
if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
|
||||
} }
|
||||
|
||||
{ U32 s;
|
||||
if (maxSymbolValue > 255) maxSymbolValue = 255;
|
||||
for (s=0; s<=maxSymbolValue; s++) {
|
||||
count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
|
||||
if (count[s] > max) max = count[s];
|
||||
} }
|
||||
|
||||
while (!count[maxSymbolValue]) maxSymbolValue--;
|
||||
*maxSymbolValuePtr = maxSymbolValue;
|
||||
return (size_t)max;
|
||||
}
|
||||
|
||||
/* HIST_countFast_wksp() :
|
||||
* Same as HIST_countFast(), but using an externally provided scratch buffer.
|
||||
* `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
|
||||
size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* source, size_t sourceSize,
|
||||
unsigned* workSpace)
|
||||
{
|
||||
if (sourceSize < 1500) /* heuristic threshold */
|
||||
return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
|
||||
return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
|
||||
}
|
||||
|
||||
/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
|
||||
size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* source, size_t sourceSize)
|
||||
{
|
||||
unsigned tmpCounters[HIST_WKSP_SIZE_U32];
|
||||
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
|
||||
}
|
||||
|
||||
/* HIST_count_wksp() :
|
||||
* Same as HIST_count(), but using an externally provided scratch buffer.
|
||||
* `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
|
||||
size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* source, size_t sourceSize, unsigned* workSpace)
|
||||
{
|
||||
if (*maxSymbolValuePtr < 255)
|
||||
return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
|
||||
*maxSymbolValuePtr = 255;
|
||||
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
|
||||
}
|
||||
|
||||
size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* src, size_t srcSize)
|
||||
{
|
||||
unsigned tmpCounters[HIST_WKSP_SIZE_U32];
|
||||
return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
|
||||
}
|
90
lib/compress/hist.h
Normal file
90
lib/compress/hist.h
Normal file
@ -0,0 +1,90 @@
|
||||
/* ******************************************************************
|
||||
hist : Histogram functions
|
||||
part of Finite State Entropy project
|
||||
Copyright (C) 2013-present, Yann Collet.
|
||||
|
||||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
You can contact the author at :
|
||||
- FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
||||
- Public forum : https://groups.google.com/forum/#!forum/lz4c
|
||||
****************************************************************** */
|
||||
|
||||
/* --- dependencies --- */
|
||||
#include <stddef.h> /* size_t */
|
||||
|
||||
|
||||
/* --- simple histogram functions --- */
|
||||
|
||||
/*! HIST_count():
|
||||
* Provides the precise count of each byte within a table 'count'.
|
||||
* 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
|
||||
* Updates *maxSymbolValuePtr with actual largest symbol value detected.
|
||||
* @return : count of the most frequent symbol (which isn't identified).
|
||||
* or an error code, which can be tested using HIST_isError().
|
||||
* note : if return == srcSize, there is only one symbol.
|
||||
*/
|
||||
size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* src, size_t srcSize);
|
||||
|
||||
unsigned HIST_isError(size_t code); /*< tells if a return value is an error code */
|
||||
|
||||
|
||||
/* --- advanced histogram functions --- */
|
||||
|
||||
#define HIST_WKSP_SIZE_U32 1024
|
||||
/** HIST_count_wksp() :
|
||||
* Same as HIST_count(), but using an externally provided scratch buffer.
|
||||
* Benefit is this function will use very little stack space.
|
||||
* `workSpace` must be a table of unsigned of size >= HIST_WKSP_SIZE_U32
|
||||
*/
|
||||
size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned* workSpace);
|
||||
|
||||
/** HIST_countFast() :
|
||||
* same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr.
|
||||
* This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr`
|
||||
*/
|
||||
size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* src, size_t srcSize);
|
||||
|
||||
/** HIST_countFast_wksp() :
|
||||
* Same as HIST_countFast(), but using an externally provided scratch buffer.
|
||||
* `workSpace` must be a table of unsigned of size >= HIST_WKSP_SIZE_U32
|
||||
*/
|
||||
size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned* workSpace);
|
||||
|
||||
/*! HIST_count_simple() :
|
||||
* Same as HIST_countFast(), but does not use any additional memory (not even on stack).
|
||||
* This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr`.
|
||||
* It is also a bit slower for large inputs.
|
||||
* This function doesn't produce any error (i.e. it must succeed).
|
||||
*/
|
||||
unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* src, size_t srcSize);
|
@ -45,8 +45,9 @@
|
||||
****************************************************************/
|
||||
#include <string.h> /* memcpy, memset */
|
||||
#include <stdio.h> /* printf (debug) */
|
||||
#include "bitstream.h"
|
||||
#include "compiler.h"
|
||||
#include "bitstream.h"
|
||||
#include "hist.h"
|
||||
#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
|
||||
#include "fse.h" /* header compression */
|
||||
#define HUF_STATIC_LINKING_ONLY
|
||||
@ -100,9 +101,9 @@ size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable,
|
||||
if (wtSize <= 1) return 0; /* Not compressible */
|
||||
|
||||
/* Scan input and build symbol stats */
|
||||
{ CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize) );
|
||||
{ unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */
|
||||
if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
|
||||
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
|
||||
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
|
||||
}
|
||||
|
||||
tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
|
||||
@ -667,7 +668,7 @@ static size_t HUF_compress_internal (
|
||||
}
|
||||
|
||||
/* Scan input and build symbol stats */
|
||||
{ CHECK_V_F(largest, FSE_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) );
|
||||
{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) );
|
||||
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
|
||||
if (largest <= (srcSize >> 7)+1) return 0; /* heuristic : probably not compressible enough */
|
||||
}
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <string.h> /* memset */
|
||||
#include "cpu.h"
|
||||
#include "mem.h"
|
||||
#include "hist.h" /* HIST_countFast_wksp */
|
||||
#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
|
||||
#include "fse.h"
|
||||
#define HUF_STATIC_LINKING_ONLY
|
||||
@ -2109,7 +2110,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
|
||||
ZSTD_seqToCodes(seqStorePtr);
|
||||
/* build CTable for Literal Lengths */
|
||||
{ U32 max = MaxLL;
|
||||
size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace);
|
||||
size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace); /* can't fail */
|
||||
DEBUGLOG(5, "Building LL table");
|
||||
nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
|
||||
LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, count, max, mostFrequent, nbSeq, LLFSELog, prevEntropy->fse.litlengthCTable, LL_defaultNorm, LL_defaultNormLog, ZSTD_defaultAllowed, strategy);
|
||||
@ -2126,7 +2127,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
|
||||
} }
|
||||
/* build CTable for Offsets */
|
||||
{ U32 max = MaxOff;
|
||||
size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace);
|
||||
size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace); /* can't fail */
|
||||
/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
|
||||
ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
|
||||
DEBUGLOG(5, "Building OF table");
|
||||
@ -2144,7 +2145,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
|
||||
} }
|
||||
/* build CTable for MatchLengths */
|
||||
{ U32 max = MaxML;
|
||||
size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace);
|
||||
size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace); /* can't fail */
|
||||
DEBUGLOG(5, "Building ML table");
|
||||
nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
|
||||
MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, count, max, mostFrequent, nbSeq, MLFSELog, prevEntropy->fse.matchlengthCTable, ML_defaultNorm, ML_defaultNormLog, ZSTD_defaultAllowed, strategy);
|
||||
|
@ -9,6 +9,7 @@
|
||||
*/
|
||||
|
||||
#include "zstd_compress_internal.h"
|
||||
#include "hist.h"
|
||||
#include "zstd_opt.h"
|
||||
|
||||
|
||||
@ -142,7 +143,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr,
|
||||
|
||||
assert(optPtr->litFreq != NULL);
|
||||
{ unsigned lit = MaxLit;
|
||||
FSE_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
|
||||
HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
|
||||
}
|
||||
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
|
||||
|
||||
|
@ -437,7 +437,8 @@ static size_t writeHufHeader(U32* seed, HUF_CElt* hufTable, void* dst, size_t ds
|
||||
U32 count[HUF_SYMBOLVALUE_MAX+1];
|
||||
|
||||
/* Scan input and build symbol stats */
|
||||
{ size_t const largest = FSE_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, WKSP);
|
||||
{ size_t const largest = HIST_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, WKSP);
|
||||
assert(!HIST_isError(largest));
|
||||
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 0; } /* single symbol, rle */
|
||||
if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */
|
||||
}
|
||||
@ -834,7 +835,8 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
|
||||
|
||||
/* CTable for Literal Lengths */
|
||||
{ U32 max = MaxLL;
|
||||
size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP);
|
||||
size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP); /* cannot fail */
|
||||
assert(!HIST_isError(mostFrequent));
|
||||
if (mostFrequent == nbSeq) {
|
||||
/* do RLE if we have the chance */
|
||||
*op++ = llCodeTable[0];
|
||||
@ -865,7 +867,8 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
|
||||
/* CTable for Offsets */
|
||||
/* see Literal Lengths for descriptions of mode choices */
|
||||
{ U32 max = MaxOff;
|
||||
size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP);
|
||||
size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP); /* cannot fail */
|
||||
assert(!HIST_isError(mostFrequent));
|
||||
if (mostFrequent == nbSeq) {
|
||||
*op++ = ofCodeTable[0];
|
||||
FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
|
||||
@ -892,7 +895,8 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
|
||||
/* CTable for MatchLengths */
|
||||
/* see Literal Lengths for descriptions of mode choices */
|
||||
{ U32 max = MaxML;
|
||||
size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP);
|
||||
size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP); /* cannot fail */
|
||||
assert(!HIST_isError(mostFrequent));
|
||||
if (mostFrequent == nbSeq) {
|
||||
*op++ = *mlCodeTable;
|
||||
FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
|
||||
|
Loading…
x
Reference in New Issue
Block a user