grouped all histogram functions into hist.c

renamed functions with HIST_* prefix
This commit is contained in:
Yann Collet 2018-06-13 19:49:31 -04:00
parent fa41bcc2c2
commit 2d76defbfe
11 changed files with 382 additions and 260 deletions

View File

@ -34,6 +34,7 @@ SET(Sources
${LIBRARY_DIR}/common/zstd_common.c
${LIBRARY_DIR}/common/error_private.c
${LIBRARY_DIR}/common/xxhash.c
${LIBRARY_DIR}/compress/hist.c
${LIBRARY_DIR}/compress/fse_compress.c
${LIBRARY_DIR}/compress/huf_compress.c
${LIBRARY_DIR}/compress/zstd_compress.c
@ -63,6 +64,7 @@ SET(Headers
${LIBRARY_DIR}/common/huf.h
${LIBRARY_DIR}/common/mem.h
${LIBRARY_DIR}/common/zstd_internal.h
${LIBRARY_DIR}/compress/hist.h
${LIBRARY_DIR}/compress/zstd_compress_internal.h
${LIBRARY_DIR}/compress/zstd_fast.h
${LIBRARY_DIR}/compress/zstd_double_fast.h

View File

@ -1,6 +1,6 @@
# ################################################################
# zstd - Makefile
# Copyright (C) Yann Collet 2014-2016
# Copyright (C) Yann Collet 2014-present
# All rights reserved.
#
# BSD license
@ -47,7 +47,7 @@ ADD_EXECUTABLE(fuzzer ${PROGRAMS_DIR}/datagen.c ${TESTS_DIR}/fuzzer.c)
TARGET_LINK_LIBRARIES(fuzzer libzstd_static)
IF (UNIX)
ADD_EXECUTABLE(paramgrill ${PROGRAMS_DIR}/datagen.c ${TESTS_DIR}/paramgrill.c)
ADD_EXECUTABLE(paramgrill ${PROGRAMS_DIR}/bench.c ${PROGRAMS_DIR}/datagen.c ${TESTS_DIR}/paramgrill.c)
TARGET_LINK_LIBRARIES(paramgrill libzstd_static m) #m is math library
ADD_EXECUTABLE(datagen ${PROGRAMS_DIR}/datagen.c ${TESTS_DIR}/datagencli.c)

View File

@ -10,27 +10,28 @@
<ol>
<li><a href="#Chapter1">Introduction</a></li>
<li><a href="#Chapter2">Version</a></li>
<li><a href="#Chapter3">Simple API</a></li>
<li><a href="#Chapter4">Explicit context</a></li>
<li><a href="#Chapter5">Simple dictionary API</a></li>
<li><a href="#Chapter6">Bulk processing dictionary API</a></li>
<li><a href="#Chapter7">Streaming</a></li>
<li><a href="#Chapter8">Streaming compression - HowTo</a></li>
<li><a href="#Chapter9">Streaming decompression - HowTo</a></li>
<li><a href="#Chapter10">START OF ADVANCED AND EXPERIMENTAL FUNCTIONS</a></li>
<li><a href="#Chapter11">Advanced types</a></li>
<li><a href="#Chapter12">Frame size functions</a></li>
<li><a href="#Chapter13">ZSTD_frameHeaderSize() :</a></li>
<li><a href="#Chapter14">Memory management</a></li>
<li><a href="#Chapter15">Advanced compression functions</a></li>
<li><a href="#Chapter16">Advanced decompression functions</a></li>
<li><a href="#Chapter17">Advanced streaming functions</a></li>
<li><a href="#Chapter18">Buffer-less and synchronous inner streaming functions</a></li>
<li><a href="#Chapter19">Buffer-less streaming compression (synchronous mode)</a></li>
<li><a href="#Chapter20">Buffer-less streaming decompression (synchronous mode)</a></li>
<li><a href="#Chapter21">New advanced API (experimental)</a></li>
<li><a href="#Chapter22">ZSTD_getFrameHeader_advanced() :</a></li>
<li><a href="#Chapter23">Block level API</a></li>
<li><a href="#Chapter3">Default constant</a></li>
<li><a href="#Chapter4">Simple API</a></li>
<li><a href="#Chapter5">Explicit context</a></li>
<li><a href="#Chapter6">Simple dictionary API</a></li>
<li><a href="#Chapter7">Bulk processing dictionary API</a></li>
<li><a href="#Chapter8">Streaming</a></li>
<li><a href="#Chapter9">Streaming compression - HowTo</a></li>
<li><a href="#Chapter10">Streaming decompression - HowTo</a></li>
<li><a href="#Chapter11">START OF ADVANCED AND EXPERIMENTAL FUNCTIONS</a></li>
<li><a href="#Chapter12">Advanced types</a></li>
<li><a href="#Chapter13">Frame size functions</a></li>
<li><a href="#Chapter14">ZSTD_frameHeaderSize() :</a></li>
<li><a href="#Chapter15">Memory management</a></li>
<li><a href="#Chapter16">Advanced compression functions</a></li>
<li><a href="#Chapter17">Advanced decompression functions</a></li>
<li><a href="#Chapter18">Advanced streaming functions</a></li>
<li><a href="#Chapter19">Buffer-less and synchronous inner streaming functions</a></li>
<li><a href="#Chapter20">Buffer-less streaming compression (synchronous mode)</a></li>
<li><a href="#Chapter21">Buffer-less streaming decompression (synchronous mode)</a></li>
<li><a href="#Chapter22">New advanced API (experimental)</a></li>
<li><a href="#Chapter23">ZSTD_getFrameHeader_advanced() :</a></li>
<li><a href="#Chapter24">Block level API</a></li>
</ol>
<hr>
<a name="Chapter1"></a><h2>Introduction</h2><pre>
@ -56,7 +57,9 @@
<pre><b>unsigned ZSTD_versionNumber(void); </b>/**< useful to check dll version */<b>
</b></pre><BR>
<a name="Chapter3"></a><h2>Simple API</h2><pre></pre>
<a name="Chapter3"></a><h2>Default constant</h2><pre></pre>
<a name="Chapter4"></a><h2>Simple API</h2><pre></pre>
<pre><b>size_t ZSTD_compress( void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
@ -117,7 +120,7 @@ unsigned ZSTD_isError(size_t code); </b>/*!< tells if a `size_t` fun
const char* ZSTD_getErrorName(size_t code); </b>/*!< provides readable string from an error code */<b>
int ZSTD_maxCLevel(void); </b>/*!< maximum compression level available */<b>
</pre></b><BR>
<a name="Chapter4"></a><h2>Explicit context</h2><pre></pre>
<a name="Chapter5"></a><h2>Explicit context</h2><pre></pre>
<h3>Compression context</h3><pre> When compressing many times,
it is recommended to allocate a context just once, and re-use it for each successive compression operation.
@ -149,7 +152,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
</b><p> Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx())
</p></pre><BR>
<a name="Chapter5"></a><h2>Simple dictionary API</h2><pre></pre>
<a name="Chapter6"></a><h2>Simple dictionary API</h2><pre></pre>
<pre><b>size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
void* dst, size_t dstCapacity,
@ -171,7 +174,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
Note : When `dict == NULL || dictSize < 8` no dictionary is used.
</p></pre><BR>
<a name="Chapter6"></a><h2>Bulk processing dictionary API</h2><pre></pre>
<a name="Chapter7"></a><h2>Bulk processing dictionary API</h2><pre></pre>
<pre><b>ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
int compressionLevel);
@ -212,7 +215,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
Faster startup than ZSTD_decompress_usingDict(), recommended when same dictionary is used multiple times.
</p></pre><BR>
<a name="Chapter7"></a><h2>Streaming</h2><pre></pre>
<a name="Chapter8"></a><h2>Streaming</h2><pre></pre>
<pre><b>typedef struct ZSTD_inBuffer_s {
const void* src; </b>/**< start of input buffer */<b>
@ -226,7 +229,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
size_t pos; </b>/**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */<b>
} ZSTD_outBuffer;
</b></pre><BR>
<a name="Chapter8"></a><h2>Streaming compression - HowTo</h2><pre>
<a name="Chapter9"></a><h2>Streaming compression - HowTo</h2><pre>
A ZSTD_CStream object is required to track streaming operation.
Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
@ -285,7 +288,7 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
</b></pre><BR>
<pre><b>size_t ZSTD_CStreamOutSize(void); </b>/**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */<b>
</b></pre><BR>
<a name="Chapter9"></a><h2>Streaming decompression - HowTo</h2><pre>
<a name="Chapter10"></a><h2>Streaming decompression - HowTo</h2><pre>
A ZSTD_DStream object is required to track streaming operations.
Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
ZSTD_DStream objects can be re-used multiple times.
@ -318,14 +321,14 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
</b></pre><BR>
<pre><b>size_t ZSTD_DStreamOutSize(void); </b>/*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */<b>
</b></pre><BR>
<a name="Chapter10"></a><h2>START OF ADVANCED AND EXPERIMENTAL FUNCTIONS</h2><pre> The definitions in this section are considered experimental.
<a name="Chapter11"></a><h2>START OF ADVANCED AND EXPERIMENTAL FUNCTIONS</h2><pre> The definitions in this section are considered experimental.
They should never be used with a dynamic library, as prototypes may change in the future.
They are provided for advanced scenarios.
Use them only in association with static linking.
<BR></pre>
<a name="Chapter11"></a><h2>Advanced types</h2><pre></pre>
<a name="Chapter12"></a><h2>Advanced types</h2><pre></pre>
<pre><b>typedef enum { ZSTD_fast=1, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2,
ZSTD_btlazy2, ZSTD_btopt, ZSTD_btultra } ZSTD_strategy; </b>/* from faster to stronger */<b>
@ -362,7 +365,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
ZSTD_dlm_byRef, </b>/**< Reference dictionary content -- the dictionary buffer must outlive its users. */<b>
} ZSTD_dictLoadMethod_e;
</b></pre><BR>
<a name="Chapter12"></a><h2>Frame size functions</h2><pre></pre>
<a name="Chapter13"></a><h2>Frame size functions</h2><pre></pre>
<pre><b>size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
</b><p> `src` should point to the start of a ZSTD encoded frame or skippable frame
@ -395,12 +398,12 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
however it does mean that all frame data must be present and valid.
</p></pre><BR>
<a name="Chapter13"></a><h2>ZSTD_frameHeaderSize() :</h2><pre> srcSize must be >= ZSTD_frameHeaderSize_prefix.
<a name="Chapter14"></a><h2>ZSTD_frameHeaderSize() :</h2><pre> srcSize must be >= ZSTD_frameHeaderSize_prefix.
@return : size of the Frame Header,
or an error code (if srcSize is too small)
<BR></pre>
<a name="Chapter14"></a><h2>Memory management</h2><pre></pre>
<a name="Chapter15"></a><h2>Memory management</h2><pre></pre>
<pre><b>size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
@ -490,7 +493,7 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; </b>/**< t
</p></pre><BR>
<a name="Chapter15"></a><h2>Advanced compression functions</h2><pre></pre>
<a name="Chapter16"></a><h2>Advanced compression functions</h2><pre></pre>
<pre><b>ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
</b><p> Create a digested dictionary for compression
@ -532,7 +535,7 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; </b>/**< t
</b><p> Same as ZSTD_compress_usingCDict(), with fine-tune control over frame parameters
</p></pre><BR>
<a name="Chapter16"></a><h2>Advanced decompression functions</h2><pre></pre>
<a name="Chapter17"></a><h2>Advanced decompression functions</h2><pre></pre>
<pre><b>unsigned ZSTD_isFrame(const void* buffer, size_t size);
</b><p> Tells if the content of `buffer` starts with a valid Frame Identifier.
@ -572,7 +575,7 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; </b>/**< t
When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code.
</p></pre><BR>
<a name="Chapter17"></a><h2>Advanced streaming functions</h2><pre></pre>
<a name="Chapter18"></a><h2>Advanced streaming functions</h2><pre></pre>
<h3>Advanced Streaming compression functions</h3><pre></pre><b><pre>size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); </b>/**< pledgedSrcSize must be correct. If it is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, "0" also disables frame content size field. It may be enabled in the future. */<b>
size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); </b>/**< creates of an internal CDict (incompatible with static CCtx), except if dict == NULL or dictSize < 8, in which case no dict is used. Note: dict is loaded with ZSTD_dm_auto (treated as a full zstd dictionary if it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.*/<b>
@ -604,14 +607,14 @@ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t di
size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); </b>/**< note : ddict is referenced, it must outlive decompression session */<b>
size_t ZSTD_resetDStream(ZSTD_DStream* zds); </b>/**< re-use decompression parameters from previous init; saves dictionary loading */<b>
</pre></b><BR>
<a name="Chapter18"></a><h2>Buffer-less and synchronous inner streaming functions</h2><pre>
<a name="Chapter19"></a><h2>Buffer-less and synchronous inner streaming functions</h2><pre>
This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
But it's also a complex one, with several restrictions, documented below.
Prefer normal streaming API for an easier experience.
<BR></pre>
<a name="Chapter19"></a><h2>Buffer-less streaming compression (synchronous mode)</h2><pre>
<a name="Chapter20"></a><h2>Buffer-less streaming compression (synchronous mode)</h2><pre>
A ZSTD_CCtx object is required to track streaming operations.
Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
ZSTD_CCtx object can be re-used multiple times within successive compression operations.
@ -647,7 +650,7 @@ size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); </b>/* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */<b>
size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); </b>/**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */<b>
</pre></b><BR>
<a name="Chapter20"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre>
<a name="Chapter21"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre>
A ZSTD_DCtx object is required to track streaming operations.
Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
A ZSTD_DCtx object can be re-used multiple times.
@ -738,7 +741,7 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
</pre></b><BR>
<pre><b>typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
</b></pre><BR>
<a name="Chapter21"></a><h2>New advanced API (experimental)</h2><pre></pre>
<a name="Chapter22"></a><h2>New advanced API (experimental)</h2><pre></pre>
<pre><b>typedef enum {
</b>/* Opened question : should we have a format ZSTD_f_auto ?<b>
@ -764,7 +767,7 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
</b>/* compression parameters */<b>
ZSTD_p_compressionLevel=100, </b>/* Update all compression parameters according to pre-defined cLevel table<b>
* Default level is ZSTD_CLEVEL_DEFAULT==3.
* Special: value 0 means "do not change cLevel".
* Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
* Note 1 : it's possible to pass a negative compression level by casting it to unsigned type.
* Note 2 : setting a level sets all default values of other compression parameters.
* Note 3 : setting compressionLevel automatically updates ZSTD_p_compressLiterals. */
@ -1146,7 +1149,7 @@ size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx,
</p></pre><BR>
<a name="Chapter22"></a><h2>ZSTD_getFrameHeader_advanced() :</h2><pre> same as ZSTD_getFrameHeader(),
<a name="Chapter23"></a><h2>ZSTD_getFrameHeader_advanced() :</h2><pre> same as ZSTD_getFrameHeader(),
with added capability to select a format (like ZSTD_f_zstd1_magicless)
<BR></pre>
@ -1182,7 +1185,7 @@ size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx,
</p></pre><BR>
<a name="Chapter23"></a><h2>Block level API</h2><pre></pre>
<a name="Chapter24"></a><h2>Block level API</h2><pre></pre>
<pre><b></b><p> Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
User will have to take in charge required information to regenerate data, such as compressed and content sizes.

View File

@ -72,6 +72,7 @@ extern "C" {
#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
/*-****************************************
* FSE simple functions
******************************************/
@ -129,7 +130,7 @@ FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src,
******************************************/
/*!
FSE_compress() does the following:
1. count symbol occurrence from source[] into table count[]
1. count symbol occurrence from source[] into table count[] (see hist.h)
2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
3. save normalized counters to memory buffer using writeNCount()
4. build encoding table 'CTable' from normalized counters
@ -147,15 +148,6 @@ or to save and provide normalized distribution using external method.
/* *** COMPRESSION *** */
/*! FSE_count():
Provides the precise count of each byte within a table 'count'.
'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
*maxSymbolValuePtr will be updated if detected smaller than initial value.
@return : the count of the most frequent symbol (which is not identified).
if return == srcSize, there is only one symbol.
Can also return an error code, which can be tested with FSE_isError(). */
FSE_PUBLIC_API size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
/*! FSE_optimalTableLog():
dynamically downsize 'tableLog' when conditions are met.
It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
@ -167,7 +159,8 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize
'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
@return : tableLog,
or an errorCode, which can be tested using FSE_isError() */
FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
/*! FSE_NCountWriteBound():
Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
@ -178,8 +171,9 @@ FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tab
Compactly save 'normalizedCounter' into 'buffer'.
@return : size of the compressed table,
or an errorCode, which can be tested using FSE_isError(). */
FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
const short* normalizedCounter,
unsigned maxSymbolValue, unsigned tableLog);
/*! Constructor and Destructor of FSE_CTable.
Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
@ -250,7 +244,9 @@ If there is an error, the function will return an ErrorCode (which can be tested
@return : size read from 'rBuffer',
or an errorCode, which can be tested using FSE_isError().
maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
const void* rBuffer, size_t rBuffSize);
/*! Constructor and Destructor of FSE_DTable.
Note that its size depends on 'tableLog' */
@ -325,33 +321,8 @@ If there is an error, the function will return an error code, which can be teste
/* *****************************************
* FSE advanced API
*******************************************/
/* FSE_count_wksp() :
* Same as FSE_count(), but using an externally provided scratch buffer.
* `workSpace` size must be table of >= `1024` unsigned
*/
size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize, unsigned* workSpace);
/** FSE_countFast() :
* same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr
*/
size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
/* FSE_countFast_wksp() :
* Same as FSE_countFast(), but using an externally provided scratch buffer.
* `workSpace` must be a table of minimum `1024` unsigned
*/
size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace);
/*! FSE_count_simple() :
* Same as FSE_countFast(), but does not use any additional memory (not even on stack).
* This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`).
*/
size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
* FSE advanced API
***************************************** */
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
/**< same as FSE_optimalTableLog(), which used `minus==2` */

View File

@ -1,6 +1,6 @@
/* ******************************************************************
FSE : Finite State Entropy encoder
Copyright (C) 2013-2015, Yann Collet.
Copyright (C) 2013-present, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
@ -37,9 +37,11 @@
****************************************************************/
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memcpy, memset */
#include <stdio.h> /* printf (debug) */
#include "bitstream.h"
#include "compiler.h"
#include "mem.h" /* U32, U16, etc. */
#include "debug.h" /* assert, DEBUGLOG */
#include "hist.h" /* HIST_count_wksp */
#include "bitstream.h"
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
#include "error_private.h"
@ -49,7 +51,6 @@
* Error Management
****************************************************************/
#define FSE_isError ERR_isError
#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
/* **************************************************************
@ -190,8 +191,9 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
#ifndef FSE_COMMONDEFS_ONLY
/*-**************************************************************
* FSE NCount encoding-decoding
* FSE NCount encoding
****************************************************************/
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
{
@ -299,159 +301,6 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalized
}
/*-**************************************************************
* Counting histogram
****************************************************************/
/*! FSE_count_simple
This function counts byte values within `src`, and store the histogram into table `count`.
It doesn't use any additional memory.
But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
For this reason, prefer using a table `count` with 256 elements.
@return : count of most numerous element.
*/
size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize)
{
const BYTE* ip = (const BYTE*)src;
const BYTE* const end = ip + srcSize;
unsigned maxSymbolValue = *maxSymbolValuePtr;
unsigned max=0;
memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
while (ip<end) {
assert(*ip <= maxSymbolValue);
count[*ip++]++;
}
while (!count[maxSymbolValue]) maxSymbolValue--;
*maxSymbolValuePtr = maxSymbolValue;
{ U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; }
return (size_t)max;
}
/* FSE_count_parallel_wksp() :
* Same as FSE_count_parallel(), but using an externally provided scratch buffer.
* `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`.
* @return : largest histogram frequency, or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
static size_t FSE_count_parallel_wksp(
unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize,
unsigned checkMax, unsigned* const workSpace)
{
const BYTE* ip = (const BYTE*)source;
const BYTE* const iend = ip+sourceSize;
unsigned maxSymbolValue = *maxSymbolValuePtr;
unsigned max=0;
U32* const Counting1 = workSpace;
U32* const Counting2 = Counting1 + 256;
U32* const Counting3 = Counting2 + 256;
U32* const Counting4 = Counting3 + 256;
memset(workSpace, 0, 4*256*sizeof(unsigned));
/* safety checks */
if (!sourceSize) {
memset(count, 0, maxSymbolValue + 1);
*maxSymbolValuePtr = 0;
return 0;
}
if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
/* by stripes of 16 bytes */
{ U32 cached = MEM_read32(ip); ip += 4;
while (ip < iend-15) {
U32 c = cached; cached = MEM_read32(ip); ip += 4;
Counting1[(BYTE) c ]++;
Counting2[(BYTE)(c>>8) ]++;
Counting3[(BYTE)(c>>16)]++;
Counting4[ c>>24 ]++;
c = cached; cached = MEM_read32(ip); ip += 4;
Counting1[(BYTE) c ]++;
Counting2[(BYTE)(c>>8) ]++;
Counting3[(BYTE)(c>>16)]++;
Counting4[ c>>24 ]++;
c = cached; cached = MEM_read32(ip); ip += 4;
Counting1[(BYTE) c ]++;
Counting2[(BYTE)(c>>8) ]++;
Counting3[(BYTE)(c>>16)]++;
Counting4[ c>>24 ]++;
c = cached; cached = MEM_read32(ip); ip += 4;
Counting1[(BYTE) c ]++;
Counting2[(BYTE)(c>>8) ]++;
Counting3[(BYTE)(c>>16)]++;
Counting4[ c>>24 ]++;
}
ip-=4;
}
/* finish last symbols */
while (ip<iend) Counting1[*ip++]++;
if (checkMax) { /* verify stats will fit into destination table */
U32 s; for (s=255; s>maxSymbolValue; s--) {
Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
} }
{ U32 s;
if (maxSymbolValue > 255) maxSymbolValue = 255;
for (s=0; s<=maxSymbolValue; s++) {
count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
if (count[s] > max) max = count[s];
} }
while (!count[maxSymbolValue]) maxSymbolValue--;
*maxSymbolValuePtr = maxSymbolValue;
return (size_t)max;
}
/* FSE_countFast_wksp() :
* Same as FSE_countFast(), but using an externally provided scratch buffer.
* `workSpace` size must be table of >= `1024` unsigned */
size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize,
unsigned* workSpace)
{
if (sourceSize < 1500) /* heuristic threshold */
return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
}
/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize)
{
unsigned tmpCounters[1024];
return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
}
/* FSE_count_wksp() :
* Same as FSE_count(), but using an externally provided scratch buffer.
* `workSpace` size must be table of >= `1024` unsigned */
size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize, unsigned* workSpace)
{
if (*maxSymbolValuePtr < 255)
return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
*maxSymbolValuePtr = 255;
return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
}
size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize)
{
unsigned tmpCounters[1024];
return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
}
/*-**************************************************************
* FSE Compression Code
****************************************************************/
@ -645,11 +494,11 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
U32 s;
U32 nTotal = 0;
for (s=0; s<=maxSymbolValue; s++)
printf("%3i: %4i \n", s, normalizedCounter[s]);
RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
for (s=0; s<=maxSymbolValue; s++)
nTotal += abs(normalizedCounter[s]);
if (nTotal != (1U<<tableLog))
printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
getchar();
}
#endif
@ -816,7 +665,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
/* Scan input and build symbol stats */
{ CHECK_V_F(maxCount, FSE_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
{ CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
@ -851,7 +700,7 @@ typedef struct {
size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
{
fseWkspMax_t scratchBuffer;
FSE_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
}

200
lib/compress/hist.c Normal file
View File

@ -0,0 +1,200 @@
/* ******************************************************************
hist : Histogram functions
part of Finite State Entropy project
Copyright (C) 2013-present, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- Public forum : https://groups.google.com/forum/#!forum/lz4c
****************************************************************** */
/* --- dependencies --- */
#include "mem.h" /* U32, BYTE, etc. */
#include "debug.h" /* assert, DEBUGLOG */
#include "error_private.h" /* ERROR */
#include "hist.h"
/* --- Error management --- */
unsigned HIST_isError(size_t code) { return ERR_isError(code); }
/*-**************************************************************
* Histogram functions
****************************************************************/
/*! HIST_count_simple :
Counts byte values within `src`, storing histogram into table `count`.
Doesn't use any additional memory, very limited stack usage.
But unsafe : doesn't check that all values within `src` fit into `count`.
For this reason, prefer using a table `count` of size 256.
@return : count of most numerous element.
this function doesn't produce any error (i.e. it must succeed).
*/
unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize)
{
const BYTE* ip = (const BYTE*)src;
const BYTE* const end = ip + srcSize;
unsigned maxSymbolValue = *maxSymbolValuePtr;
unsigned largestCount=0;
memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
while (ip<end) {
assert(*ip <= maxSymbolValue);
count[*ip++]++;
}
while (!count[maxSymbolValue]) maxSymbolValue--;
*maxSymbolValuePtr = maxSymbolValue;
{ U32 s;
for (s=0; s<=maxSymbolValue; s++)
if (count[s] > largestCount) largestCount = count[s];
}
return largestCount;
}
/* HIST_count_parallel_wksp() :
* Same as HIST_count_parallel(), but using an externally provided scratch buffer.
* `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32.
* @return : largest histogram frequency,
* or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
static size_t HIST_count_parallel_wksp(
unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize,
unsigned checkMax,
unsigned* const workSpace)
{
const BYTE* ip = (const BYTE*)source;
const BYTE* const iend = ip+sourceSize;
unsigned maxSymbolValue = *maxSymbolValuePtr;
unsigned max=0;
U32* const Counting1 = workSpace;
U32* const Counting2 = Counting1 + 256;
U32* const Counting3 = Counting2 + 256;
U32* const Counting4 = Counting3 + 256;
memset(workSpace, 0, 4*256*sizeof(unsigned));
/* safety checks */
if (!sourceSize) {
memset(count, 0, maxSymbolValue + 1);
*maxSymbolValuePtr = 0;
return 0;
}
if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
/* by stripes of 16 bytes */
{ U32 cached = MEM_read32(ip); ip += 4;
while (ip < iend-15) {
U32 c = cached; cached = MEM_read32(ip); ip += 4;
Counting1[(BYTE) c ]++;
Counting2[(BYTE)(c>>8) ]++;
Counting3[(BYTE)(c>>16)]++;
Counting4[ c>>24 ]++;
c = cached; cached = MEM_read32(ip); ip += 4;
Counting1[(BYTE) c ]++;
Counting2[(BYTE)(c>>8) ]++;
Counting3[(BYTE)(c>>16)]++;
Counting4[ c>>24 ]++;
c = cached; cached = MEM_read32(ip); ip += 4;
Counting1[(BYTE) c ]++;
Counting2[(BYTE)(c>>8) ]++;
Counting3[(BYTE)(c>>16)]++;
Counting4[ c>>24 ]++;
c = cached; cached = MEM_read32(ip); ip += 4;
Counting1[(BYTE) c ]++;
Counting2[(BYTE)(c>>8) ]++;
Counting3[(BYTE)(c>>16)]++;
Counting4[ c>>24 ]++;
}
ip-=4;
}
/* finish last symbols */
while (ip<iend) Counting1[*ip++]++;
if (checkMax) { /* verify stats will fit into destination table */
U32 s; for (s=255; s>maxSymbolValue; s--) {
Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
} }
{ U32 s;
if (maxSymbolValue > 255) maxSymbolValue = 255;
for (s=0; s<=maxSymbolValue; s++) {
count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
if (count[s] > max) max = count[s];
} }
while (!count[maxSymbolValue]) maxSymbolValue--;
*maxSymbolValuePtr = maxSymbolValue;
return (size_t)max;
}
/* HIST_countFast_wksp() :
* Same as HIST_countFast(), but using an externally provided scratch buffer.
* `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize,
unsigned* workSpace)
{
if (sourceSize < 1500) /* heuristic threshold */
return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
}
/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize)
{
unsigned tmpCounters[HIST_WKSP_SIZE_U32];
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
}
/* HIST_count_wksp() :
* Same as HIST_count(), but using an externally provided scratch buffer.
* `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize, unsigned* workSpace)
{
if (*maxSymbolValuePtr < 255)
return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
*maxSymbolValuePtr = 255;
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
}
size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize)
{
unsigned tmpCounters[HIST_WKSP_SIZE_U32];
return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
}

90
lib/compress/hist.h Normal file
View File

@ -0,0 +1,90 @@
/* ******************************************************************
hist : Histogram functions
part of Finite State Entropy project
Copyright (C) 2013-present, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
- Public forum : https://groups.google.com/forum/#!forum/lz4c
****************************************************************** */
/* --- dependencies --- */
#include <stddef.h> /* size_t */
/* --- simple histogram functions --- */
/*! HIST_count():
* Provides the precise count of each byte within a table 'count'.
* 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
* Updates *maxSymbolValuePtr with actual largest symbol value detected.
* @return : count of the most frequent symbol (which isn't identified).
* or an error code, which can be tested using HIST_isError().
* note : if return == srcSize, there is only one symbol.
*/
size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize);
unsigned HIST_isError(size_t code); /*< tells if a return value is an error code */
/* --- advanced histogram functions --- */
#define HIST_WKSP_SIZE_U32 1024
/** HIST_count_wksp() :
* Same as HIST_count(), but using an externally provided scratch buffer.
* Benefit is this function will use very little stack space.
* `workSpace` must be a table of unsigned of size >= HIST_WKSP_SIZE_U32
*/
size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize,
unsigned* workSpace);
/** HIST_countFast() :
* same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr.
* This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr`
*/
size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize);
/** HIST_countFast_wksp() :
* Same as HIST_countFast(), but using an externally provided scratch buffer.
* `workSpace` must be a table of unsigned of size >= HIST_WKSP_SIZE_U32
*/
size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize,
unsigned* workSpace);
/*! HIST_count_simple() :
* Same as HIST_countFast(), but does not use any additional memory (not even on stack).
* This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr`.
* It is also a bit slower for large inputs.
* This function doesn't produce any error (i.e. it must succeed).
*/
unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize);

View File

@ -45,8 +45,9 @@
****************************************************************/
#include <string.h> /* memcpy, memset */
#include <stdio.h> /* printf (debug) */
#include "bitstream.h"
#include "compiler.h"
#include "bitstream.h"
#include "hist.h"
#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
#include "fse.h" /* header compression */
#define HUF_STATIC_LINKING_ONLY
@ -100,9 +101,9 @@ size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable,
if (wtSize <= 1) return 0; /* Not compressible */
/* Scan input and build symbol stats */
{ CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize) );
{ unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */
if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
}
tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
@ -667,7 +668,7 @@ static size_t HUF_compress_internal (
}
/* Scan input and build symbol stats */
{ CHECK_V_F(largest, FSE_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) );
{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) );
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
if (largest <= (srcSize >> 7)+1) return 0; /* heuristic : probably not compressible enough */
}

View File

@ -14,6 +14,7 @@
#include <string.h> /* memset */
#include "cpu.h"
#include "mem.h"
#include "hist.h" /* HIST_countFast_wksp */
#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
#include "fse.h"
#define HUF_STATIC_LINKING_ONLY
@ -2109,7 +2110,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
ZSTD_seqToCodes(seqStorePtr);
/* build CTable for Literal Lengths */
{ U32 max = MaxLL;
size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace);
size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace); /* can't fail */
DEBUGLOG(5, "Building LL table");
nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, count, max, mostFrequent, nbSeq, LLFSELog, prevEntropy->fse.litlengthCTable, LL_defaultNorm, LL_defaultNormLog, ZSTD_defaultAllowed, strategy);
@ -2126,7 +2127,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
} }
/* build CTable for Offsets */
{ U32 max = MaxOff;
size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace);
size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace); /* can't fail */
/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
DEBUGLOG(5, "Building OF table");
@ -2144,7 +2145,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
} }
/* build CTable for MatchLengths */
{ U32 max = MaxML;
size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace);
size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace); /* can't fail */
DEBUGLOG(5, "Building ML table");
nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, count, max, mostFrequent, nbSeq, MLFSELog, prevEntropy->fse.matchlengthCTable, ML_defaultNorm, ML_defaultNormLog, ZSTD_defaultAllowed, strategy);

View File

@ -9,6 +9,7 @@
*/
#include "zstd_compress_internal.h"
#include "hist.h"
#include "zstd_opt.h"
@ -142,7 +143,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr,
assert(optPtr->litFreq != NULL);
{ unsigned lit = MaxLit;
FSE_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
}
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);

View File

@ -437,7 +437,8 @@ static size_t writeHufHeader(U32* seed, HUF_CElt* hufTable, void* dst, size_t ds
U32 count[HUF_SYMBOLVALUE_MAX+1];
/* Scan input and build symbol stats */
{ size_t const largest = FSE_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, WKSP);
{ size_t const largest = HIST_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, WKSP);
assert(!HIST_isError(largest));
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 0; } /* single symbol, rle */
if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */
}
@ -834,7 +835,8 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
/* CTable for Literal Lengths */
{ U32 max = MaxLL;
size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP);
size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP); /* cannot fail */
assert(!HIST_isError(mostFrequent));
if (mostFrequent == nbSeq) {
/* do RLE if we have the chance */
*op++ = llCodeTable[0];
@ -865,7 +867,8 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
/* CTable for Offsets */
/* see Literal Lengths for descriptions of mode choices */
{ U32 max = MaxOff;
size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP);
size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP); /* cannot fail */
assert(!HIST_isError(mostFrequent));
if (mostFrequent == nbSeq) {
*op++ = ofCodeTable[0];
FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
@ -892,7 +895,8 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
/* CTable for MatchLengths */
/* see Literal Lengths for descriptions of mode choices */
{ U32 max = MaxML;
size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP);
size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP); /* cannot fail */
assert(!HIST_isError(mostFrequent));
if (mostFrequent == nbSeq) {
*op++ = *mlCodeTable;
FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);