Merge pull request #2733 from terrelln/huf-cspeed
[HUF] Improve Huffman encoding speed
This commit is contained in:
commit 6ee70bae46
@@ -299,7 +299,7 @@ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
     ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
     weightTotal = 0;
     {   U32 n; for (n=0; n<oSize; n++) {
-            if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
+            if (huffWeight[n] > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
            rankStats[huffWeight[n]]++;
            weightTotal += (1 << huffWeight[n]) >> 1;
    }   }
@@ -89,9 +89,9 @@ HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
 
 /** HUF_compress4X_wksp() :
  *  Same as HUF_compress2(), but uses externally allocated `workSpace`.
- *  `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
-#define HUF_WORKSPACE_SIZE ((6 << 10) + 256)
-#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
+ *  `workspace` must be at least as large as HUF_WORKSPACE_SIZE */
+#define HUF_WORKSPACE_SIZE ((8 << 10) + 256)
+#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
 HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
                                            const void* src, size_t srcSize,
                                            unsigned maxSymbolValue, unsigned tableLog,
@@ -136,15 +136,11 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
 
 /* static allocation of HUF's Compression Table */
 /* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */
-struct HUF_CElt_s {
-  U16  val;
-  BYTE nbBits;
-};   /* typedef'd to HUF_CElt */
-typedef struct HUF_CElt_s HUF_CElt;   /* consider it an incomplete type */
-#define HUF_CTABLE_SIZE_U32(maxSymbolValue)   ((maxSymbolValue)+1)   /* Use tables of U32, for proper alignment */
-#define HUF_CTABLE_SIZE(maxSymbolValue)       (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
+typedef size_t HUF_CElt;   /* consider it an incomplete type */
+#define HUF_CTABLE_SIZE_ST(maxSymbolValue)   ((maxSymbolValue)+2)   /* Use tables of size_t, for proper alignment */
+#define HUF_CTABLE_SIZE(maxSymbolValue)      (HUF_CTABLE_SIZE_ST(maxSymbolValue) * sizeof(size_t))
 #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
-    HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */
+    HUF_CElt name[HUF_CTABLE_SIZE_ST(maxSymbolValue)] /* no final ; */
 
 /* static allocation of HUF's DTable */
 typedef U32 HUF_DTable;
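A HUF_CElt is now a single size_t, and a statically allocated table reserves maxSymbolValue+2 of them: one header slot (later in this diff, HUF_readCTable() and HUF_buildCTableFromTree() store the tableLog there) plus maxSymbolValue+1 packed symbol entries. A small allocation sketch using the macros above, for the usual byte alphabet (255 is just the illustrative bound):

    HUF_CREATE_STATIC_CTABLE(hufTable, 255);   /* expands to: HUF_CElt hufTable[HUF_CTABLE_SIZE_ST(255)] */
    /* HUF_CTABLE_SIZE_ST(255) == 257 elements; HUF_CTABLE_SIZE(255) == 257 * sizeof(size_t) bytes */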
@@ -194,6 +190,7 @@ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSym
 size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
 size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
 size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
 int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
 
@@ -250,11 +247,10 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
  *  Loading a CTable saved with HUF_writeCTable() */
 size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
 
-/** HUF_getNbBits() :
+/** HUF_getNbBitsFromCTable() :
  *  Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
- *  Note 1 : is not inlined, as HUF_CElt definition is private
- *  Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */
-U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue);
+ *  Note 1 : is not inlined, as HUF_CElt definition is private */
+U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue);
 
 /*
  * HUF_decompress() does the following:
@@ -306,8 +302,9 @@ size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* c
 /* ====================== */
 
 size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
-size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);  /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
+size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);  /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */
 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
 /** HUF_compress1X_repeat() :
  *  Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
  *  If it uses hufTable it does not modify hufTable or repeat.
@@ -53,6 +53,28 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
 /* *******************************************************
 *  HUF : Huffman block compression
 *********************************************************/
+#define HUF_WORKSPACE_MAX_ALIGNMENT 8
+
+static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
+{
+    size_t const mask = align - 1;
+    size_t const rem = (size_t)workspace & mask;
+    size_t const add = (align - rem) & mask;
+    BYTE* const aligned = (BYTE*)workspace + add;
+    assert((align & (align - 1)) == 0); /* pow 2 */
+    assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
+    if (*workspaceSizePtr >= add) {
+        assert(add < align);
+        assert(((size_t)aligned & mask) == 0);
+        *workspaceSizePtr -= add;
+        return aligned;
+    } else {
+        *workspaceSizePtr = 0;
+        return NULL;
+    }
+}
+
 
 /* HUF_compressWeights() :
  * Same as FSE_compress(), but dedicated to huff0's weights compression.
 * The use case needs much less stack memory.
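HUF_alignUpWorkspace() rounds the workspace pointer up to the next `align` boundary and shrinks the remaining budget by the padding it skipped, returning NULL when the padding alone exceeds the budget. As an illustrative worked example: a workspace starting at address 0x1003 with 100 bytes left, aligned to sizeof(U32) == 4, gives add == 1, so the returned pointer is 0x1004 and the remaining size becomes 99; with a 0-byte budget the helper would instead return NULL and leave the size at 0.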
@@ -75,7 +97,7 @@ static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightT
 
     unsigned maxSymbolValue = HUF_TABLELOG_MAX;
     U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
-    HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace;
+    HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, sizeof(U32));
 
     if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
 
@@ -106,6 +128,40 @@ static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightT
     return (size_t)(op-ostart);
 }
 
+static size_t HUF_getNbBits(HUF_CElt elt)
+{
+    return elt & 0xFF;
+}
+
+static size_t HUF_getNbBitsFast(HUF_CElt elt)
+{
+    return elt;
+}
+
+static size_t HUF_getValue(HUF_CElt elt)
+{
+    return elt & ~0xFF;
+}
+
+static size_t HUF_getValueFast(HUF_CElt elt)
+{
+    return elt;
+}
+
+static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
+{
+    assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
+    *elt = nbBits;
+}
+
+static void HUF_setValue(HUF_CElt* elt, size_t value)
+{
+    size_t const nbBits = HUF_getNbBits(*elt);
+    if (nbBits > 0) {
+        assert((value >> nbBits) == 0);
+        *elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
+    }
+}
+
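Taken together, these helpers define the packed HUF_CElt layout described further down in the HUF_CStream_t comment: nbBits sits in the low byte, and the code value is pre-shifted into the top nbBits of the size_t. A small round-trip sketch on a 64-bit target (the concrete values are illustrative):

    HUF_CElt elt = 0;
    HUF_setNbBits(&elt, 4);    /* low byte now holds 4 */
    HUF_setValue(&elt, 0x9);   /* stored as 0x9 << (64 - 4), i.e. bits [60, 64) */
    assert(HUF_getNbBits(elt) == 4);
    assert(HUF_getValue(elt) == ((size_t)0x9 << 60));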
 typedef struct {
     HUF_CompressWeightsWksp wksp;
@@ -117,9 +173,10 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
                             const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
                             void* workspace, size_t workspaceSize)
 {
+    HUF_CElt const* const ct = CTable + 1;
     BYTE* op = (BYTE*)dst;
     U32 n;
-    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace;
+    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, sizeof(U32));
 
     /* check conditions */
     if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
@@ -130,7 +187,7 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
     for (n=1; n<huffLog+1; n++)
         wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
     for (n=0; n<maxSymbolValue; n++)
-        wksp->huffWeight[n] = wksp->bitsToWeight[CTable[n].nbBits];
+        wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];
 
     /* attempt weights compression by FSE */
     if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
@@ -167,6 +224,7 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
     U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
     U32 tableLog = 0;
     U32 nbSymbols = 0;
+    HUF_CElt* const ct = CTable + 1;
 
     /* get symbol weights */
     CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
@@ -176,6 +234,8 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
     if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
     if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
 
+    CTable[0] = tableLog;
+
     /* Prepare base value per rank */
     {   U32 n, nextRankStart = 0;
         for (n=1; n<=tableLog; n++) {
@@ -187,13 +247,13 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
     /* fill nbBits */
     {   U32 n; for (n=0; n<nbSymbols; n++) {
             const U32 w = huffWeight[n];
-            CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
+            HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
    }   }
 
     /* fill val */
     {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};  /* support w=0=>n=tableLog+1 */
         U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
-        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
+        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
         /* determine stating value per rank */
         valPerRank[tableLog+1] = 0;   /* for w==0 */
         {   U16 min = 0;
@@ -203,18 +263,18 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
                 min >>= 1;
         }   }
         /* assign value within rank, symbol order */
-        { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
+        { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
     }
 
     *maxSymbolValuePtr = nbSymbols - 1;
     return readSize;
 }
 
-U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
+U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
 {
-    const HUF_CElt* table = (const HUF_CElt*)symbolTable;
+    const HUF_CElt* ct = CTable + 1;
     assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
-    return table[symbolValue].nbBits;
+    return (U32)HUF_getNbBits(ct[symbolValue]);
 }
 
 
@@ -491,6 +551,7 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
  */
 static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
 {
+    HUF_CElt* const ct = CTable + 1;
     /* fill result into ctable (val, nbBits) */
     int n;
     U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
@@ -506,20 +567,20 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i
             min >>= 1;
     }   }
     for (n=0; n<alphabetSize; n++)
-        CTable[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
+        HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits);   /* push nbBits per symbol, symbol order */
     for (n=0; n<alphabetSize; n++)
-        CTable[n].val = valPerRank[CTable[n].nbBits]++;   /* assign value within rank, symbol order */
+        HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++);   /* assign value within rank, symbol order */
+    CTable[0] = maxNbBits;
 }
 
-size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
+size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
 {
-    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
+    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, sizeof(U32));
     nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
     nodeElt* const huffNode = huffNode0+1;
     int nonNullRank;
 
     /* safety checks */
-    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
     if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
       return ERROR(workSpace_tooSmall);
     if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
@@ -537,91 +598,327 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo
     maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
     if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
 
-    HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
+    HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
 
     return maxNbBits;
 }
 
 size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
 {
+    HUF_CElt const* ct = CTable + 1;
     size_t nbBits = 0;
     int s;
     for (s = 0; s <= (int)maxSymbolValue; ++s) {
-        nbBits += CTable[s].nbBits * count[s];
+        nbBits += HUF_getNbBits(ct[s]) * count[s];
     }
     return nbBits >> 3;
 }
 
 int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
+    HUF_CElt const* ct = CTable + 1;
     int bad = 0;
     int s;
     for (s = 0; s <= (int)maxSymbolValue; ++s) {
-        bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
+        bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
     }
     return !bad;
 }
 
 size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
 
-FORCE_INLINE_TEMPLATE void
-HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
+/** HUF_CStream_t:
+ * Huffman uses its own BIT_CStream_t implementation.
+ * There are three major differences from BIT_CStream_t:
+ *   1. HUF_addBits() takes a HUF_CElt (size_t) which is
+ *      the pair (nbBits, value) in the format:
+ *      format:
+ *        - Bits [0, 4)            = nbBits
+ *        - Bits [4, 64 - nbBits)  = 0
+ *        - Bits [64 - nbBits, 64) = value
+ *   2. The bitContainer is built from the upper bits and
+ *      right shifted. E.g. to add a new value of N bits
+ *      you right shift the bitContainer by N, then or in
+ *      the new value into the N upper bits.
+ *   3. The bitstream has two bit containers. You can add
+ *      bits to the second container and merge them into
+ *      the first container.
+ */
+
+#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)
+
+typedef struct {
+    size_t bitContainer[2];
+    size_t bitPos[2];
+
+    BYTE* startPtr;
+    BYTE* ptr;
+    BYTE* endPtr;
+} HUF_CStream_t;
+
+/**! HUF_initCStream():
+ * Initializes the bistream.
+ * @returns 0 or an error code.
+ */
+static size_t HUF_initCStream(HUF_CStream_t* bitC,
+                              void* startPtr, size_t dstCapacity)
 {
-    BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
+    ZSTD_memset(bitC, 0, sizeof(*bitC));
+    bitC->startPtr = (BYTE*)startPtr;
+    bitC->ptr = bitC->startPtr;
+    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
+    if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
+    return 0;
 }
 
-#define HUF_FLUSHBITS(s)  BIT_flushBits(s)
+/*! HUF_addBits():
+ * Adds the symbol stored in HUF_CElt elt to the bitstream.
+ *
+ * @param elt   The element we're adding. This is a (nbBits, value) pair.
+ *              See the HUF_CStream_t docs for the format.
+ * @param idx   Insert into the bistream at this idx.
+ * @param kFast This is a template parameter. If the bitstream is guaranteed
+ *              to have at least 4 unused bits after this call it may be 1,
+ *              otherwise it must be 0. HUF_addBits() is faster when fast is set.
+ */
+FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
+{
+    assert(idx <= 1);
+    assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
+    /* This is efficient on x86-64 with BMI2 because shrx
+     * only reads the low 6 bits of the register. The compiler
+     * knows this and elides the mask. When fast is set,
+     * every operation can use the same value loaded from elt.
+     */
+    bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
+    bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
+    /* We only read the low 8 bits of bitC->bitPos[idx] so it
+     * doesn't matter that the high bits have noise from the value.
+     */
+    bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
+    assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
+    /* The last 4-bits of elt are dirty if fast is set,
+     * so we must not be overwriting bits that have already been
+     * inserted into the bit container.
+     */
+#if DEBUGLEVEL >= 1
+    {
+        size_t const nbBits = HUF_getNbBits(elt);
+        size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1;
+        /* Middle bits are 0. */
+        assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
+        /* We didn't overwrite any bits in the bit container. */
+        assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
+    }
+#endif
+}
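HUF_addBits() is the "fill from the top" scheme from point 2 of the HUF_CStream_t comment. As a worked example with two illustrative elements, eltA = (nbBits 3, value 0b101) and eltB = (nbBits 2, value 0b10), both packed as described above: adding eltA leaves the top 3 bits of bitContainer[0] equal to 101 and bitPos[0] == 3; adding eltB first shifts the container right by 2 (moving 101 down to bits [59, 62)) and then ORs 10 into bits [62, 64), so bitPos[0] becomes 5 and the top 5 bits read 10101, with the most recently added code on top.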
 
-#define HUF_FLUSHBITS_1(stream) \
-    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
+FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
+{
+    bitC->bitContainer[1] = 0;
+    bitC->bitPos[1] = 0;
+}
+
+/*! HUF_mergeIndex1() :
+ * Merges the bit container @ index 1 into the bit container @ index 0
+ * and zeros the bit container @ index 1.
+ */
+FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
+{
+    assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
+    bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
+    bitC->bitContainer[0] |= bitC->bitContainer[1];
+    bitC->bitPos[0] += bitC->bitPos[1];
+    assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
+}
 
+/*! HUF_flushBits() :
+ * Flushes the bits in the bit container @ index 0.
+ *
+ * @post bitPos will be < 8.
+ * @param kFast If kFast is set then we must know a-priori that
+ *              the bit container will not overflow.
+ */
+FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
+{
+    /* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
+    size_t const nbBits = bitC->bitPos[0] & 0xFF;
+    size_t const nbBytes = nbBits >> 3;
+    /* The top nbBits bits of bitContainer are the ones we need. */
+    size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
+    /* Mask bitPos to account for the bytes we consumed. */
+    bitC->bitPos[0] &= 7;
+    assert(nbBits > 0);
+    assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
+    assert(bitC->ptr <= bitC->endPtr);
+    MEM_writeLEST(bitC->ptr, bitContainer);
+    bitC->ptr += nbBytes;
+    assert(!kFast || bitC->ptr <= bitC->endPtr);
+    if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
+    /* bitContainer doesn't need to be modified because the leftover
+     * bits are already the top bitPos bits. And we don't care about
+     * noise in the lower values.
+     */
+}
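As a worked example of the flush arithmetic (numbers are illustrative): if bitPos[0] is 53 on entry, then nbBytes == 6, MEM_writeLEST() stores the top 53 bits starting at ptr (the store itself covers a full size_t, which is why the stream reserves spare bytes past endPtr), ptr advances by 6, and bitPos[0] becomes 53 & 7 == 5, leaving the 5 unflushed bits at the top of the container for the next call.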
 
+/*! HUF_endMark()
+ * @returns The Huffman stream end mark: A 1-bit value = 1.
+ */
+static HUF_CElt HUF_endMark(void)
+{
+    HUF_CElt endMark;
+    HUF_setNbBits(&endMark, 1);
+    HUF_setValue(&endMark, 1);
+    return endMark;
+}
+
+/*! HUF_closeCStream() :
+ * @return Size of CStream, in bytes,
+ *         or 0 if it could not fit into dstBuffer */
+static size_t HUF_closeCStream(HUF_CStream_t* bitC)
+{
+    HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
+    HUF_flushBits(bitC, /* kFast */ 0);
+    {
+        size_t const nbBits = bitC->bitPos[0] & 0xFF;
+        if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
+        return (bitC->ptr - bitC->startPtr) + (nbBits > 0);
+    }
+}
 
+FORCE_INLINE_TEMPLATE void
+HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
+{
+    HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
+}
+
+FORCE_INLINE_TEMPLATE void
+HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
+                                   const BYTE* ip, size_t srcSize,
+                                   const HUF_CElt* ct,
+                                   int kUnroll, int kFastFlush, int kLastFast)
+{
+    /* Join to kUnroll */
+    int n = (int)srcSize;
+    int rem = n % kUnroll;
+    if (rem > 0) {
+        for (; rem > 0; --rem) {
+            HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
+        }
+        HUF_flushBits(bitC, kFastFlush);
+    }
+    assert(n % kUnroll == 0);
+
+    /* Join to 2 * kUnroll */
+    if (n % (2 * kUnroll)) {
+        int u;
+        for (u = 1; u < kUnroll; ++u) {
+            HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
+        }
+        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
+        HUF_flushBits(bitC, kFastFlush);
+        n -= kUnroll;
+    }
+    assert(n % (2 * kUnroll) == 0);
+
+    for (; n>0; n-= 2 * kUnroll) {
+        /* Encode kUnroll symbols into the bitstream @ index 0. */
+        int u;
+        for (u = 1; u < kUnroll; ++u) {
+            HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
+        }
+        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
+        HUF_flushBits(bitC, kFastFlush);
+        /* Encode kUnroll symbols into the bitstream @ index 1.
+         * This allows us to start filling the bit container
+         * without any data dependencies.
+         */
+        HUF_zeroIndex1(bitC);
+        for (u = 1; u < kUnroll; ++u) {
+            HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
+        }
+        HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
+        /* Merge bitstream @ index 1 into the bitstream @ index 0 */
+        HUF_mergeIndex1(bitC);
+        HUF_flushBits(bitC, kFastFlush);
+    }
+    assert(n == 0);
+
+}
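A concrete trace of the two joining steps (sizes are illustrative): with srcSize = 23 and kUnroll = 4, the first block encodes 23 % 4 = 3 symbols one by one, bringing n to 20; since 20 % 8 == 4, the second block encodes one more group of 4, bringing n to 16; the main loop then runs twice, each iteration encoding 4 symbols into index 0 and 4 into index 1 before merging, so every iteration retires 2 * kUnroll = 8 symbols.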
 
+/**
+ * Returns a tight upper bound on the output space needed by Huffman
+ * with 8 bytes buffer to handle over-writes. If the output is at least
+ * this large we don't need to do bounds checks during Huffman encoding.
+ */
+static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
+{
+    return ((srcSize * tableLog) >> 3) + 8;
+}
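A quick numeric check of the bound (illustrative values): a 10 000-byte block encoded with tableLog 11 gets ((10000 * 11) >> 3) + 8 = 13758 bytes, i.e. at most tableLog bits per symbol plus the 8 spare bytes that absorb the full-size_t writes done by HUF_flushBits().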
 
-#define HUF_FLUSHBITS_2(stream) \
-    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
 
 FORCE_INLINE_TEMPLATE size_t
 HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
                                    const void* src, size_t srcSize,
                                    const HUF_CElt* CTable)
 {
+    U32 const tableLog = (U32)CTable[0];
+    HUF_CElt const* ct = CTable + 1;
     const BYTE* ip = (const BYTE*) src;
     BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstSize;
     BYTE* op = ostart;
-    size_t n;
-    BIT_CStream_t bitC;
+    HUF_CStream_t bitC;
 
     /* init */
     if (dstSize < 8) return 0;   /* not enough space to compress */
-    { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op));
+    { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
       if (HUF_isError(initErr)) return 0; }
 
-    n = srcSize & ~3;  /* join to mod 4 */
-    switch (srcSize & 3)
-    {
-        case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
-                 HUF_FLUSHBITS_2(&bitC);
-                 /* fall-through */
-        case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
-                 HUF_FLUSHBITS_1(&bitC);
-                 /* fall-through */
-        case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
-                 HUF_FLUSHBITS(&bitC);
-                 /* fall-through */
-        case 0 : /* fall-through */
-        default: break;
+    if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
+        HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);
+    else {
+        if (MEM_32bits()) {
+            switch (tableLog) {
+            case 11:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
+                break;
+            case 10:
+            case 9:
+            case 8:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
+                break;
+            case 7:
+            default:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
+                break;
+            }
+        } else {
+            switch (tableLog) {
+            case 11:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
+                break;
+            case 10:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
+                break;
+            case 9:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
+                break;
+            case 8:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
+                break;
+            case 7:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
+                break;
+            case 6:
+            default:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
+                break;
+            }
+        }
     }
+    assert(bitC.ptr <= bitC.endPtr);
 
-    for (; n>0; n-=4) {  /* note : n&3==0 at this stage */
-        HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
-        HUF_FLUSHBITS_1(&bitC);
-        HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
-        HUF_FLUSHBITS_2(&bitC);
-        HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
-        HUF_FLUSHBITS_1(&bitC);
-        HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
-        HUF_FLUSHBITS(&bitC);
-    }
 
-    return BIT_closeCStream(&bitC);
+    return HUF_closeCStream(&bitC);
 }
 
 #if DYNAMIC_BMI2
@@ -668,9 +965,13 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
 
 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
 {
-    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+    return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
 }
 
+size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
+{
+    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
+}
+
 static size_t
 HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
@@ -690,8 +991,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
 
     assert(op <= oend);
     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
-        if (cSize==0) return 0;
-        assert(cSize <= 65535);
+        if (cSize == 0 || cSize > 65535) return 0;
         MEM_writeLE16(ostart, (U16)cSize);
         op += cSize;
     }
@@ -699,8 +999,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
     ip += segmentSize;
     assert(op <= oend);
     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
-        if (cSize==0) return 0;
-        assert(cSize <= 65535);
+        if (cSize == 0 || cSize > 65535) return 0;
         MEM_writeLE16(ostart+2, (U16)cSize);
         op += cSize;
     }
@@ -708,8 +1007,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
     ip += segmentSize;
     assert(op <= oend);
     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
-        if (cSize==0) return 0;
-        assert(cSize <= 65535);
+        if (cSize == 0 || cSize > 65535) return 0;
         MEM_writeLE16(ostart+4, (U16)cSize);
         op += cSize;
     }
@@ -718,7 +1016,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
     assert(op <= oend);
     assert(ip <= iend);
     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
-        if (cSize==0) return 0;
+        if (cSize == 0 || cSize > 65535) return 0;
         op += cSize;
     }
 
@@ -727,7 +1025,12 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
 
 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
 {
-    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+    return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
 }
 
+size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
+{
+    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
+}
+
 typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
@@ -751,10 +1054,11 @@ static size_t HUF_compressCTable_internal(
 
 typedef struct {
     unsigned count[HUF_SYMBOLVALUE_MAX + 1];
-    HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
+    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
     union {
         HUF_buildCTable_wksp_tables buildCTable_wksp;
         HUF_WriteCTableWksp writeCTable_wksp;
+        U32 hist_wksp[HIST_WKSP_SIZE_U32];
     } wksps;
 } HUF_compress_tables_t;
 
@@ -763,26 +1067,25 @@ typedef struct {
 
 /* HUF_compress_internal() :
  * `workSpace_align4` must be aligned on 4-bytes boundaries,
- * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */
+ * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
 static size_t
 HUF_compress_internal (void* dst, size_t dstSize,
                        const void* src, size_t srcSize,
                        unsigned maxSymbolValue, unsigned huffLog,
                        HUF_nbStreams_e nbStreams,
-                       void* workSpace_align4, size_t wkspSize,
+                       void* workSpace, size_t wkspSize,
                        HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
                        const int bmi2, unsigned suspectUncompressible)
 {
-    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace_align4;
+    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, sizeof(size_t));
     BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstSize;
     BYTE* op = ostart;
 
-    HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE);
-    assert(((size_t)workSpace_align4 & 3) == 0);   /* must be aligned on 4-bytes boundaries */
+    HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
 
     /* checks & inits */
-    if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
+    if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
     if (!srcSize) return 0;  /* Uncompressed */
     if (!dstSize) return 0;  /* cannot fit anything within dst budget */
     if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
@@ -814,7 +1117,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
     }
 
     /* Scan input and build symbol stats */
-    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace_align4, wkspSize) );
+    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
         if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
         if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
     }
@@ -839,9 +1142,12 @@ HUF_compress_internal (void* dst, size_t dstSize,
                                             &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
         CHECK_F(maxBits);
         huffLog = (U32)maxBits;
-        /* Zero unused symbols in CTable, so we can check it for validity */
-        ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0,
-                    sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
     }
+    /* Zero unused symbols in CTable, so we can check it for validity */
+    {
+        size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
+        size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
+        ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
+    }
 
     /* Write table description header */
@@ -939,7 +1245,7 @@ size_t HUF_compress1X (void* dst, size_t dstSize,
                  const void* src, size_t srcSize,
                  unsigned maxSymbolValue, unsigned huffLog)
 {
-    unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
+    U64 workSpace[HUF_WORKSPACE_SIZE_U64];
     return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
 }
 
@@ -947,7 +1253,7 @@ size_t HUF_compress2 (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog)
 {
-    unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
+    U64 workSpace[HUF_WORKSPACE_SIZE_U64];
    return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
 }
 
@@ -63,7 +63,7 @@ typedef struct {
 } ZSTD_localDict;
 
 typedef struct {
-    HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)];
+    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
     HUF_repeat repeatMode;
 } ZSTD_hufCTables_t;
 
@@ -126,7 +126,7 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
             optPtr->litSum = 0;
             for (lit=0; lit<=MaxLit; lit++) {
                 U32 const scaleLog = 11;   /* scale to 2K */
-                U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
+                U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
                 assert(bitCost <= scaleLog);
                 optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                 optPtr->litSum += optPtr->litFreq[lit];
@@ -185,7 +185,7 @@ BYTE SEQUENCE_LLCODE[ZSTD_BLOCKSIZE_MAX];
 BYTE SEQUENCE_MLCODE[ZSTD_BLOCKSIZE_MAX];
 BYTE SEQUENCE_OFCODE[ZSTD_BLOCKSIZE_MAX];
 
-unsigned WKSP[HUF_WORKSPACE_SIZE_U32];
+U64 WKSP[HUF_WORKSPACE_SIZE_U64];
 
 typedef struct {
     size_t contentSize; /* 0 means unknown (unless contentSize == windowSize == 0) */
@@ -199,7 +199,7 @@ typedef struct {
     int hufInit;
     /* the distribution used in the previous block for repeat mode */
     BYTE hufDist[DISTSIZE];
-    HUF_CElt hufTable [256];
+    HUF_CElt hufTable [HUF_CTABLE_SIZE_ST(255)];
 
     int fseInit;
     FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
tests/fuzz/.gitignore
@@ -17,6 +17,7 @@ decompress_dstSize_tooSmall
 fse_read_ncount
 sequence_compression_api
 seekable_roundtrip
+huf_round_trip
 fuzz-*.log
 rt_lib_*
 d_lib_*
@@ -103,7 +103,8 @@ FUZZ_TARGETS := \
        decompress_dstSize_tooSmall \
        fse_read_ncount \
        sequence_compression_api \
-       seekable_roundtrip
+       seekable_roundtrip \
+       huf_round_trip
 
 all: libregression.a $(FUZZ_TARGETS)
 
@@ -200,6 +201,9 @@ sequence_compression_api: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_sequenc
 seekable_roundtrip: $(FUZZ_HEADERS) $(SEEKABLE_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) $(SEEKABLE_OBJS) rt_fuzz_seekable_roundtrip.o
 	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) $(SEEKABLE_OBJS) rt_fuzz_seekable_roundtrip.o $(LIB_FUZZING_ENGINE) -o $@
 
+huf_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_huf_round_trip.o
+	$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_huf_round_trip.o $(LIB_FUZZING_ENGINE) -o $@
+
 libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o
 	$(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o
 
@@ -63,6 +63,7 @@ TARGET_INFO = {
     'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
     'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
     'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
+    'huf_round_trip': TargetInfo(InputType.RAW_DATA),
 }
 TARGETS = list(TARGET_INFO.keys())
 ALL_TARGETS = TARGETS + ['all']
tests/fuzz/huf_round_trip.c (new file, 132 lines)
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/**
+ * This fuzz target performs a zstd round-trip test (compress & decompress),
+ * compares the result with the original, and calls abort() on corruption.
+ */
+
+#define HUF_STATIC_LINKING_ONLY
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "common/cpu.h"
+#include "compress/hist.h"
+#include "common/huf.h"
+#include "fuzz_helpers.h"
+#include "fuzz_data_producer.h"
+
+static size_t adjustTableLog(size_t tableLog, size_t maxSymbol)
+{
+    size_t const alphabetSize = maxSymbol + 1;
+    size_t minTableLog = BIT_highbit32(alphabetSize) + 1;
+    if ((alphabetSize & (alphabetSize - 1)) != 0) {
+        ++minTableLog;
+    }
+    assert(minTableLog <= 9);
+    if (tableLog < minTableLog)
+        return minTableLog;
+    else
+        return tableLog;
+}
+
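adjustTableLog() raises the requested tableLog to the smallest log that can describe the alphabet. As an illustrative example, maxSymbol = 199 gives alphabetSize = 200, BIT_highbit32(200) == 7, so minTableLog starts at 8 and, because 200 is not a power of two, ends up at 9; a requested tableLog of 6 is therefore raised to 9, while 10 would be returned unchanged.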
+int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
+{
+    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
+    /* Select random parameters: #streams, X1 or X2 decoding, bmi2 */
+    int const streams = FUZZ_dataProducer_int32Range(producer, 0, 1);
+    int const symbols = FUZZ_dataProducer_int32Range(producer, 0, 1);
+    int const bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()) && FUZZ_dataProducer_int32Range(producer, 0, 1);
+    /* Select a random cBufSize - it may be too small */
+    size_t const cBufSize = FUZZ_dataProducer_uint32Range(producer, 0, 4 * size);
+    /* Select a random tableLog - we'll adjust it up later */
+    size_t tableLog = FUZZ_dataProducer_uint32Range(producer, 1, 12);
+    size_t const kMaxSize = 256 * 1024;
+    size = FUZZ_dataProducer_remainingBytes(producer);
+    if (size > kMaxSize)
+        size = kMaxSize;
+
+    if (size <= 1) {
+        FUZZ_dataProducer_free(producer);
+        return 0;
+    }
+
+    uint32_t maxSymbol = 255;
+
+    U32 count[256];
+    size_t const mostFrequent = HIST_count(count, &maxSymbol, src, size);
+    FUZZ_ZASSERT(mostFrequent);
+    if (mostFrequent == size) {
+        /* RLE */
+        FUZZ_dataProducer_free(producer);
+        return 0;
+
+    }
+    FUZZ_ASSERT(maxSymbol <= 255);
+    tableLog = adjustTableLog(tableLog, maxSymbol);
+
+    size_t const wkspSize = HUF_WORKSPACE_SIZE;
+    void* wksp = FUZZ_malloc(wkspSize);
+    void* rBuf = FUZZ_malloc(size);
+    void* cBuf = FUZZ_malloc(cBufSize);
+    HUF_CElt* ct = (HUF_CElt*)FUZZ_malloc(HUF_CTABLE_SIZE(maxSymbol));
+    HUF_DTable* dt = (HUF_DTable*)FUZZ_malloc(HUF_DTABLE_SIZE(tableLog) * sizeof(HUF_DTable));
+    dt[0] = tableLog * 0x01000001;
+
+    tableLog = HUF_optimalTableLog(tableLog, size, maxSymbol);
+    FUZZ_ASSERT(tableLog <= 12);
+    tableLog = HUF_buildCTable_wksp(ct, count, maxSymbol, tableLog, wksp, wkspSize);
+    FUZZ_ZASSERT(tableLog);
+    size_t const tableSize = HUF_writeCTable_wksp(cBuf, cBufSize, ct, maxSymbol, tableLog, wksp, wkspSize);
+    if (ERR_isError(tableSize)) {
+        /* Errors on uncompressible data or cBufSize too small */
+        goto _out;
+    }
+    FUZZ_ZASSERT(tableSize);
+    if (symbols == 0) {
+        FUZZ_ZASSERT(HUF_readDTableX1_wksp_bmi2(dt, cBuf, tableSize, wksp, wkspSize, bmi2));
+    } else {
+        size_t const ret = HUF_readDTableX2_wksp(dt, cBuf, tableSize, wksp, wkspSize);
+        if (ERR_getErrorCode(ret) == ZSTD_error_tableLog_tooLarge) {
+            FUZZ_ZASSERT(HUF_readDTableX1_wksp_bmi2(dt, cBuf, tableSize, wksp, wkspSize, bmi2));
+        } else {
+            FUZZ_ZASSERT(ret);
+        }
+    }
+
+    size_t cSize;
+    size_t rSize;
+    if (streams == 0) {
+        cSize = HUF_compress1X_usingCTable_bmi2(cBuf, cBufSize, src, size, ct, bmi2);
+        FUZZ_ZASSERT(cSize);
+        if (cSize != 0)
+            rSize = HUF_decompress1X_usingDTable_bmi2(rBuf, size, cBuf, cSize, dt, bmi2);
+    } else {
+        cSize = HUF_compress4X_usingCTable_bmi2(cBuf, cBufSize, src, size, ct, bmi2);
+        FUZZ_ZASSERT(cSize);
+        if (cSize != 0)
+            rSize = HUF_decompress4X_usingDTable_bmi2(rBuf, size, cBuf, cSize, dt, bmi2);
+    }
+    if (cSize != 0) {
+        FUZZ_ZASSERT(rSize);
+        FUZZ_ASSERT_MSG(rSize == size, "Incorrect regenerated size");
+        FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, rBuf, size), "Corruption!");
+    }
+_out:
+    free(rBuf);
+    free(cBuf);
+    free(ct);
+    free(dt);
+    free(wksp);
+    FUZZ_dataProducer_free(producer);
+    return 0;
+}