Merge pull request #2733 from terrelln/huf-cspeed

[HUF] Improve Huffman encoding speed
This commit is contained in:
Nick Terrell 2021-08-03 12:59:54 -04:00 committed by GitHub
commit 6ee70bae46
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 539 additions and 98 deletions

View File

@ -299,7 +299,7 @@ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
weightTotal = 0;
{ U32 n; for (n=0; n<oSize; n++) {
if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
if (huffWeight[n] > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
rankStats[huffWeight[n]]++;
weightTotal += (1 << huffWeight[n]) >> 1;
} }

View File

@ -89,9 +89,9 @@ HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
/** HUF_compress4X_wksp() :
* Same as HUF_compress2(), but uses externally allocated `workSpace`.
* `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
#define HUF_WORKSPACE_SIZE ((6 << 10) + 256)
#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
* `workspace` must be at least as large as HUF_WORKSPACE_SIZE */
#define HUF_WORKSPACE_SIZE ((8 << 10) + 256)
#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog,
@ -136,15 +136,11 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
/* static allocation of HUF's Compression Table */
/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */
struct HUF_CElt_s {
U16 val;
BYTE nbBits;
}; /* typedef'd to HUF_CElt */
typedef struct HUF_CElt_s HUF_CElt; /* consider it an incomplete type */
#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */
#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
typedef size_t HUF_CElt; /* consider it an incomplete type */
#define HUF_CTABLE_SIZE_ST(maxSymbolValue) ((maxSymbolValue)+2) /* Use tables of size_t, for proper alignment */
#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_ST(maxSymbolValue) * sizeof(size_t))
#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */
HUF_CElt name[HUF_CTABLE_SIZE_ST(maxSymbolValue)] /* no final ; */
/* static allocation of HUF's DTable */
typedef U32 HUF_DTable;
@ -194,6 +190,7 @@ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSym
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
@ -250,11 +247,10 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
* Loading a CTable saved with HUF_writeCTable() */
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
/** HUF_getNbBits() :
/** HUF_getNbBitsFromCTable() :
* Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
* Note 1 : is not inlined, as HUF_CElt definition is private
* Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */
U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue);
* Note 1 : is not inlined, as HUF_CElt definition is private */
U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue);
/*
* HUF_decompress() does the following:
@ -306,8 +302,9 @@ size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* c
/* ====================== */
size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
/** HUF_compress1X_repeat() :
* Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
* If it uses hufTable it does not modify hufTable or repeat.

View File

@ -53,6 +53,28 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
/* *******************************************************
* HUF : Huffman block compression
*********************************************************/
#define HUF_WORKSPACE_MAX_ALIGNMENT 8
/* HUF_alignUpWorkspace() :
 * Rounds `workspace` up to the next multiple of `align` and charges the
 * skipped bytes against `*workspaceSizePtr`.
 * `align` must be a power of 2 no larger than HUF_WORKSPACE_MAX_ALIGNMENT.
 * @return the aligned pointer, or NULL (with `*workspaceSizePtr` set to 0)
 *         when the workspace is smaller than the required padding. */
static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
{
    size_t const alignMask = align - 1;
    size_t const misalignment = (size_t)workspace & alignMask;
    size_t const padding = (align - misalignment) & alignMask;
    assert((align & (align - 1)) == 0); /* pow 2 */
    assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
    if (*workspaceSizePtr < padding) {
        /* Not even enough room for the padding bytes. */
        *workspaceSizePtr = 0;
        return NULL;
    }
    {   BYTE* const alignedPtr = (BYTE*)workspace + padding;
        assert(padding < align);
        assert(((size_t)alignedPtr & alignMask) == 0);
        *workspaceSizePtr -= padding;
        return alignedPtr;
    }
}
/* HUF_compressWeights() :
* Same as FSE_compress(), but dedicated to huff0's weights compression.
* The use case needs much less stack memory.
@ -75,7 +97,7 @@ static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightT
unsigned maxSymbolValue = HUF_TABLELOG_MAX;
U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace;
HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, sizeof(U32));
if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
@ -106,6 +128,40 @@ static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightT
return (size_t)(op-ostart);
}
/* HUF_getNbBits() :
 * @return the value held in the low 8 bits of `elt`, where nbBits is stored. */
static size_t HUF_getNbBits(HUF_CElt elt)
{
    size_t const nbBitsMask = 0xFF;
    return elt & nbBitsMask;
}
/* HUF_getNbBitsFast() :
 * Returns `elt` without masking. Only the low 8 bits are the bit count;
 * callers must tolerate whatever else is stored in the upper bits. */
static size_t HUF_getNbBitsFast(HUF_CElt elt)
{
    return (size_t)elt;
}
/* HUF_getValue() :
 * @return `elt` with its low 8 bits (the nbBits field) cleared. */
static size_t HUF_getValue(HUF_CElt elt)
{
    size_t const nbBitsMask = 0xFF;
    return elt & ~nbBitsMask;
}
/* HUF_getValueFast() :
 * Returns `elt` without clearing the low 8 bits, so the result still
 * carries the nbBits field alongside the value. */
static size_t HUF_getValueFast(HUF_CElt elt)
{
    return (size_t)elt;
}
/* HUF_setNbBits() :
 * Overwrites the whole element with `nbBits`; any value previously
 * stored in `*elt` is discarded.
 * `nbBits` must be <= HUF_TABLELOG_ABSOLUTEMAX. */
static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
{
    assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
    elt[0] = nbBits;
}
/* HUF_setValue() :
 * ORs `value` into the top nbBits bits of `*elt`, where nbBits is read
 * back from the element itself.
 * Does nothing when nbBits == 0, which also avoids a full-width shift. */
static void HUF_setValue(HUF_CElt* elt, size_t value)
{
    size_t const nbBits = HUF_getNbBits(*elt);
    size_t const eltBits = sizeof(HUF_CElt) * 8;
    if (nbBits == 0) return;
    assert((value >> nbBits) == 0);
    *elt |= value << (eltBits - nbBits);
}
typedef struct {
HUF_CompressWeightsWksp wksp;
@ -117,9 +173,10 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
void* workspace, size_t workspaceSize)
{
HUF_CElt const* const ct = CTable + 1;
BYTE* op = (BYTE*)dst;
U32 n;
HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace;
HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, sizeof(U32));
/* check conditions */
if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
@ -130,7 +187,7 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
for (n=1; n<huffLog+1; n++)
wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
for (n=0; n<maxSymbolValue; n++)
wksp->huffWeight[n] = wksp->bitsToWeight[CTable[n].nbBits];
wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];
/* attempt weights compression by FSE */
if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
@ -167,6 +224,7 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
U32 tableLog = 0;
U32 nbSymbols = 0;
HUF_CElt* const ct = CTable + 1;
/* get symbol weights */
CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
@ -176,6 +234,8 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
CTable[0] = tableLog;
/* Prepare base value per rank */
{ U32 n, nextRankStart = 0;
for (n=1; n<=tableLog; n++) {
@ -187,13 +247,13 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
/* fill nbBits */
{ U32 n; for (n=0; n<nbSymbols; n++) {
const U32 w = huffWeight[n];
CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
} }
/* fill val */
{ U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */
U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
{ U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
{ U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
/* determine starting value per rank */
valPerRank[tableLog+1] = 0; /* for w==0 */
{ U16 min = 0;
@ -203,18 +263,18 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
min >>= 1;
} }
/* assign value within rank, symbol order */
{ U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
{ U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
}
*maxSymbolValuePtr = nbSymbols - 1;
return readSize;
}
U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
{
const HUF_CElt* table = (const HUF_CElt*)symbolTable;
const HUF_CElt* ct = CTable + 1;
assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
return table[symbolValue].nbBits;
return (U32)HUF_getNbBits(ct[symbolValue]);
}
@ -491,6 +551,7 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
*/
static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
{
HUF_CElt* const ct = CTable + 1;
/* fill result into ctable (val, nbBits) */
int n;
U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
@ -506,20 +567,20 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i
min >>= 1;
} }
for (n=0; n<alphabetSize; n++)
CTable[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */
for (n=0; n<alphabetSize; n++)
CTable[n].val = valPerRank[CTable[n].nbBits]++; /* assign value within rank, symbol order */
HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); /* assign value within rank, symbol order */
CTable[0] = maxNbBits;
}
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
{
HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, sizeof(U32));
nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
nodeElt* const huffNode = huffNode0+1;
int nonNullRank;
/* safety checks */
if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
return ERROR(workSpace_tooSmall);
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
@ -537,91 +598,327 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo
maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
return maxNbBits;
}
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
{
HUF_CElt const* ct = CTable + 1;
size_t nbBits = 0;
int s;
for (s = 0; s <= (int)maxSymbolValue; ++s) {
nbBits += CTable[s].nbBits * count[s];
nbBits += HUF_getNbBits(ct[s]) * count[s];
}
return nbBits >> 3;
}
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
HUF_CElt const* ct = CTable + 1;
int bad = 0;
int s;
for (s = 0; s <= (int)maxSymbolValue; ++s) {
bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
}
return !bad;
}
/* HUF_compressBound() :
 * Function form of the HUF_COMPRESSBOUND() macro. */
size_t HUF_compressBound(size_t size)
{
    size_t const bound = HUF_COMPRESSBOUND(size);
    return bound;
}
FORCE_INLINE_TEMPLATE void
HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
/** HUF_CStream_t:
* Huffman uses its own BIT_CStream_t implementation.
* There are three major differences from BIT_CStream_t:
* 1. HUF_addBits() takes a HUF_CElt (size_t) which is
* the pair (nbBits, value) in the format:
* - Bits [0, 4) = nbBits
* - Bits [4, 64 - nbBits) = 0
* - Bits [64 - nbBits, 64) = value
* 2. The bitContainer is built from the upper bits and
* right shifted. E.g. to add a new value of N bits
* you right shift the bitContainer by N, then or in
* the new value into the N upper bits.
* 3. The bitstream has two bit containers. You can add
* bits to the second container and merge them into
* the first container.
*/
#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)
/* State of the Huffman bitstream writer. */
typedef struct {
/* Two bit containers; index 1 can be filled on its own and then merged into index 0. */
size_t bitContainer[2];
/* Bit counter for each container; readers mask it with 0xFF before use. */
size_t bitPos[2];
/* First byte of the destination buffer. */
BYTE* startPtr;
/* Position where the next flush writes. */
BYTE* ptr;
/* Set by HUF_initCStream() to startPtr + dstCapacity - sizeof(bitContainer[0]). */
BYTE* endPtr;
} HUF_CStream_t;
/**! HUF_initCStream():
* Initializes the bitstream.
* @returns 0 or an error code.
*/
static size_t HUF_initCStream(HUF_CStream_t* bitC,
void* startPtr, size_t dstCapacity)
{
BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
ZSTD_memset(bitC, 0, sizeof(*bitC));
bitC->startPtr = (BYTE*)startPtr;
bitC->ptr = bitC->startPtr;
bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
return 0;
}
#define HUF_FLUSHBITS(s) BIT_flushBits(s)
/*! HUF_addBits():
* Adds the symbol stored in HUF_CElt elt to the bitstream:
* the selected bit container is shifted right by nbBits, the value is
* OR-ed into it, and the matching bitPos counter is advanced.
*
* @param bitC The bitstream being written.
* @param elt The element we're adding. This is a (nbBits, value) pair.
* See the HUF_CStream_t docs for the format.
* @param idx Insert into the bitstream at this idx (0 or 1).
* @param kFast This is a template parameter. If the bitstream is guaranteed
* to have at least 4 unused bits after this call it may be 1,
* otherwise it must be 0. HUF_addBits() is faster when fast is set.
*/
FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
{
assert(idx <= 1);
assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
/* This is efficient on x86-64 with BMI2 because shrx
* only reads the low 6 bits of the register. The compiler
* knows this and elides the mask. When fast is set,
* every operation can use the same value loaded from elt.
*/
bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
/* The fast variant ORs in elt unmasked, so its low nbBits field lands in the container too. */
bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
/* We only read the low 8 bits of bitC->bitPos[idx] so it
* doesn't matter that the high bits have noise from the value.
*/
bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
/* The last 4-bits of elt are dirty if fast is set,
* so we must not be overwriting bits that have already been
* inserted into the bit container.
*/
#if DEBUGLEVEL >= 1
{
size_t const nbBits = HUF_getNbBits(elt);
/* Number of low bits of elt occupied by the nbBits field itself. */
size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1;
/* Middle bits are 0. */
assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
/* We didn't overwrite any bits in the bit container. */
assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
}
#endif
}
#define HUF_FLUSHBITS_1(stream) \
if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
/*! HUF_zeroIndex1() :
 * Resets the bit container @ index 1 and its bit position to 0. */
FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
{
    bitC->bitPos[1] = 0;
    bitC->bitContainer[1] = 0;
}
/*! HUF_mergeIndex1() :
 * Merges the bit container @ index 1 into the bit container @ index 0:
 * container 0 is shifted right by the number of bits held in container 1,
 * container 1 is OR-ed in, and the bit positions are summed.
 * Index 1 itself is left unmodified by this function.
 */
FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
{
    /* Only the low byte of bitPos is meaningful. */
    size_t const incomingBits = bitC->bitPos[1] & 0xFF;
    assert(incomingBits < HUF_BITS_IN_CONTAINER);
    bitC->bitContainer[0] >>= incomingBits;
    bitC->bitContainer[0] |= bitC->bitContainer[1];
    bitC->bitPos[0] += bitC->bitPos[1];
    assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
}
/*! HUF_flushBits() :
 * Writes the pending bits of the bit container @ index 0 to the output
 * and advances the write pointer by the number of whole bytes flushed.
 *
 * @post the low bits of bitPos[0] hold a count < 8.
 * @param kFast If set, the caller guarantees the write pointer cannot pass
 *              endPtr, so the clamp is skipped.
 */
FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
{
    /* bitPos carries noise above its low byte, hence the 0xFF mask. */
    size_t const pendingBits = bitC->bitPos[0] & 0xFF;
    size_t const wholeBytes = pendingBits >> 3;
    /* The pending bits sit at the top of the container: bring them down to bit 0. */
    size_t const toWrite = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - pendingBits);
    assert(pendingBits > 0);
    assert(pendingBits <= sizeof(bitC->bitContainer[0]) * 8);
    assert(bitC->ptr <= bitC->endPtr);
    /* Keep only the count of bits that did not complete a byte. */
    bitC->bitPos[0] &= 7;
    MEM_writeLEST(bitC->ptr, toWrite);
    bitC->ptr += wholeBytes;
    assert(!kFast || bitC->ptr <= bitC->endPtr);
    if (!kFast) {
        if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
    }
    /* The container itself is left untouched: the leftover bits are
     * already its top bits, and noise below them is harmless.
     */
}
/*! HUF_endMark()
 * @returns The Huffman stream end mark, built as an element with
 *          nbBits = 1 and value = 1.
 */
static HUF_CElt HUF_endMark(void)
{
    HUF_CElt mark;
    HUF_setNbBits(&mark, 1);   /* assigns the whole element */
    HUF_setValue(&mark, 1);
    return mark;
}
/*! HUF_closeCStream() :
 * Appends the end mark, flushes, and reports the stream size.
 * @return Size of CStream, in bytes (a partially filled last byte counts),
 *         or 0 if it could not fit into dstBuffer */
static size_t HUF_closeCStream(HUF_CStream_t* bitC)
{
    size_t leftoverBits;
    HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
    HUF_flushBits(bitC, /* kFast */ 0);
    leftoverBits = bitC->bitPos[0] & 0xFF;
    if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
    return (size_t)(bitC->ptr - bitC->startPtr) + (leftoverBits > 0);
}
/* HUF_encodeSymbol() :
 * Looks up `symbol` in `CTable` and adds its element to the bit container
 * selected by `idx`; `fast` is forwarded as HUF_addBits()'s kFast. */
FORCE_INLINE_TEMPLATE void
HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
{
    HUF_CElt const elt = CTable[symbol];
    HUF_addBits(bitCPtr, elt, idx, fast);
}
/* HUF_compress1X_usingCTable_internal_body_loop() :
* Encodes the srcSize bytes of `ip` into `bitC`, walking the input from its
* last byte back to its first.
* @param ct Symbol table indexed directly by byte value.
* @param kUnroll Number of symbols encoded between two flushes.
* @param kFastFlush Forwarded to HUF_flushBits() as kFast.
* @param kLastFast `fast` flag used for the last symbol of each kUnroll group;
* the other symbols of a group are always added with fast = 1.
*/
FORCE_INLINE_TEMPLATE void
HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
const BYTE* ip, size_t srcSize,
const HUF_CElt* ct,
int kUnroll, int kFastFlush, int kLastFast)
{
/* Join to kUnroll */
int n = (int)srcSize;
int rem = n % kUnroll;
if (rem > 0) {
/* The trailing srcSize % kUnroll symbols use the non-fast add. */
for (; rem > 0; --rem) {
HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
}
HUF_flushBits(bitC, kFastFlush);
}
assert(n % kUnroll == 0);
/* Join to 2 * kUnroll */
if (n % (2 * kUnroll)) {
int u;
for (u = 1; u < kUnroll; ++u) {
HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
}
HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
HUF_flushBits(bitC, kFastFlush);
n -= kUnroll;
}
assert(n % (2 * kUnroll) == 0);
/* Main loop: 2 * kUnroll symbols per iteration, half per bit container. */
for (; n>0; n-= 2 * kUnroll) {
/* Encode kUnroll symbols into the bitstream @ index 0. */
int u;
for (u = 1; u < kUnroll; ++u) {
HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
}
HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
HUF_flushBits(bitC, kFastFlush);
/* Encode kUnroll symbols into the bitstream @ index 1.
* This allows us to start filling the bit container
* without any data dependencies.
*/
HUF_zeroIndex1(bitC);
for (u = 1; u < kUnroll; ++u) {
HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
}
HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
/* Merge bitstream @ index 1 into the bitstream @ index 0 */
HUF_mergeIndex1(bitC);
HUF_flushBits(bitC, kFastFlush);
}
assert(n == 0);
}
/**
 * HUF_tightCompressBound() :
 * Returns a tight upper bound on the output space needed by Huffman:
 * srcSize symbols of at most tableLog bits each, converted to bytes,
 * plus an 8 bytes buffer to handle over-writes. If the output is at least
 * this large we don't need to do bounds checks during Huffman encoding.
 */
static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
{
    size_t const payloadBits = srcSize * tableLog;
    size_t const payloadBytes = payloadBits >> 3;
    return payloadBytes + 8;
}
#define HUF_FLUSHBITS_2(stream) \
if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
FORCE_INLINE_TEMPLATE size_t
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
const void* src, size_t srcSize,
const HUF_CElt* CTable)
{
U32 const tableLog = (U32)CTable[0];
HUF_CElt const* ct = CTable + 1;
const BYTE* ip = (const BYTE*) src;
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize;
BYTE* op = ostart;
size_t n;
BIT_CStream_t bitC;
HUF_CStream_t bitC;
/* init */
if (dstSize < 8) return 0; /* not enough space to compress */
{ size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op));
{ size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
if (HUF_isError(initErr)) return 0; }
n = srcSize & ~3; /* join to mod 4 */
switch (srcSize & 3)
{
case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
HUF_FLUSHBITS_2(&bitC);
/* fall-through */
case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
HUF_FLUSHBITS_1(&bitC);
/* fall-through */
case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
HUF_FLUSHBITS(&bitC);
/* fall-through */
case 0 : /* fall-through */
default: break;
if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);
else {
if (MEM_32bits()) {
switch (tableLog) {
case 11:
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
break;
case 10:
case 9:
case 8:
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
break;
case 7:
default:
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
break;
}
} else {
switch (tableLog) {
case 11:
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
break;
case 10:
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
break;
case 9:
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
break;
case 8:
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
break;
case 7:
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
break;
case 6:
default:
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
break;
}
}
}
assert(bitC.ptr <= bitC.endPtr);
for (; n>0; n-=4) { /* note : n&3==0 at this stage */
HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
HUF_FLUSHBITS_1(&bitC);
HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
HUF_FLUSHBITS_2(&bitC);
HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
HUF_FLUSHBITS_1(&bitC);
HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
HUF_FLUSHBITS(&bitC);
}
return BIT_closeCStream(&bitC);
return HUF_closeCStream(&bitC);
}
#if DYNAMIC_BMI2
@ -668,9 +965,13 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}
/* HUF_compress1X_usingCTable_bmi2() :
 * Single-stream compression with an existing CTable; forwards every
 * argument, including the `bmi2` flag, to the internal implementation. */
size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
{
    size_t const cSize = HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
    return cSize;
}
static size_t
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
@ -690,8 +991,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
assert(op <= oend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
if (cSize==0) return 0;
assert(cSize <= 65535);
if (cSize == 0 || cSize > 65535) return 0;
MEM_writeLE16(ostart, (U16)cSize);
op += cSize;
}
@ -699,8 +999,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
ip += segmentSize;
assert(op <= oend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
if (cSize==0) return 0;
assert(cSize <= 65535);
if (cSize == 0 || cSize > 65535) return 0;
MEM_writeLE16(ostart+2, (U16)cSize);
op += cSize;
}
@ -708,8 +1007,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
ip += segmentSize;
assert(op <= oend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
if (cSize==0) return 0;
assert(cSize <= 65535);
if (cSize == 0 || cSize > 65535) return 0;
MEM_writeLE16(ostart+4, (U16)cSize);
op += cSize;
}
@ -718,7 +1016,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
assert(op <= oend);
assert(ip <= iend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
if (cSize==0) return 0;
if (cSize == 0 || cSize > 65535) return 0;
op += cSize;
}
@ -727,7 +1025,12 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}
/* HUF_compress4X_usingCTable_bmi2() :
 * Four-stream compression with an existing CTable; forwards every
 * argument, including the `bmi2` flag, to the internal implementation. */
size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
{
    size_t const cSize = HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
    return cSize;
}
typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
@ -751,10 +1054,11 @@ static size_t HUF_compressCTable_internal(
typedef struct {
unsigned count[HUF_SYMBOLVALUE_MAX + 1];
HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
union {
HUF_buildCTable_wksp_tables buildCTable_wksp;
HUF_WriteCTableWksp writeCTable_wksp;
U32 hist_wksp[HIST_WKSP_SIZE_U32];
} wksps;
} HUF_compress_tables_t;
@ -763,26 +1067,25 @@ typedef struct {
/* HUF_compress_internal() :
* `workSpace_align4` must be aligned on 4-bytes boundaries,
* and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */
* and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 U64 */
static size_t
HUF_compress_internal (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
HUF_nbStreams_e nbStreams,
void* workSpace_align4, size_t wkspSize,
void* workSpace, size_t wkspSize,
HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
const int bmi2, unsigned suspectUncompressible)
{
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace_align4;
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, sizeof(size_t));
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize;
BYTE* op = ostart;
HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE);
assert(((size_t)workSpace_align4 & 3) == 0); /* must be aligned on 4-bytes boundaries */
HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
/* checks & inits */
if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
if (!srcSize) return 0; /* Uncompressed */
if (!dstSize) return 0; /* cannot fit anything within dst budget */
if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
@ -814,7 +1117,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
}
/* Scan input and build symbol stats */
{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace_align4, wkspSize) );
{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
}
@ -839,9 +1142,12 @@ HUF_compress_internal (void* dst, size_t dstSize,
&table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
CHECK_F(maxBits);
huffLog = (U32)maxBits;
/* Zero unused symbols in CTable, so we can check it for validity */
ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0,
sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
}
/* Zero unused symbols in CTable, so we can check it for validity */
{
size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
}
/* Write table description header */
@ -939,7 +1245,7 @@ size_t HUF_compress1X (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog)
{
unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
U64 workSpace[HUF_WORKSPACE_SIZE_U64];
return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}
@ -947,7 +1253,7 @@ size_t HUF_compress2 (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog)
{
unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
U64 workSpace[HUF_WORKSPACE_SIZE_U64];
return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}

View File

@ -63,7 +63,7 @@ typedef struct {
} ZSTD_localDict;
typedef struct {
HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)];
HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
HUF_repeat repeatMode;
} ZSTD_hufCTables_t;

View File

@ -126,7 +126,7 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
optPtr->litSum = 0;
for (lit=0; lit<=MaxLit; lit++) {
U32 const scaleLog = 11; /* scale to 2K */
U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
assert(bitCost <= scaleLog);
optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
optPtr->litSum += optPtr->litFreq[lit];

View File

@ -185,7 +185,7 @@ BYTE SEQUENCE_LLCODE[ZSTD_BLOCKSIZE_MAX];
BYTE SEQUENCE_MLCODE[ZSTD_BLOCKSIZE_MAX];
BYTE SEQUENCE_OFCODE[ZSTD_BLOCKSIZE_MAX];
unsigned WKSP[HUF_WORKSPACE_SIZE_U32];
U64 WKSP[HUF_WORKSPACE_SIZE_U64];
typedef struct {
size_t contentSize; /* 0 means unknown (unless contentSize == windowSize == 0) */
@ -199,7 +199,7 @@ typedef struct {
int hufInit;
/* the distribution used in the previous block for repeat mode */
BYTE hufDist[DISTSIZE];
HUF_CElt hufTable [256];
HUF_CElt hufTable [HUF_CTABLE_SIZE_ST(255)];
int fseInit;
FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];

View File

@ -17,6 +17,7 @@ decompress_dstSize_tooSmall
fse_read_ncount
sequence_compression_api
seekable_roundtrip
huf_round_trip
fuzz-*.log
rt_lib_*
d_lib_*

View File

@ -103,7 +103,8 @@ FUZZ_TARGETS := \
decompress_dstSize_tooSmall \
fse_read_ncount \
sequence_compression_api \
seekable_roundtrip
seekable_roundtrip \
huf_round_trip
all: libregression.a $(FUZZ_TARGETS)
@ -200,6 +201,9 @@ sequence_compression_api: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_sequenc
seekable_roundtrip: $(FUZZ_HEADERS) $(SEEKABLE_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) $(SEEKABLE_OBJS) rt_fuzz_seekable_roundtrip.o
$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) $(SEEKABLE_OBJS) rt_fuzz_seekable_roundtrip.o $(LIB_FUZZING_ENGINE) -o $@
huf_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_huf_round_trip.o
$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_huf_round_trip.o $(LIB_FUZZING_ENGINE) -o $@
libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o
$(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o

View File

@ -63,6 +63,7 @@ TARGET_INFO = {
'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
'huf_round_trip': TargetInfo(InputType.RAW_DATA),
}
TARGETS = list(TARGET_INFO.keys())
ALL_TARGETS = TARGETS + ['all']

132
tests/fuzz/huf_round_trip.c Normal file
View File

@ -0,0 +1,132 @@
/*
* Copyright (c) Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/**
* This fuzz target performs a Huffman (HUF) round-trip test: it builds a table
* from the input, compresses & decompresses the input with it, compares the
* result with the original, and calls abort() on corruption.
*/
#define HUF_STATIC_LINKING_ONLY
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "common/cpu.h"
#include "compress/hist.h"
#include "common/huf.h"
#include "fuzz_helpers.h"
#include "fuzz_data_producer.h"
/**
 * Clamp a requested table log from below so it fits the alphabet.
 *
 * The floor is BIT_highbit32(alphabetSize) + 1, with one extra bit added when
 * the alphabet size (maxSymbol + 1) is not an exact power of two.
 * NOTE(review): BIT_highbit32 presumably returns the index of the highest set
 * bit - confirm against the bitstream header.
 *
 * @param tableLog   table log requested by the caller
 * @param maxSymbol  largest symbol value present in the input
 * @return           tableLog, or the computed floor if tableLog is below it
 */
static size_t adjustTableLog(size_t tableLog, size_t maxSymbol)
{
    size_t const nbSymbols = maxSymbol + 1;
    /* x & (x - 1) clears the lowest set bit; zero means a single bit was set. */
    int const isPowerOfTwo = (nbSymbols & (nbSymbols - 1)) == 0;
    size_t const floorLog = BIT_highbit32(nbSymbols) + 1 + (isPowerOfTwo ? 0 : 1);

    assert(floorLog <= 9);
    return (tableLog < floorLog) ? floorLog : tableLog;
}
/**
 * Fuzzer entry point: Huffman round trip on the supplied bytes.
 *
 * Steps: histogram the input, build a compression table from it, serialize
 * the table, read it back into a decoding table, then compress and decompress
 * the input and check that the regenerated bytes match.
 *
 * The data producer supplies, in this order: 1-stream vs 4-stream mode, X1 vs
 * X2 table reader, an optional bmi2 toggle, the compressed-buffer capacity and
 * an initial table log. The payload size is whatever the producer reports as
 * remaining, capped at 256 KB.
 *
 * @return always 0; violations are reported through the FUZZ_* assert macros.
 */
int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
    /* The order of these draws decides how producer bytes are consumed; keep it. */
    int const fourStreams = FUZZ_dataProducer_int32Range(producer, 0, 1);
    int const preferX2 = FUZZ_dataProducer_int32Range(producer, 0, 1);
    /* Short-circuit: no draw happens when the CPU check reports no bmi2. */
    int const useBmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()) && FUZZ_dataProducer_int32Range(producer, 0, 1);
    /* Capacity is based on the full input size and may well be too small. */
    size_t const dstCapacity = FUZZ_dataProducer_uint32Range(producer, 0, 4 * size);
    /* Only a starting point: raised below to fit the alphabet. */
    size_t hufLog = FUZZ_dataProducer_uint32Range(producer, 1, 12);
    size_t const srcSizeCap = 256 * 1024;
    size_t const remaining = FUZZ_dataProducer_remainingBytes(producer);

    size = (remaining > srcSizeCap) ? srcSizeCap : remaining;
    if (size <= 1) {
        FUZZ_dataProducer_free(producer);
        return 0;
    }

    uint32_t maxSymbolValue = 255;
    U32 histogram[256];
    size_t const largestCount = HIST_count(histogram, &maxSymbolValue, src, size);
    FUZZ_ZASSERT(largestCount);
    if (largestCount == size) {
        /* A single repeated byte (RLE) is not exercised by this target. */
        FUZZ_dataProducer_free(producer);
        return 0;
    }
    FUZZ_ASSERT(maxSymbolValue <= 255);
    hufLog = adjustTableLog(hufLog, maxSymbolValue);

    size_t const workspaceSize = HUF_WORKSPACE_SIZE;
    void* workspace = FUZZ_malloc(workspaceSize);
    void* regenerated = FUZZ_malloc(size);
    void* compressed = FUZZ_malloc(dstCapacity);
    HUF_CElt* cTable = (HUF_CElt*)FUZZ_malloc(HUF_CTABLE_SIZE(maxSymbolValue));
    HUF_DTable* dTable = (HUF_DTable*)FUZZ_malloc(HUF_DTABLE_SIZE(hufLog) * sizeof(HUF_DTable));
    /* NOTE(review): presumably the DTable header word carrying the max table
     * log - confirm against the HUF_DTable description in huf.h. */
    dTable[0] = hufLog * 0x01000001;

    hufLog = HUF_optimalTableLog(hufLog, size, maxSymbolValue);
    FUZZ_ASSERT(hufLog <= 12);
    hufLog = HUF_buildCTable_wksp(cTable, histogram, maxSymbolValue, hufLog, workspace, workspaceSize);
    FUZZ_ZASSERT(hufLog);

    size_t const headerSize = HUF_writeCTable_wksp(compressed, dstCapacity, cTable, maxSymbolValue, hufLog, workspace, workspaceSize);
    /* A header-write error (uncompressible data or too-small buffer) is an
     * accepted outcome: skip straight to cleanup. */
    if (!ERR_isError(headerSize)) {
        int readWithX1 = !preferX2;
        FUZZ_ZASSERT(headerSize);

        if (preferX2) {
            size_t const x2Status = HUF_readDTableX2_wksp(dTable, compressed, headerSize, workspace, workspaceSize);
            if (ERR_getErrorCode(x2Status) == ZSTD_error_tableLog_tooLarge) {
                /* X2 rejected the table log: fall back to the X1 reader. */
                readWithX1 = 1;
            } else {
                FUZZ_ZASSERT(x2Status);
            }
        }
        if (readWithX1) {
            FUZZ_ZASSERT(HUF_readDTableX1_wksp_bmi2(dTable, compressed, headerSize, workspace, workspaceSize, useBmi2));
        }

        size_t const cSize = fourStreams
            ? HUF_compress4X_usingCTable_bmi2(compressed, dstCapacity, src, size, cTable, useBmi2)
            : HUF_compress1X_usingCTable_bmi2(compressed, dstCapacity, src, size, cTable, useBmi2);
        FUZZ_ZASSERT(cSize);
        /* cSize == 0 means nothing was produced, so there is nothing to decode. */
        if (cSize != 0) {
            size_t const rSize = fourStreams
                ? HUF_decompress4X_usingDTable_bmi2(regenerated, size, compressed, cSize, dTable, useBmi2)
                : HUF_decompress1X_usingDTable_bmi2(regenerated, size, compressed, cSize, dTable, useBmi2);
            FUZZ_ZASSERT(rSize);
            FUZZ_ASSERT_MSG(rSize == size, "Incorrect regenerated size");
            FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, regenerated, size), "Corruption!");
        }
    }

    free(regenerated);
    free(compressed);
    free(cTable);
    free(dTable);
    free(workspace);
    FUZZ_dataProducer_free(producer);
    return 0;
}