Fix various fuzzer failures: repcode history, superblocks

This commit is contained in:
Sen Huang 2021-02-24 13:21:49 -08:00
parent 0633bf17c3
commit 41c3eae6d9
3 changed files with 198 additions and 356 deletions

View File

@ -2211,14 +2211,12 @@ ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
U32* LLtype, U32* Offtype, U32* MLtype, size_t* lastCountSize, U32* LLtype, U32* Offtype, U32* MLtype, size_t* lastCountSize,
const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,
BYTE* dst, const BYTE* const dstEnd, BYTE* dst, const BYTE* const dstEnd,
ZSTD_strategy strategy, ZSTD_strategy strategy, unsigned* countWorkspace,
void* entropyWorkspace, size_t entropyWkspSize) { void* entropyWorkspace, size_t entropyWkspSize) {
BYTE* const ostart = dst; BYTE* const ostart = dst;
const BYTE* const oend = dstEnd; const BYTE* const oend = dstEnd;
BYTE* op = ostart; BYTE* op = ostart;
unsigned* const countWorkspace = (unsigned*)entropyWorkspace;
FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
@ -2394,7 +2392,7 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
&LLtype, &Offtype, &MLtype, &lastCountSize, &LLtype, &Offtype, &MLtype, &lastCountSize,
&prevEntropy->fse, &nextEntropy->fse, &prevEntropy->fse, &nextEntropy->fse,
op, oend, op, oend,
strategy, strategy, count,
entropyWorkspace, entropyWkspSize); entropyWorkspace, entropyWkspSize);
FORWARD_IF_ERROR(entropyStatisticsSize, "FSE statistics building failed!"); FORWARD_IF_ERROR(entropyStatisticsSize, "FSE statistics building failed!");
*seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
@ -2440,7 +2438,7 @@ ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr,
void* dst, size_t dstCapacity, void* dst, size_t dstCapacity,
size_t srcSize, size_t srcSize,
void* entropyWorkspace, size_t entropyWkspSize, void* entropyWorkspace, size_t entropyWkspSize,
int bmi2) int bmi2, U32 const canEmitUncompressed)
{ {
size_t const cSize = ZSTD_entropyCompressSequences_internal( size_t const cSize = ZSTD_entropyCompressSequences_internal(
seqStorePtr, prevEntropy, nextEntropy, cctxParams, seqStorePtr, prevEntropy, nextEntropy, cctxParams,
@ -2450,15 +2448,17 @@ ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr,
/* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
* Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
*/ */
if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) if (canEmitUncompressed) {
return 0; /* block not compressed */ if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSequences_internal failed"); return 0; /* block not compressed */
FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSequences_internal failed");
/* Check compressibility */ /* Check compressibility */
{ size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
if (cSize >= maxCSize) return 0; /* block not compressed */ if (cSize >= maxCSize) return 0; /* block not compressed */
}
} }
DEBUGLOG(4, "ZSTD_entropyCompressSequences() cSize: %zu\n", cSize); DEBUGLOG(4, "ZSTD_entropyCompressSequences() cSize: %zu", cSize);
return cSize; return cSize;
} }
@ -2840,9 +2840,10 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi
{ /* Build and write the CTable */ { /* Build and write the CTable */
size_t const newCSize = HUF_estimateCompressedSize( size_t const newCSize = HUF_estimateCompressedSize(
(HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
size_t const hSize = HUF_writeCTable( size_t const hSize = HUF_writeCTable_wksp(
hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
(HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog); (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
nodeWksp, nodeWkspSize);
/* Check against repeating the previous CTable */ /* Check against repeating the previous CTable */
if (repeat != HUF_repeat_none) { if (repeat != HUF_repeat_none) {
size_t const oldCSize = HUF_estimateCompressedSize( size_t const oldCSize = HUF_estimateCompressedSize(
@ -2885,6 +2886,9 @@ static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
BYTE* op = ostart; BYTE* op = ostart;
size_t hSize; size_t hSize;
unsigned* countWorkspace = (unsigned*)workspace;
unsigned* entropyWorkspace = countWorkspace + (MaxSeq + 1);
size_t entropyWorkspaceSize = wkspSize - (MaxSeq + 1) * sizeof(*countWorkspace);
/* ZSTD_buildSequencesStatistics() is guaranteed to overwrite these values */ /* ZSTD_buildSequencesStatistics() is guaranteed to overwrite these values */
U32 LLtype = set_basic; U32 LLtype = set_basic;
@ -2892,12 +2896,11 @@ static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
U32 MLtype = set_basic; U32 MLtype = set_basic;
DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq); DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq);
ZSTD_memset(workspace, 0, wkspSize);
hSize = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, hSize = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
&LLtype, &Offtype, &MLtype, &fseMetadata->lastCountSize, &LLtype, &Offtype, &MLtype, &fseMetadata->lastCountSize,
prevEntropy, nextEntropy, op, oend, prevEntropy, nextEntropy, op, oend,
strategy, strategy, countWorkspace,
workspace, wkspSize); entropyWorkspace, entropyWorkspaceSize);
fseMetadata->llType = (symbolEncodingType_e) LLtype; fseMetadata->llType = (symbolEncodingType_e) LLtype;
fseMetadata->ofType = (symbolEncodingType_e) Offtype; fseMetadata->ofType = (symbolEncodingType_e) Offtype;
fseMetadata->mlType = (symbolEncodingType_e) MLtype; fseMetadata->mlType = (symbolEncodingType_e) MLtype;
@ -2906,7 +2909,7 @@ static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
/** ZSTD_buildBlockEntropyStats() : /** ZSTD_buildBlockEntropyStats() :
* Builds entropy for the super-block. * Builds entropy for the block.
* @return : 0 on success or error code */ * @return : 0 on success or error code */
size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
const ZSTD_entropyCTables_t* prevEntropy, const ZSTD_entropyCTables_t* prevEntropy,
@ -2934,7 +2937,7 @@ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
} }
/* Returns the size estimate for the literals section (header + content) of a block */ /* Returns the size estimate for the literals section (header + content) of a block */
static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize, static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize,
const ZSTD_hufCTables_t* huf, const ZSTD_hufCTables_t* huf,
const ZSTD_hufCTablesMetadata_t* hufMetadata, const ZSTD_hufCTablesMetadata_t* hufMetadata,
void* workspace, size_t wkspSize, void* workspace, size_t wkspSize,
@ -2960,7 +2963,7 @@ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t lit
} }
/* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */ /* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */
static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
const BYTE* codeTable, unsigned maxCode, const BYTE* codeTable, unsigned maxCode,
size_t nbSeq, const FSE_CTable* fseCTable, size_t nbSeq, const FSE_CTable* fseCTable,
const U32* additionalBits, const U32* additionalBits,
@ -2998,7 +3001,7 @@ static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
} }
/* Returns the size estimate for the sequences section (header + content) of a block */ /* Returns the size estimate for the sequences section (header + content) of a block */
static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable,
const BYTE* llCodeTable, const BYTE* llCodeTable,
const BYTE* mlCodeTable, const BYTE* mlCodeTable,
size_t nbSeq, size_t nbSeq,
@ -3009,15 +3012,15 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
{ {
size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ); size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ);
size_t cSeqSizeEstimate = 0; size_t cSeqSizeEstimate = 0;
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff, cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
nbSeq, fseTables->offcodeCTable, NULL, nbSeq, fseTables->offcodeCTable, NULL,
OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
workspace, wkspSize); workspace, wkspSize);
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL, cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL,
nbSeq, fseTables->litlengthCTable, LL_bits, nbSeq, fseTables->litlengthCTable, LL_bits,
LL_defaultNorm, LL_defaultNormLog, MaxLL, LL_defaultNorm, LL_defaultNormLog, MaxLL,
workspace, wkspSize); workspace, wkspSize);
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML, cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML,
nbSeq, fseTables->matchlengthCTable, ML_bits, nbSeq, fseTables->matchlengthCTable, ML_bits,
ML_defaultNorm, ML_defaultNormLog, MaxML, ML_defaultNorm, ML_defaultNormLog, MaxML,
workspace, wkspSize); workspace, wkspSize);
@ -3026,20 +3029,20 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
} }
/* Returns the size estimate for a given stream of literals, of, ll, ml */ /* Returns the size estimate for a given stream of literals, of, ll, ml */
size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,
const BYTE* ofCodeTable, const BYTE* ofCodeTable,
const BYTE* llCodeTable, const BYTE* llCodeTable,
const BYTE* mlCodeTable, const BYTE* mlCodeTable,
size_t nbSeq, size_t nbSeq,
const ZSTD_entropyCTables_t* entropy, const ZSTD_entropyCTables_t* entropy,
const ZSTD_entropyCTablesMetadata_t* entropyMetadata, const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
void* workspace, size_t wkspSize, void* workspace, size_t wkspSize,
int writeLitEntropy, int writeSeqEntropy) { int writeLitEntropy, int writeSeqEntropy) {
size_t literalsSize, seqSize; size_t literalsSize, seqSize;
literalsSize = ZSTD_estimateSubBlockSize_literal(literals, litSize, literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize,
&entropy->huf, &entropyMetadata->hufMetadata, &entropy->huf, &entropyMetadata->hufMetadata,
workspace, wkspSize, writeLitEntropy); workspace, wkspSize, writeLitEntropy);
seqSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
workspace, wkspSize, writeSeqEntropy); workspace, wkspSize, writeSeqEntropy);
return seqSize + literalsSize + ZSTD_blockHeaderSize; return seqSize + literalsSize + ZSTD_blockHeaderSize;
@ -3058,7 +3061,7 @@ static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seq
&zc->appliedParams, &zc->appliedParams,
&entropyMetadata, &entropyMetadata,
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
estimatedSize = ZSTD_estimateSubBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart), estimatedSize = ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
seqStore->ofCode, seqStore->llCode, seqStore->mlCode, seqStore->ofCode, seqStore->llCode, seqStore->mlCode,
(size_t)(seqStore->sequences - seqStore->sequencesStart), (size_t)(seqStore->sequences - seqStore->sequencesStart),
&zc->blockState.nextCBlock->entropy, &entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE, &zc->blockState.nextCBlock->entropy, &entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
@ -3067,7 +3070,7 @@ static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seq
} }
/* Returns literals bytes represented in a seqStore */ /* Returns literals bytes represented in a seqStore */
static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* seqStore) { static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) {
size_t literalsBytes = 0; size_t literalsBytes = 0;
size_t nbSeqs = seqStore->sequences - seqStore->sequencesStart; size_t nbSeqs = seqStore->sequences - seqStore->sequencesStart;
size_t i; size_t i;
@ -3082,7 +3085,7 @@ static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* seqStore) {
} }
/* Returns match bytes represented in a seqStore */ /* Returns match bytes represented in a seqStore */
static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* seqStore) { static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) {
size_t matchBytes = 0; size_t matchBytes = 0;
size_t nbSeqs = seqStore->sequences - seqStore->sequencesStart; size_t nbSeqs = seqStore->sequences - seqStore->sequencesStart;
size_t i; size_t i;
@ -3099,7 +3102,8 @@ static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* seqStore) {
/* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx). /* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx).
* Stores the result in resultSeqStore. * Stores the result in resultSeqStore.
*/ */
static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, const seqStore_t* originalSeqStore, static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
const seqStore_t* originalSeqStore,
size_t startIdx, size_t endIdx) { size_t startIdx, size_t endIdx) {
BYTE* const litEnd = originalSeqStore->lit; BYTE* const litEnd = originalSeqStore->lit;
size_t literalsBytes; size_t literalsBytes;
@ -3138,13 +3142,11 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, const seqStore_
* Compresses a seqStore into a block with a block header, into the buffer dst. * Compresses a seqStore into a block with a block header, into the buffer dst.
* *
* Returns the total size of that block (including header) or a ZSTD error code. * Returns the total size of that block (including header) or a ZSTD error code.
*
* TODO: Migrate compressBlock_internal and compressSequences_internal to use this as well
*/ */
static size_t ZSTD_compressSequences_singleBlock(ZSTD_CCtx* zc, seqStore_t* seqStore, static size_t ZSTD_compressSequences_singleBlock(ZSTD_CCtx* zc, seqStore_t* seqStore,
void* dst, size_t dstCapacity, void* dst, size_t dstCapacity,
const void* src, size_t srcSize, const void* src, size_t srcSize,
U32 lastBlock) { U32 lastBlock, U32 canEmitRLEorNoCompress) {
const U32 rleMaxLength = 25; const U32 rleMaxLength = 25;
BYTE* op = (BYTE*)dst; BYTE* op = (BYTE*)dst;
const BYTE* ip = (const BYTE*)src; const BYTE* ip = (const BYTE*)src;
@ -3155,11 +3157,13 @@ static size_t ZSTD_compressSequences_singleBlock(ZSTD_CCtx* zc, seqStore_t* seqS
op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize,
srcSize, srcSize,
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
zc->bmi2); zc->bmi2, canEmitRLEorNoCompress);
FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSequences failed!");
if (!zc->isFirstBlock && if (!zc->isFirstBlock &&
cSeqsSize < rleMaxLength && cSeqsSize < rleMaxLength &&
ZSTD_isRLE((BYTE const*)src, srcSize)) { ZSTD_isRLE((BYTE const*)src, srcSize)&&
canEmitRLEorNoCompress) {
/* We don't want to emit our first block as a RLE even if it qualifies because /* We don't want to emit our first block as a RLE even if it qualifies because
* doing so will cause the decoder (cli only) to throw a "should consume all input error." * doing so will cause the decoder (cli only) to throw a "should consume all input error."
* This is only an issue for zstd <= v1.4.3 * This is only an issue for zstd <= v1.4.3
@ -3169,17 +3173,18 @@ static size_t ZSTD_compressSequences_singleBlock(ZSTD_CCtx* zc, seqStore_t* seqS
if (zc->seqCollector.collectSequences) { if (zc->seqCollector.collectSequences) {
ZSTD_copyBlockSequences(zc); ZSTD_copyBlockSequences(zc);
ZSTD_confirmRepcodesAndEntropyTables(zc);
return 0; return 0;
} }
if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
if (cSeqsSize == 0) { if (cSeqsSize == 0 && canEmitRLEorNoCompress) {
cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
FORWARD_IF_ERROR(cSize, "Nocompress block failed"); FORWARD_IF_ERROR(cSize, "Nocompress block failed");
DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize); DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize);
} else if (cSeqsSize == 1) { } else if (cSeqsSize == 1 && canEmitRLEorNoCompress) {
cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock); cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock);
FORWARD_IF_ERROR(cSize, "RLE compress block failed"); FORWARD_IF_ERROR(cSize, "RLE compress block failed");
DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize); DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize);
@ -3251,14 +3256,28 @@ static void ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx
static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) { static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) {
seqStoreSplits splits = {partitions, 0}; seqStoreSplits splits = {partitions, 0};
if (nbSeq <= 4) { if (nbSeq <= 4) {
DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split");
/* Refuse to try and split anything with less than 4 sequences */ /* Refuse to try and split anything with less than 4 sequences */
return 0; return 0;
} }
ZSTD_deriveBlockSplitsHelper(&splits, 0, nbSeq, zc, &zc->seqStore); ZSTD_deriveBlockSplitsHelper(&splits, 0, nbSeq, zc, &zc->seqStore);
splits.splitLocations[splits.idx] = nbSeq; splits.splitLocations[splits.idx] = nbSeq;
DEBUGLOG(5, "ZSTD_deriveBlockSplits: final nb splits: %zu", splits.idx-1);
return splits.idx; return splits.idx;
} }
/* Return 1 if if the first three sequences of seqstore/block use repcodes */
static U32 ZSTD_seqStore_firstThreeContainRepcodes(const seqStore_t* const seqStore) {
U32 const seqLimit = MIN((U32)(seqStore->sequences - seqStore->sequencesStart), ZSTD_REP_NUM);
U32 seqIdx = 0;
for (; seqIdx < seqLimit; ++seqIdx) {
if (seqStore->sequencesStart[seqIdx].offset <= ZSTD_REP_MOVE) {
return 1;
}
}
return 0;
}
/* ZSTD_compressBlock_splitBlock(): /* ZSTD_compressBlock_splitBlock():
* Attempts to split a given block into multiple blocks to improve compression ratio. * Attempts to split a given block into multiple blocks to improve compression ratio.
* *
@ -3271,38 +3290,44 @@ static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, s
BYTE* op = (BYTE*)dst; BYTE* op = (BYTE*)dst;
U32 partitions[MAX_NB_SPLITS]; U32 partitions[MAX_NB_SPLITS];
size_t i = 0; size_t i = 0;
size_t startIdx = 0;
size_t endIdx;
size_t srcBytesTotal = 0; size_t srcBytesTotal = 0;
size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq); size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
seqStore_t nextSeqStore;
seqStore_t currSeqStore;
U32 canEmitRLEorNoCompress = 1;
DEBUGLOG(5, "ZSTD_compressBlock_splitBlock (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
(unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
(unsigned)zc->blockState.matchState.nextToUpdate); (unsigned)zc->blockState.matchState.nextToUpdate);
if (numSplits == 0) { if (numSplits == 0) {
size_t cSizeSingleBlock = ZSTD_compressSequences_singleBlock(zc, &zc->seqStore, op, dstCapacity, ip, blockSize, lastBlock); size_t cSizeSingleBlock = ZSTD_compressSequences_singleBlock(zc, &zc->seqStore, op, dstCapacity, ip, blockSize, lastBlock, 1);
FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!"); FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!");
DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits");
return cSizeSingleBlock; return cSizeSingleBlock;
} }
ZSTD_deriveSeqStoreChunk(&currSeqStore, &zc->seqStore, 0, partitions[0]);
for (i = 0; i <= numSplits; ++i) { for (i = 0; i <= numSplits; ++i) {
seqStore_t chunkSeqStore;
size_t srcBytes; size_t srcBytes;
size_t cSizeChunk; size_t cSizeChunk;
U32 lastBlockActual; U32 lastBlockActual;
endIdx = partitions[i]; srcBytes = ZSTD_countSeqStoreLiteralsBytes(&currSeqStore) + ZSTD_countSeqStoreMatchBytes(&currSeqStore);
ZSTD_deriveSeqStoreChunk(&chunkSeqStore, &zc->seqStore, startIdx, endIdx); lastBlockActual = lastBlock && (i == numSplits);
srcBytes = ZSTD_countSeqStoreLiteralsBytes(&chunkSeqStore) + ZSTD_countSeqStoreMatchBytes(&chunkSeqStore);
lastBlockActual = lastBlock && (nbSeq == endIdx);
srcBytesTotal += srcBytes; srcBytesTotal += srcBytes;
if (i == numSplits) { if (i == numSplits) {
/* This is the final partition, need to account for possible last literals */ /* This is the final partition, need to account for possible last literals */
srcBytes += blockSize - srcBytesTotal; srcBytes += blockSize - srcBytesTotal;
} else {
ZSTD_deriveSeqStoreChunk(&nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]);
if (ZSTD_seqStore_firstThreeContainRepcodes(&nextSeqStore)) {
DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: Next block contains rep in first three seqs!");
canEmitRLEorNoCompress = 0;
}
} }
cSizeChunk = ZSTD_compressSequences_singleBlock(zc, &chunkSeqStore, op, dstCapacity, ip, srcBytes, lastBlockActual); cSizeChunk = ZSTD_compressSequences_singleBlock(zc, &currSeqStore, op, dstCapacity, ip, srcBytes, lastBlockActual, canEmitRLEorNoCompress);
FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!"); FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!");
ZSTD_memcpy(zc->blockState.nextCBlock->rep, zc->blockState.prevCBlock->rep, sizeof(U32)*ZSTD_REP_NUM); ZSTD_memcpy(zc->blockState.nextCBlock->rep, zc->blockState.prevCBlock->rep, sizeof(U32)*ZSTD_REP_NUM);
@ -3310,7 +3335,7 @@ static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, s
op += cSizeChunk; op += cSizeChunk;
dstCapacity -= cSizeChunk; dstCapacity -= cSizeChunk;
cSize += cSizeChunk; cSize += cSizeChunk;
startIdx = partitions[i]; currSeqStore = nextSeqStore;
} }
return cSize; return cSize;
} }
@ -3331,6 +3356,7 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");
return cSize; return cSize;
} }
nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart); nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart);
@ -3376,7 +3402,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
dst, dstCapacity, dst, dstCapacity,
srcSize, srcSize,
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
zc->bmi2); zc->bmi2, 1 /* Can emit uncompressed blocks */);
if (zc->seqCollector.collectSequences) { if (zc->seqCollector.collectSequences) {
ZSTD_copyBlockSequences(zc); ZSTD_copyBlockSequences(zc);
@ -5580,7 +5606,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize, op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
blockSize, blockSize,
cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
cctx->bmi2); cctx->bmi2, 1 /* Can emit uncompressed blocks */);
FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed"); FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize); DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize);

View File

@ -82,9 +82,9 @@ typedef struct {
ZSTD_fseCTables_t fse; ZSTD_fseCTables_t fse;
} ZSTD_entropyCTables_t; } ZSTD_entropyCTables_t;
/*-************************************* /***********************************************
* Entropy buffer statistics structs * Entropy buffer statistics structs and funcs *
***************************************/ ***********************************************/
/** ZSTD_hufCTablesMetadata_t : /** ZSTD_hufCTablesMetadata_t :
* Stores Literals Block Type for a super-block in hType, and * Stores Literals Block Type for a super-block in hType, and
* huffman tree description in hufDesBuffer. * huffman tree description in hufDesBuffer.
@ -116,7 +116,7 @@ typedef struct {
} ZSTD_entropyCTablesMetadata_t; } ZSTD_entropyCTablesMetadata_t;
/** ZSTD_buildBlockEntropyStats() : /** ZSTD_buildBlockEntropyStats() :
* Builds entropy for the super-block. * Builds entropy for the block.
* @return : 0 on success or error code */ * @return : 0 on success or error code */
size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
const ZSTD_entropyCTables_t* prevEntropy, const ZSTD_entropyCTables_t* prevEntropy,
@ -125,18 +125,9 @@ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
ZSTD_entropyCTablesMetadata_t* entropyMetadata, ZSTD_entropyCTablesMetadata_t* entropyMetadata,
void* workspace, size_t wkspSize); void* workspace, size_t wkspSize);
/** ZSTD_estimateSubBlockSize() : /*********************************
* Estimates the size that the block will be, based on literals and sequences. * Compression internals structs *
* @return : estimated size or error code */ *********************************/
size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
const BYTE* ofCodeTable,
const BYTE* llCodeTable,
const BYTE* mlCodeTable,
size_t nbSeq,
const ZSTD_entropyCTables_t* entropy,
const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
void* workspace, size_t wkspSize,
int writeLitEntropy, int writeSeqEntropy);
typedef struct { typedef struct {
U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */ U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */

View File

@ -19,285 +19,6 @@
#include "zstd_compress_sequences.h" #include "zstd_compress_sequences.h"
#include "zstd_compress_literals.h" #include "zstd_compress_literals.h"
/*-*************************************
* Superblock entropy buffer structs
***************************************/
/** ZSTD_hufCTablesMetadata_t :
* Stores Literals Block Type for a super-block in hType, and
* huffman tree description in hufDesBuffer.
* hufDesSize refers to the size of huffman tree description in bytes.
* This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
typedef struct {
symbolEncodingType_e hType;
BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
size_t hufDesSize;
} ZSTD_hufCTablesMetadata_t;
/** ZSTD_fseCTablesMetadata_t :
* Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
* fse tables in fseTablesBuffer.
* fseTablesSize refers to the size of fse tables in bytes.
* This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
typedef struct {
symbolEncodingType_e llType;
symbolEncodingType_e ofType;
symbolEncodingType_e mlType;
BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
size_t fseTablesSize;
size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
} ZSTD_fseCTablesMetadata_t;
typedef struct {
ZSTD_hufCTablesMetadata_t hufMetadata;
ZSTD_fseCTablesMetadata_t fseMetadata;
} ZSTD_entropyCTablesMetadata_t;
/** ZSTD_buildSuperBlockEntropy_literal() :
 *  Selects the literals block type of a super-block and records it in
 *  hufMetadata->hType :
 *   - set_basic      : compression disabled, input too small, or no gain expected
 *   - set_rle        : a single symbol fills the whole input
 *   - set_repeat     : the previous Huffman table is estimated to be good enough
 *   - set_compressed : a new table was built into nextHuf and its description
 *                      serialized into hufMetadata->hufDesBuffer
 *  nextHuf is a copy of prevHuf for every outcome except set_compressed.
 *  The workspace starts with the symbol histogram; the remainder is handed to
 *  the HUF_* helpers as scratch space.
 *  @return : size of the Huffman description (set_compressed only),
 *            0 for every other block type, or an error code */
static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
                                            const ZSTD_hufCTables_t* prevHuf,
                                                  ZSTD_hufCTables_t* nextHuf,
                                                  ZSTD_hufCTablesMetadata_t* hufMetadata,
                                                  const int disableLiteralsCompression,
                                                  void* workspace, size_t wkspSize)
{
    /* workspace layout : [ histogram | scratch for HUF_* ] */
    size_t const histSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
    unsigned* const hist = (unsigned*)workspace;
    BYTE* const scratch = (BYTE*)workspace + histSize;
    size_t const scratchSize = (size_t)(((BYTE*)workspace + wkspSize) - scratch);
    const BYTE* const literals = (const BYTE*)src;
    HUF_repeat repeat = prevHuf->repeatMode;
    unsigned maxSymbolValue = 255;
    unsigned huffLog = HUF_TABLELOG_DEFAULT;
    size_t largest;
    size_t maxBits;
    size_t newCSize;
    size_t hSize;

    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);

    /* Start from the assumption that the previous table gets reused */
    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));

    if (disableLiteralsCompression) {
        DEBUGLOG(5, "set_basic - disabled");
        hufMetadata->hType = set_basic;
        return 0;
    }

    /* Tiny inputs are not worth a compression attempt (speed opt).
     * The threshold is lower when the previous table is known to be valid. */
#   define COMPRESS_LITERALS_SIZE_MIN 63
    if (srcSize <= (size_t)((prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN)) {
        DEBUGLOG(5, "set_basic - too small");
        hufMetadata->hType = set_basic;
        return 0;
    }

    /* Histogram of the literals */
    largest = HIST_count_wksp (hist, &maxSymbolValue, literals, srcSize, workspace, wkspSize);
    FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
    if (largest == srcSize) {
        /* one symbol only */
        DEBUGLOG(5, "set_rle");
        hufMetadata->hType = set_rle;
        return 0;
    }
    if (largest <= (srcSize >> 7)+4) {
        /* distribution too flat */
        DEBUGLOG(5, "set_basic - no gain");
        hufMetadata->hType = set_basic;
        return 0;
    }

    /* A table in "check" state is only reusable if it can encode this histogram */
    if (repeat == HUF_repeat_check) {
        if (!HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, hist, maxSymbolValue)) {
            repeat = HUF_repeat_none;
        }
    }

    /* Build a fresh Huffman tree into nextHuf */
    ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
    maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, hist,
                                   maxSymbolValue, huffLog,
                                   scratch, scratchSize);
    FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
    huffLog = (U32)maxBits;

    /* Estimate the cost with the new table and serialize its description.
     * NOTE(review): hSize is used in the comparisons below without an error
     * check - confirm HUF_writeCTable_wksp() cannot fail for this buffer. */
    newCSize = HUF_estimateCompressedSize((HUF_CElt*)nextHuf->CTable, hist, maxSymbolValue);
    hSize = HUF_writeCTable_wksp(hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
                                 (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
                                 scratch, scratchSize);

    /* Compare against repeating the previous table */
    if (repeat != HUF_repeat_none) {
        size_t const oldCSize = HUF_estimateCompressedSize(
                (HUF_CElt const*)prevHuf->CTable, hist, maxSymbolValue);
        int const oldCompresses = (oldCSize < srcSize);
        int const oldNotWorse   = (oldCSize <= hSize + newCSize);
        int const headerTooBig  = (hSize + 12 >= srcSize);
        if (oldCompresses && (oldNotWorse || headerTooBig)) {
            DEBUGLOG(5, "set_repeat - smaller");
            ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
            hufMetadata->hType = set_repeat;
            return 0;
        }
    }

    /* New table + payload must beat the raw size */
    if (newCSize + hSize >= srcSize) {
        DEBUGLOG(5, "set_basic - no gains");
        ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
        hufMetadata->hType = set_basic;
        return 0;
    }

    DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
    hufMetadata->hType = set_compressed;
    nextHuf->repeatMode = HUF_repeat_check;
    return hSize;
}
/** ZSTD_buildSuperBlockEntropy_sequences() :
 *  Chooses an encoding mode for the literal-length, offset and match-length
 *  codes of a super-block (in that order), builds the matching CTables into
 *  nextEntropy and appends their descriptions to fseMetadata->fseTablesBuffer.
 *  The selected modes are stored in fseMetadata->{ll, of, ml}Type, and
 *  fseMetadata->lastCountSize receives the description size of the last table
 *  built with set_compressed (0 if none).
 *  The workspace is zeroed on entry; it starts with the code histogram and the
 *  remainder is used as scratch space for table construction.
 *  @return : total size of the table descriptions written, or an error code */
static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
                                              const ZSTD_fseCTables_t* prevEntropy,
                                                    ZSTD_fseCTables_t* nextEntropy,
                                              const ZSTD_CCtx_params* cctxParams,
                                                    ZSTD_fseCTablesMetadata_t* fseMetadata,
                                                    void* workspace, size_t wkspSize)
{
    /* workspace layout : [ histogram | scratch for CTable construction ] */
    size_t const histSize = (MaxSeq + 1) * sizeof(unsigned);
    unsigned* const hist = (unsigned*)workspace;
    BYTE* const tableWksp = (BYTE*)workspace + histSize;
    size_t const tableWkspSize = (size_t)(((BYTE*)workspace + wkspSize) - tableWksp);
    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
    const BYTE* const llCodes = seqStorePtr->llCode;
    const BYTE* const ofCodes = seqStorePtr->ofCode;
    const BYTE* const mlCodes = seqStorePtr->mlCode;
    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
    BYTE* const ostart = fseMetadata->fseTablesBuffer;
    BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
    BYTE* op = ostart;
    unsigned max;                       /* in: highest allowed code; out: highest code seen */
    size_t mostFrequent;
    size_t countSize;
    U32 encType;
    ZSTD_defaultPolicy_e offPolicy;

    assert(tableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
    ZSTD_memset(workspace, 0, wkspSize);

    fseMetadata->lastCountSize = 0;
    /* convert length/distances into codes */
    ZSTD_seqToCodes(seqStorePtr);

    /* ---- Literal Lengths ---- */
    max = MaxLL;
    mostFrequent = HIST_countFast_wksp(hist, &max, llCodes, nbSeq, workspace, wkspSize);   /* can't fail */
    DEBUGLOG(5, "Building LL table");
    nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
    encType = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
                                      hist, max, mostFrequent, nbSeq,
                                      LLFSELog, prevEntropy->litlengthCTable,
                                      LL_defaultNorm, LL_defaultNormLog,
                                      ZSTD_defaultAllowed, strategy);
    assert(set_basic < set_compressed && set_rle < set_compressed);
    assert(!(encType < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
    countSize = ZSTD_buildCTable(op, oend - op, nextEntropy->litlengthCTable, LLFSELog, (symbolEncodingType_e)encType,
                                 hist, max, llCodes, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
                                 prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
                                 tableWksp, tableWkspSize);
    FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
    if (encType == set_compressed) {
        fseMetadata->lastCountSize = countSize;
    }
    op += countSize;
    fseMetadata->llType = (symbolEncodingType_e) encType;

    /* ---- Offsets ---- */
    max = MaxOff;
    mostFrequent = HIST_countFast_wksp(hist, &max, ofCodes, nbSeq, workspace, wkspSize);   /* can't fail */
    /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
    offPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
    DEBUGLOG(5, "Building OF table");
    nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
    encType = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
                                      hist, max, mostFrequent, nbSeq,
                                      OffFSELog, prevEntropy->offcodeCTable,
                                      OF_defaultNorm, OF_defaultNormLog,
                                      offPolicy, strategy);
    assert(!(encType < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
    countSize = ZSTD_buildCTable(op, oend - op, nextEntropy->offcodeCTable, OffFSELog, (symbolEncodingType_e)encType,
                                 hist, max, ofCodes, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
                                 prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
                                 tableWksp, tableWkspSize);
    FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
    if (encType == set_compressed) {
        fseMetadata->lastCountSize = countSize;
    }
    op += countSize;
    fseMetadata->ofType = (symbolEncodingType_e) encType;

    /* ---- Match Lengths ---- */
    max = MaxML;
    mostFrequent = HIST_countFast_wksp(hist, &max, mlCodes, nbSeq, workspace, wkspSize);   /* can't fail */
    DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
    nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
    encType = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
                                      hist, max, mostFrequent, nbSeq,
                                      MLFSELog, prevEntropy->matchlengthCTable,
                                      ML_defaultNorm, ML_defaultNormLog,
                                      ZSTD_defaultAllowed, strategy);
    assert(!(encType < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
    countSize = ZSTD_buildCTable(op, oend - op, nextEntropy->matchlengthCTable, MLFSELog, (symbolEncodingType_e)encType,
                                 hist, max, mlCodes, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
                                 prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
                                 tableWksp, tableWkspSize);
    FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
    if (encType == set_compressed) {
        fseMetadata->lastCountSize = countSize;
    }
    op += countSize;
    fseMetadata->mlType = (symbolEncodingType_e) encType;

    assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
    return op-ostart;
}
/** ZSTD_buildSuperBlockEntropy() :
 *  Fills entropyMetadata for a super-block : the literals (Huffman) stage runs
 *  first, then the sequences (FSE) stage; both reuse the same workspace.
 *  Each stage's result is stored into its size field before being checked, so
 *  after a failure that field holds the error code.
 *  @return : 0 on success or error code */
static size_t
ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
                      const ZSTD_entropyCTables_t* prevEntropy,
                            ZSTD_entropyCTables_t* nextEntropy,
                      const ZSTD_CCtx_params* cctxParams,
                            ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                            void* workspace, size_t wkspSize)
{
    ZSTD_hufCTablesMetadata_t* const hufMeta = &entropyMetadata->hufMetadata;
    ZSTD_fseCTablesMetadata_t* const fseMeta = &entropyMetadata->fseMetadata;
    size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;

    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");

    /* stage 1 : literals */
    hufMeta->hufDesSize = ZSTD_buildSuperBlockEntropy_literal(
                                seqStorePtr->litStart, litSize,
                                &prevEntropy->huf, &nextEntropy->huf,
                                hufMeta,
                                ZSTD_disableLiteralsCompression(cctxParams),
                                workspace, wkspSize);
    FORWARD_IF_ERROR(hufMeta->hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed");

    /* stage 2 : sequences */
    fseMeta->fseTablesSize = ZSTD_buildSuperBlockEntropy_sequences(
                                seqStorePtr,
                                &prevEntropy->fse, &nextEntropy->fse,
                                cctxParams,
                                fseMeta,
                                workspace, wkspSize);
    FORWARD_IF_ERROR(fseMeta->fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed");

    return 0;
}
/** ZSTD_compressSubBlock_literal() : /** ZSTD_compressSubBlock_literal() :
* Compresses literals section for a sub-block. * Compresses literals section for a sub-block.
* When we have to write the Huffman table we will sometimes choose a header * When we have to write the Huffman table we will sometimes choose a header
@ -500,9 +221,9 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
* block, since it isn't worth optimizing. * block, since it isn't worth optimizing.
*/ */
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
if (writeEntropy && fseMetadata->lastCountSize && (bitstreamSize + fseMetadata->lastCountSize) < 4) { if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) {
/* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
assert(bitstreamSize + fseMetadata->lastCountSize == 3); assert(fseMetadata->lastCountSize + bitstreamSize == 3);
DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
"emitting an uncompressed block."); "emitting an uncompressed block.");
return 0; return 0;
@ -577,6 +298,110 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
return op-ostart; return op-ostart;
} }
/* ZSTD_estimateSubBlockSize_literal() :
 * Estimates the size of a sub-block's literals section from the block type
 * recorded in hufMetadata :
 *  - set_basic : litSize
 *  - set_rle   : 1
 *  - set_compressed / set_repeat : estimated Huffman payload with table `huf`,
 *    plus a fixed 3-byte section header, plus the table description size when
 *    writeEntropy is set. Falls back to litSize if the histogram fails.
 * @return : estimated size in bytes */
static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
                                                const ZSTD_hufCTables_t* huf,
                                                const ZSTD_hufCTablesMetadata_t* hufMetadata,
                                                void* workspace, size_t wkspSize,
                                                int writeEntropy)
{
    switch (hufMetadata->hType) {
    case set_basic:
        return litSize;
    case set_rle:
        return 1;
    case set_compressed:
    case set_repeat:
        {   unsigned* const hist = (unsigned*)workspace;
            unsigned maxSymbolValue = 255;
            size_t const headerSize = 3;  /* Use hard coded size of 3 bytes */
            size_t const largest = HIST_count_wksp (hist, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize);
            size_t estimate;
            if (ZSTD_isError(largest)) return litSize;
            estimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, hist, maxSymbolValue);
            if (writeEntropy) estimate += hufMetadata->hufDesSize;
            return estimate + headerSize;
        }
    default:
        assert(0); /* impossible */
        return 0;
    }
}
/* ZSTD_estimateSubBlockSize_symbolType() :
 * Estimates, in bytes, the cost of encoding one symbol stream (nbSeq codes
 * from codeTable) with the given encoding type.
 * The entropy cost comes from the default distribution (set_basic), from
 * fseCTable (set_compressed / set_repeat), or is 0 (set_rle). The extra bits
 * of every code are then added : additionalBits[code] when a table is given,
 * otherwise the code value itself (offsets).
 * If the entropy cost cannot be computed, a flat nbSeq * 10 is returned.
 * @return : estimated size in bytes */
static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
                        const BYTE* codeTable, unsigned maxCode,
                        size_t nbSeq, const FSE_CTable* fseCTable,
                        const U32* additionalBits,
                        short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
                        void* workspace, size_t wkspSize)
{
    unsigned* const hist = (unsigned*)workspace;
    unsigned max = maxCode;
    size_t bitCost;
    size_t n;

    HIST_countFast_wksp(hist, &max, codeTable, nbSeq, workspace, wkspSize);  /* can't fail */

    switch (type) {
    case set_basic:
        /* We selected this encoding type, so it must be valid. */
        assert(max <= defaultMax);
        bitCost = (max <= defaultMax)
                ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, hist, max)
                : ERROR(GENERIC);
        break;
    case set_compressed:
    case set_repeat:
        bitCost = ZSTD_fseBitCost(fseCTable, hist, max);
        break;
    case set_rle:
    default:
        bitCost = 0;
        break;
    }
    if (ZSTD_isError(bitCost)) return nbSeq * 10;

    for (n = 0; n < nbSeq; n++) {
        BYTE const code = codeTable[n];
        if (additionalBits) {
            bitCost += additionalBits[code];
        } else {
            bitCost += code;  /* for offset, offset code is also the number of additional bits */
        }
    }
    return bitCost / 8;
}
/* ZSTD_estimateSubBlockSize_sequences() :
 * Estimates the size of a sub-block's sequences section : the offset,
 * literal-length and match-length stream estimates, a fixed 3-byte section
 * header, plus the FSE table descriptions when writeEntropy is set.
 * @return : estimated size in bytes */
static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
                                                  const BYTE* llCodeTable,
                                                  const BYTE* mlCodeTable,
                                                  size_t nbSeq,
                                                  const ZSTD_fseCTables_t* fseTables,
                                                  const ZSTD_fseCTablesMetadata_t* fseMetadata,
                                                  void* workspace, size_t wkspSize,
                                                  int writeEntropy)
{
    size_t const headerSize = 3;  /* Use hard coded size of 3 bytes */
    /* offsets carry no extra-bits table : the code itself is the bit count */
    size_t const ofEstimate = ZSTD_estimateSubBlockSize_symbolType(
                                    fseMetadata->ofType, ofCodeTable, MaxOff,
                                    nbSeq, fseTables->offcodeCTable, NULL,
                                    OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
                                    workspace, wkspSize);
    size_t const llEstimate = ZSTD_estimateSubBlockSize_symbolType(
                                    fseMetadata->llType, llCodeTable, MaxLL,
                                    nbSeq, fseTables->litlengthCTable, LL_bits,
                                    LL_defaultNorm, LL_defaultNormLog, MaxLL,
                                    workspace, wkspSize);
    size_t const mlEstimate = ZSTD_estimateSubBlockSize_symbolType(
                                    fseMetadata->mlType, mlCodeTable, MaxML,
                                    nbSeq, fseTables->matchlengthCTable, ML_bits,
                                    ML_defaultNorm, ML_defaultNormLog, MaxML,
                                    workspace, wkspSize);
    size_t const tablesSize = writeEntropy ? fseMetadata->fseTablesSize : 0;

    return ofEstimate + llEstimate + mlEstimate + tablesSize + headerSize;
}
/* ZSTD_estimateSubBlockSize() :
 * Estimates the total size of a sub-block : literals section estimate +
 * sequences section estimate + ZSTD_blockHeaderSize.
 * writeLitEntropy / writeSeqEntropy tell whether the Huffman / FSE table
 * descriptions are counted in the respective section.
 * @return : estimated size in bytes */
static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
                                        const BYTE* ofCodeTable,
                                        const BYTE* llCodeTable,
                                        const BYTE* mlCodeTable,
                                        size_t nbSeq,
                                        const ZSTD_entropyCTables_t* entropy,
                                        const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                                        void* workspace, size_t wkspSize,
                                        int writeLitEntropy, int writeSeqEntropy) {
    size_t const litEstimate = ZSTD_estimateSubBlockSize_literal(
                                    literals, litSize,
                                    &entropy->huf, &entropyMetadata->hufMetadata,
                                    workspace, wkspSize, writeLitEntropy);
    size_t const seqEstimate = ZSTD_estimateSubBlockSize_sequences(
                                    ofCodeTable, llCodeTable, mlCodeTable,
                                    nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
                                    workspace, wkspSize, writeSeqEntropy);

    return litEstimate + seqEstimate + ZSTD_blockHeaderSize;
}
static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata) static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
{ {
if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle) if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle)