Add unit tests and fuzzer param
This commit is contained in:
parent
de52de1347
commit
e2bb215117
@ -29,7 +29,6 @@
|
|||||||
#include "zstd_opt.h"
|
#include "zstd_opt.h"
|
||||||
#include "zstd_ldm.h"
|
#include "zstd_ldm.h"
|
||||||
#include "zstd_compress_superblock.h"
|
#include "zstd_compress_superblock.h"
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
/* ***************************************************************
|
/* ***************************************************************
|
||||||
* Tuning parameters
|
* Tuning parameters
|
||||||
@ -2193,28 +2192,42 @@ static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)
|
|||||||
return (cctxParams->targetCBlockSize != 0);
|
return (cctxParams->targetCBlockSize != 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ZSTD_buildSequencesStatistics():
|
/* ZSTD_useBlockSplitting():
|
||||||
* Returns the size of the statistics for a given set of sequences, or a ZSTD error code
|
* Returns whether the block splitting param is being used.
|
||||||
*/
|
* If used, compression will do best effort to split a block in order to improve compression ratio.
|
||||||
|
* Returns 1 if true, 0 otherwise. */
|
||||||
|
static int ZSTD_useBlockSplitting(const ZSTD_CCtx_params* cctxParams)
|
||||||
|
{
|
||||||
|
DEBUGLOG(5, "ZSTD_useBlockSplitting(splitBlocks=%d)", cctxParams->splitBlocks);
|
||||||
|
return (cctxParams->splitBlocks != 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ZSTD_buildSequencesStatistics():
|
||||||
|
* Returns the size of the statistics for a given set of sequences, or a ZSTD error code.
|
||||||
|
*/
|
||||||
MEM_STATIC size_t
|
MEM_STATIC size_t
|
||||||
ZSTD_buildSequencesStatistics(const BYTE* const ofCodeTable,
|
ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
|
||||||
const BYTE* const llCodeTable,
|
|
||||||
const BYTE* const mlCodeTable,
|
|
||||||
FSE_CTable* CTable_LitLength,
|
|
||||||
FSE_CTable* CTable_OffsetBits,
|
|
||||||
FSE_CTable* CTable_MatchLength,
|
|
||||||
size_t nbSeq,
|
|
||||||
const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,
|
const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,
|
||||||
BYTE* dst, const BYTE* const dstEnd,
|
BYTE* dst, const BYTE* const dstEnd,
|
||||||
ZSTD_strategy strategy, BYTE* lastNCount, ZSTD_fseCTablesMetadata_t* fseMetadata,
|
ZSTD_strategy strategy, BYTE** lastNCount, ZSTD_fseCTablesMetadata_t* fseMetadata,
|
||||||
void* countWorkspace, void* entropyWorkspace, size_t entropyWkspSize) {
|
void* entropyWorkspace, size_t entropyWkspSize) {
|
||||||
U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
|
U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
|
||||||
BYTE* const ostart = dst;
|
BYTE* const ostart = dst;
|
||||||
const BYTE* const oend = dstEnd;
|
const BYTE* const oend = dstEnd;
|
||||||
BYTE* op = ostart;
|
BYTE* op = ostart;
|
||||||
BYTE* seqHead = op++;
|
BYTE* seqHead = op++;
|
||||||
|
|
||||||
|
unsigned* const countWorkspace = (unsigned*)entropyWorkspace;
|
||||||
|
|
||||||
|
FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
|
||||||
|
FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
|
||||||
|
FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
|
||||||
|
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
|
||||||
|
const BYTE* const llCodeTable = seqStorePtr->llCode;
|
||||||
|
const BYTE* const mlCodeTable = seqStorePtr->mlCode;
|
||||||
|
|
||||||
|
/* convert length/distances into codes */
|
||||||
|
ZSTD_seqToCodes(seqStorePtr);
|
||||||
assert(op <= oend);
|
assert(op <= oend);
|
||||||
/* build CTable for Literal Lengths */
|
/* build CTable for Literal Lengths */
|
||||||
{ unsigned max = MaxLL;
|
{ unsigned max = MaxLL;
|
||||||
@ -2238,7 +2251,7 @@ ZSTD_buildSequencesStatistics(const BYTE* const ofCodeTable,
|
|||||||
entropyWorkspace, entropyWkspSize);
|
entropyWorkspace, entropyWkspSize);
|
||||||
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
|
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
|
||||||
if (LLtype == set_compressed)
|
if (LLtype == set_compressed)
|
||||||
lastNCount = op;
|
*lastNCount = op;
|
||||||
op += countSize;
|
op += countSize;
|
||||||
if (fseMetadata) {
|
if (fseMetadata) {
|
||||||
if (LLtype == set_compressed) fseMetadata->lastCountSize = countSize;
|
if (LLtype == set_compressed) fseMetadata->lastCountSize = countSize;
|
||||||
@ -2270,7 +2283,7 @@ ZSTD_buildSequencesStatistics(const BYTE* const ofCodeTable,
|
|||||||
entropyWorkspace, entropyWkspSize);
|
entropyWorkspace, entropyWkspSize);
|
||||||
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
|
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
|
||||||
if (Offtype == set_compressed)
|
if (Offtype == set_compressed)
|
||||||
lastNCount = op;
|
*lastNCount = op;
|
||||||
op += countSize;
|
op += countSize;
|
||||||
if (fseMetadata) {
|
if (fseMetadata) {
|
||||||
if (Offtype == set_compressed) fseMetadata->lastCountSize = countSize;
|
if (Offtype == set_compressed) fseMetadata->lastCountSize = countSize;
|
||||||
@ -2300,7 +2313,7 @@ ZSTD_buildSequencesStatistics(const BYTE* const ofCodeTable,
|
|||||||
entropyWorkspace, entropyWkspSize);
|
entropyWorkspace, entropyWkspSize);
|
||||||
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
|
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
|
||||||
if (MLtype == set_compressed)
|
if (MLtype == set_compressed)
|
||||||
lastNCount = op;
|
*lastNCount = op;
|
||||||
op += countSize;
|
op += countSize;
|
||||||
if (fseMetadata) {
|
if (fseMetadata) {
|
||||||
if (MLtype == set_compressed) fseMetadata->lastCountSize = countSize;
|
if (MLtype == set_compressed) fseMetadata->lastCountSize = countSize;
|
||||||
@ -2330,7 +2343,7 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
|
|||||||
FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
|
FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
|
||||||
FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
|
FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
|
||||||
FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
|
FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
|
||||||
U32 entropyStatisticsSize;
|
size_t entropyStatisticsSize;
|
||||||
const seqDef* const sequences = seqStorePtr->sequencesStart;
|
const seqDef* const sequences = seqStorePtr->sequencesStart;
|
||||||
const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
|
const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
|
||||||
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
|
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
|
||||||
@ -2361,7 +2374,6 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
|
|||||||
bmi2);
|
bmi2);
|
||||||
FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
|
FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
|
||||||
assert(cSize <= dstCapacity);
|
assert(cSize <= dstCapacity);
|
||||||
DEBUGLOG(2, "Actual litSize: %zu", cSize);
|
|
||||||
op += cSize;
|
op += cSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2386,14 +2398,11 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
|
|||||||
return (size_t)(op - ostart);
|
return (size_t)(op - ostart);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* convert length/distances into codes */
|
|
||||||
ZSTD_seqToCodes(seqStorePtr);
|
|
||||||
/* build stats for sequences */
|
/* build stats for sequences */
|
||||||
entropyStatisticsSize = ZSTD_buildSequencesStatistics(ofCodeTable, llCodeTable, mlCodeTable,
|
entropyStatisticsSize = ZSTD_buildSequencesStatistics(seqStorePtr,
|
||||||
CTable_LitLength, CTable_OffsetBits, CTable_MatchLength,
|
|
||||||
nbSeq, &prevEntropy->fse, &nextEntropy->fse, op, oend,
|
nbSeq, &prevEntropy->fse, &nextEntropy->fse, op, oend,
|
||||||
strategy, lastNCount, NULL /* no fseMetadata needed */,
|
strategy, &lastNCount, NULL /* no fseMetadata needed */,
|
||||||
count, entropyWorkspace, entropyWkspSize);
|
entropyWorkspace, entropyWkspSize);
|
||||||
FORWARD_IF_ERROR(entropyStatisticsSize, "FSE statistics building failed!");
|
FORWARD_IF_ERROR(entropyStatisticsSize, "FSE statistics building failed!");
|
||||||
op += entropyStatisticsSize;
|
op += entropyStatisticsSize;
|
||||||
|
|
||||||
@ -2422,7 +2431,6 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
|
|||||||
"emitting an uncompressed block.");
|
"emitting an uncompressed block.");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
DEBUGLOG(2, "Actual seqSize: %zu", bitstreamSize);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));
|
DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));
|
||||||
@ -2762,7 +2770,7 @@ static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastB
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** ZSTD_buildBlockEntropyStats_literals() :
|
/** ZSTD_buildBlockEntropyStats_literals() :
|
||||||
* Builds entropy for the super-block literals.
|
* Builds entropy for the literals.
|
||||||
* Stores literals block type (raw, rle, compressed, repeat) and
|
* Stores literals block type (raw, rle, compressed, repeat) and
|
||||||
* huffman description table to hufMetadata.
|
* huffman description table to hufMetadata.
|
||||||
* @return : size of huffman description table or error code */
|
* @return : size of huffman description table or error code */
|
||||||
@ -2780,7 +2788,7 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi
|
|||||||
const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
|
const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
|
||||||
BYTE* const nodeWksp = countWkspStart + countWkspSize;
|
BYTE* const nodeWksp = countWkspStart + countWkspSize;
|
||||||
const size_t nodeWkspSize = wkspEnd-nodeWksp;
|
const size_t nodeWkspSize = wkspEnd-nodeWksp;
|
||||||
unsigned maxSymbolValue = 255;
|
unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
|
||||||
unsigned huffLog = HUF_TABLELOG_DEFAULT;
|
unsigned huffLog = HUF_TABLELOG_DEFAULT;
|
||||||
HUF_repeat repeat = prevHuf->repeatMode;
|
HUF_repeat repeat = prevHuf->repeatMode;
|
||||||
DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize);
|
DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize);
|
||||||
@ -2795,7 +2803,9 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* small ? don't even attempt compression (speed opt) */
|
/* small ? don't even attempt compression (speed opt) */
|
||||||
# define COMPRESS_LITERALS_SIZE_MIN 63
|
#ifndef COMPRESS_LITERALS_SIZE_MIN
|
||||||
|
#define COMPRESS_LITERALS_SIZE_MIN 63
|
||||||
|
#endif
|
||||||
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
|
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
|
||||||
if (srcSize <= minLitSize) {
|
if (srcSize <= minLitSize) {
|
||||||
DEBUGLOG(5, "set_basic - too small");
|
DEBUGLOG(5, "set_basic - too small");
|
||||||
@ -2864,7 +2874,7 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** ZSTD_buildBlockEntropyStats_sequences() :
|
/** ZSTD_buildBlockEntropyStats_sequences() :
|
||||||
* Builds entropy for the super-block sequences.
|
* Builds entropy for the sequences.
|
||||||
* Stores symbol compression modes and fse table to fseMetadata.
|
* Stores symbol compression modes and fse table to fseMetadata.
|
||||||
* @return : size of fse tables or error code */
|
* @return : size of fse tables or error code */
|
||||||
static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
|
static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
|
||||||
@ -2874,38 +2884,19 @@ static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
|
|||||||
ZSTD_fseCTablesMetadata_t* fseMetadata,
|
ZSTD_fseCTablesMetadata_t* fseMetadata,
|
||||||
void* workspace, size_t wkspSize)
|
void* workspace, size_t wkspSize)
|
||||||
{
|
{
|
||||||
/* Size the workspaces */
|
|
||||||
BYTE* const wkspStart = (BYTE*)workspace;
|
|
||||||
BYTE* const wkspEnd = wkspStart + wkspSize;
|
|
||||||
BYTE* const countWkspStart = wkspStart;
|
|
||||||
unsigned* const countWksp = (unsigned*)workspace;
|
|
||||||
const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
|
|
||||||
BYTE* const cTableWksp = countWkspStart + countWkspSize;
|
|
||||||
const size_t cTableWkspSize = wkspEnd-cTableWksp;
|
|
||||||
|
|
||||||
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
|
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
|
||||||
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
|
|
||||||
const BYTE* const llCodeTable = seqStorePtr->llCode;
|
|
||||||
const BYTE* const mlCodeTable = seqStorePtr->mlCode;
|
|
||||||
FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
|
|
||||||
FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
|
|
||||||
FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
|
|
||||||
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
|
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
|
||||||
BYTE* const ostart = fseMetadata->fseTablesBuffer;
|
BYTE* const ostart = fseMetadata->fseTablesBuffer;
|
||||||
BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
|
BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
|
||||||
BYTE* op = ostart;
|
BYTE* op = ostart;
|
||||||
BYTE* lastNCount = NULL;
|
BYTE* lastNCount = NULL;
|
||||||
|
|
||||||
assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
|
|
||||||
DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq);
|
DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq);
|
||||||
ZSTD_memset(workspace, 0, wkspSize);
|
ZSTD_memset(workspace, 0, wkspSize);
|
||||||
|
|
||||||
fseMetadata->lastCountSize = 0;
|
fseMetadata->lastCountSize = 0;
|
||||||
return ZSTD_buildSequencesStatistics(ofCodeTable, llCodeTable, mlCodeTable,
|
return ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, prevEntropy, nextEntropy, op, oend,
|
||||||
CTable_LitLength, CTable_OffsetBits, CTable_MatchLength,
|
strategy, &lastNCount, fseMetadata,
|
||||||
nbSeq, prevEntropy, nextEntropy, op, oend,
|
workspace, wkspSize);
|
||||||
strategy, lastNCount, fseMetadata,
|
|
||||||
countWksp, cTableWksp, cTableWkspSize);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -2937,6 +2928,7 @@ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Returns the size estimate for the literals section (header + content) of a block */
|
||||||
static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
|
static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
|
||||||
const ZSTD_hufCTables_t* huf,
|
const ZSTD_hufCTables_t* huf,
|
||||||
const ZSTD_hufCTablesMetadata_t* hufMetadata,
|
const ZSTD_hufCTablesMetadata_t* hufMetadata,
|
||||||
@ -2962,6 +2954,7 @@ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t lit
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */
|
||||||
static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
|
static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
|
||||||
const BYTE* codeTable, unsigned maxCode,
|
const BYTE* codeTable, unsigned maxCode,
|
||||||
size_t nbSeq, const FSE_CTable* fseCTable,
|
size_t nbSeq, const FSE_CTable* fseCTable,
|
||||||
@ -2989,7 +2982,6 @@ static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
|
|||||||
cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);
|
cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);
|
||||||
}
|
}
|
||||||
if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) {
|
if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) {
|
||||||
DEBUGLOG(2, "Returning inaccurate");
|
|
||||||
return nbSeq * 10;
|
return nbSeq * 10;
|
||||||
}
|
}
|
||||||
while (ctp < ctEnd) {
|
while (ctp < ctEnd) {
|
||||||
@ -2997,9 +2989,10 @@ static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
|
|||||||
else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */
|
else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */
|
||||||
ctp++;
|
ctp++;
|
||||||
}
|
}
|
||||||
return cSymbolTypeSizeEstimateInBits / 8;
|
return cSymbolTypeSizeEstimateInBits >> 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Returns the size estimate for the sequences section (header + content) of a block */
|
||||||
static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
|
static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
|
||||||
const BYTE* llCodeTable,
|
const BYTE* llCodeTable,
|
||||||
const BYTE* mlCodeTable,
|
const BYTE* mlCodeTable,
|
||||||
@ -3009,7 +3002,7 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
|
|||||||
void* workspace, size_t wkspSize,
|
void* workspace, size_t wkspSize,
|
||||||
int writeEntropy)
|
int writeEntropy)
|
||||||
{
|
{
|
||||||
size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
|
size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ);
|
||||||
size_t cSeqSizeEstimate = 0;
|
size_t cSeqSizeEstimate = 0;
|
||||||
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
|
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
|
||||||
nbSeq, fseTables->offcodeCTable, NULL,
|
nbSeq, fseTables->offcodeCTable, NULL,
|
||||||
@ -3027,6 +3020,7 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
|
|||||||
return cSeqSizeEstimate + sequencesSectionHeaderSize;
|
return cSeqSizeEstimate + sequencesSectionHeaderSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Returns the size estimate for a given stream of literals, of, ll, ml */
|
||||||
size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
|
size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
|
||||||
const BYTE* ofCodeTable,
|
const BYTE* ofCodeTable,
|
||||||
const BYTE* llCodeTable,
|
const BYTE* llCodeTable,
|
||||||
@ -3098,21 +3092,21 @@ static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* seqStore) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx).
|
/* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx).
|
||||||
|
* Stores the result in resultSeqStore.
|
||||||
*/
|
*/
|
||||||
static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, const seqStore_t* originalSeqStore,
|
static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, const seqStore_t* originalSeqStore,
|
||||||
size_t startIdx, size_t endIdx) {
|
size_t startIdx, size_t endIdx) {
|
||||||
BYTE* const litEnd = originalSeqStore->lit;
|
BYTE* const litEnd = originalSeqStore->lit;
|
||||||
seqDef* const seqEnd = originalSeqStore->sequences;
|
|
||||||
U32 literalsBytes;
|
U32 literalsBytes;
|
||||||
U32 literalsBytesPreceding = 0;
|
U32 literalsBytesPreceding = 0;
|
||||||
|
|
||||||
*resultSeqStore = *originalSeqStore;
|
*resultSeqStore = *originalSeqStore;
|
||||||
/* First calculate the number of literal bytes before startIdx */
|
|
||||||
if (startIdx > 0) {
|
if (startIdx > 0) {
|
||||||
resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx;
|
resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx;
|
||||||
literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
|
literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Move longLengthPos into the correct position if necessary */
|
||||||
if (originalSeqStore->longLengthID != 0) {
|
if (originalSeqStore->longLengthID != 0) {
|
||||||
if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) {
|
if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) {
|
||||||
resultSeqStore->longLengthID = 0;
|
resultSeqStore->longLengthID = 0;
|
||||||
@ -3125,6 +3119,7 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, const seqStore_
|
|||||||
literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
|
literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
|
||||||
resultSeqStore->litStart += literalsBytesPreceding;
|
resultSeqStore->litStart += literalsBytesPreceding;
|
||||||
if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) {
|
if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) {
|
||||||
|
/* This accounts for possible last literals if the derived chunk reaches the end of the block */
|
||||||
resultSeqStore->lit = litEnd;
|
resultSeqStore->lit = litEnd;
|
||||||
} else {
|
} else {
|
||||||
resultSeqStore->lit = resultSeqStore->litStart+literalsBytes;
|
resultSeqStore->lit = resultSeqStore->litStart+literalsBytes;
|
||||||
@ -3137,14 +3132,14 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, const seqStore_
|
|||||||
/* ZSTD_compressSequences_singleBlock():
|
/* ZSTD_compressSequences_singleBlock():
|
||||||
* Compresses a seqStore into a block with a block header, into the buffer dst.
|
* Compresses a seqStore into a block with a block header, into the buffer dst.
|
||||||
*
|
*
|
||||||
* Returns the size of that block or a ZSTD error code
|
* Returns the total size of that block (including header) or a ZSTD error code.
|
||||||
|
*
|
||||||
|
* TODO: Migrate compressBlock_internal and compressSequences_internal to use this as well
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* TODO: Migrate compressBlock_internal and compressSequences_internal to use this as well */
|
|
||||||
static size_t ZSTD_compressSequences_singleBlock(ZSTD_CCtx* zc, seqStore_t* seqStore,
|
static size_t ZSTD_compressSequences_singleBlock(ZSTD_CCtx* zc, seqStore_t* seqStore,
|
||||||
void* dst, size_t dstCapacity,
|
void* dst, size_t dstCapacity,
|
||||||
const void* src, size_t srcSize,
|
const void* src, size_t srcSize,
|
||||||
U32 lastBlock) {
|
U32 lastBlock) {
|
||||||
const U32 rleMaxLength = 25;
|
const U32 rleMaxLength = 25;
|
||||||
BYTE* op = (BYTE*)dst;
|
BYTE* op = (BYTE*)dst;
|
||||||
const BYTE* ip = (const BYTE*)src;
|
const BYTE* ip = (const BYTE*)src;
|
||||||
@ -3159,7 +3154,6 @@ static size_t ZSTD_compressSequences_singleBlock(ZSTD_CCtx* zc, seqStore_t* seqS
|
|||||||
|
|
||||||
if (!zc->isFirstBlock &&
|
if (!zc->isFirstBlock &&
|
||||||
cSeqsSize < rleMaxLength &&
|
cSeqsSize < rleMaxLength &&
|
||||||
ZSTD_maybeRLE(seqStore) &&
|
|
||||||
ZSTD_isRLE((BYTE const*)src, srcSize)) {
|
ZSTD_isRLE((BYTE const*)src, srcSize)) {
|
||||||
/* We don't want to emit our first block as a RLE even if it qualifies because
|
/* We don't want to emit our first block as a RLE even if it qualifies because
|
||||||
* doing so will cause the decoder (cli only) to throw a "should consume all input error."
|
* doing so will cause the decoder (cli only) to throw a "should consume all input error."
|
||||||
@ -3179,17 +3173,17 @@ static size_t ZSTD_compressSequences_singleBlock(ZSTD_CCtx* zc, seqStore_t* seqS
|
|||||||
if (cSeqsSize == 0) {
|
if (cSeqsSize == 0) {
|
||||||
cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
|
cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
|
||||||
FORWARD_IF_ERROR(cSize, "Nocompress block failed");
|
FORWARD_IF_ERROR(cSize, "Nocompress block failed");
|
||||||
DEBUGLOG(2, "1: Writing out nocompress block, size: %zu", cSize);
|
DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize);
|
||||||
} else if (cSeqsSize == 1) {
|
} else if (cSeqsSize == 1) {
|
||||||
cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock);
|
cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock);
|
||||||
FORWARD_IF_ERROR(cSize, "RLE compress block failed");
|
FORWARD_IF_ERROR(cSize, "RLE compress block failed");
|
||||||
DEBUGLOG(2, "1: Writing out RLE block, size: %zu", cSize);
|
DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize);
|
||||||
} else {
|
} else {
|
||||||
/* Error checking and repcodes update */
|
/* Error checking and repcodes update */
|
||||||
ZSTD_confirmRepcodesAndEntropyTables(zc);
|
ZSTD_confirmRepcodesAndEntropyTables(zc);
|
||||||
writeBlockHeader(op, cSeqsSize, srcSize, lastBlock);
|
writeBlockHeader(op, cSeqsSize, srcSize, lastBlock);
|
||||||
cSize = ZSTD_blockHeaderSize + cSeqsSize;
|
cSize = ZSTD_blockHeaderSize + cSeqsSize;
|
||||||
DEBUGLOG(3, "1: Writing out compressed block, size: %zu", cSize);
|
DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize);
|
||||||
}
|
}
|
||||||
return cSize;
|
return cSize;
|
||||||
}
|
}
|
||||||
@ -3204,117 +3198,149 @@ typedef struct {
|
|||||||
#define MAX_NB_SPLITS 196
|
#define MAX_NB_SPLITS 196
|
||||||
|
|
||||||
/* Helper function to perform the recursive search for block splits.
|
/* Helper function to perform the recursive search for block splits.
|
||||||
* Estimates the cost of the original seqStore, and estimates the cost of splitting the sequences in half.
|
* Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half.
|
||||||
* If advantageous to split, then we recursive down the two sub-blocks.
|
* If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then
|
||||||
* The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING.
|
* we do not recurse.
|
||||||
* In practice, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).
|
|
||||||
*
|
*
|
||||||
* Returns the sequence index at which to split, or 0 if we should not split.
|
* Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING.
|
||||||
|
* In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).
|
||||||
|
* In practice, recursion depth usually doesn't go beyond 4.
|
||||||
|
*
|
||||||
|
* Furthermore, the number of splits is capped by MAX_NB_SPLITS. At MAX_NB_SPLITS == 196 with the current blockSize
|
||||||
|
* maximum of 128 KB, this value is actually impossible to reach.
|
||||||
*/
|
*/
|
||||||
static size_t deriveBlockBoundsHelper(ZSTD_CCtx* zc, seqStoreSplits* splits, size_t startIdx, size_t endIdx, const seqStore_t* origSeqStore) {
|
static void ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
|
||||||
seqStore_t origSeqStoreChunk;
|
const ZSTD_CCtx* zc, const seqStore_t* origSeqStore) {
|
||||||
|
seqStore_t fullSeqStoreChunk;
|
||||||
seqStore_t firstHalfSeqStore;
|
seqStore_t firstHalfSeqStore;
|
||||||
seqStore_t secondHalfSeqStore;
|
seqStore_t secondHalfSeqStore;
|
||||||
size_t estimatedOriginalSize;
|
size_t estimatedOriginalSize;
|
||||||
size_t estimatedFirstHalfSize;
|
size_t estimatedFirstHalfSize;
|
||||||
size_t estimatedSecondHalfSize;
|
size_t estimatedSecondHalfSize;
|
||||||
|
size_t midIdx = (startIdx + endIdx)/2;
|
||||||
|
|
||||||
if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= MAX_NB_SPLITS) {
|
if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= MAX_NB_SPLITS) {
|
||||||
return 0;
|
return;
|
||||||
}
|
}
|
||||||
ZSTD_deriveSeqStoreChunk(&origSeqStoreChunk, origSeqStore, startIdx, endIdx);
|
ZSTD_deriveSeqStoreChunk(&fullSeqStoreChunk, origSeqStore, startIdx, endIdx);
|
||||||
ZSTD_deriveSeqStoreChunk(&firstHalfSeqStore, origSeqStore, startIdx, (startIdx + endIdx)/2);
|
ZSTD_deriveSeqStoreChunk(&firstHalfSeqStore, origSeqStore, startIdx, midIdx);
|
||||||
ZSTD_deriveSeqStoreChunk(&secondHalfSeqStore, origSeqStore, (startIdx + endIdx)/2, endIdx);
|
ZSTD_deriveSeqStoreChunk(&secondHalfSeqStore, origSeqStore, midIdx, endIdx);
|
||||||
estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&origSeqStoreChunk, zc);
|
estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&fullSeqStoreChunk, zc);
|
||||||
estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&firstHalfSeqStore, zc);
|
estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&firstHalfSeqStore, zc);
|
||||||
estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&secondHalfSeqStore, zc);
|
estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&secondHalfSeqStore, zc);
|
||||||
|
if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) {
|
if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) {
|
||||||
deriveBlockBoundsHelper(zc, splits, startIdx, (startIdx + endIdx)/2, origSeqStore);
|
ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore);
|
||||||
splits->splitLocations[splits->idx] = (startIdx + endIdx)/2;
|
splits->splitLocations[splits->idx] = midIdx;
|
||||||
splits->idx++;
|
splits->idx++;
|
||||||
deriveBlockBoundsHelper(zc, splits, (startIdx + endIdx)/2, endIdx, origSeqStore);
|
ZSTD_deriveBlockSplitsHelper(splits, midIdx, endIdx, zc, origSeqStore);
|
||||||
return (startIdx + endIdx)/2;
|
|
||||||
} else {
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Base recursive function. Populates a table of partitions indices.
|
/* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio.
|
||||||
*
|
*
|
||||||
* Returns the number of splits made (which equals the size of the partition table - 1).
|
* Returns the number of splits made (which equals the size of the partition table - 1).
|
||||||
*/
|
*/
|
||||||
static size_t deriveBlockBounds(ZSTD_CCtx* zc, void* dst, size_t dstCapacity,
|
static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) {
|
||||||
const void* src, size_t srcSize,
|
seqStoreSplits splits = {partitions, 0};
|
||||||
U32 partitions[], U32 nbSeq) {
|
if (nbSeq <= 4) {
|
||||||
seqStoreSplits splits;
|
/* Refuse to try and split anything with 4 or fewer sequences */
|
||||||
splits.idx = 0;
|
return 0;
|
||||||
splits.splitLocations = partitions;
|
}
|
||||||
|
ZSTD_deriveBlockSplitsHelper(&splits, 0, nbSeq, zc, &zc->seqStore);
|
||||||
deriveBlockBoundsHelper(zc, &splits, 0, nbSeq, &zc->seqStore);
|
|
||||||
splits.splitLocations[splits.idx] = nbSeq;
|
splits.splitLocations[splits.idx] = nbSeq;
|
||||||
return splits.idx;
|
return splits.idx;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ZSTD_compressBlock_splitBlock():
|
/* ZSTD_compressBlock_splitBlock():
|
||||||
* Attempts to split a given block into multiple (currently 2) blocks to improve compression ratio.
|
* Attempts to split a given block into multiple blocks to improve compression ratio.
|
||||||
*
|
*
|
||||||
* Returns 0 if it would not be advantageous to split the block. Otherwise, returns the combined size
|
* Returns combined size of all blocks (which includes headers), or a ZSTD error code.
|
||||||
* of the multiple blocks, or a ZSTD error code.
|
|
||||||
*/
|
*/
|
||||||
static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
|
static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity,
|
||||||
void* dst, size_t dstCapacity,
|
const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq) {
|
||||||
const void* src, size_t srcSize, U32 lastBlock, U32 nbSeq) {
|
|
||||||
size_t cSize = 0;
|
size_t cSize = 0;
|
||||||
const BYTE* ip = (const BYTE*)src;
|
const BYTE* ip = (const BYTE*)src;
|
||||||
BYTE* op = (BYTE*)dst;
|
BYTE* op = (BYTE*)dst;
|
||||||
seqStore_t firstHalfSeqStore;
|
U32 partitions[MAX_NB_SPLITS];
|
||||||
seqStore_t secondHalfSeqStore;
|
size_t i = 0;
|
||||||
size_t cSizeFirstHalf;
|
size_t startIdx = 0;
|
||||||
size_t cSizeSecondHalf;
|
size_t endIdx;
|
||||||
|
size_t srcBytesTotal = 0;
|
||||||
|
size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
|
||||||
|
|
||||||
DEBUGLOG(5, "ZSTD_compressBlock_splitBlock (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
|
DEBUGLOG(5, "ZSTD_compressBlock_splitBlock (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
|
||||||
(unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
|
(unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
|
||||||
(unsigned)zc->blockState.matchState.nextToUpdate);
|
(unsigned)zc->blockState.matchState.nextToUpdate);
|
||||||
{
|
|
||||||
U32 partitions[MAX_NB_SPLITS];
|
|
||||||
size_t numSplits = deriveBlockBounds(zc, dst, dstCapacity, src, srcSize, partitions, nbSeq);
|
|
||||||
size_t i = 0;
|
|
||||||
size_t startIdx = 0;
|
|
||||||
size_t endIdx = 0;
|
|
||||||
size_t srcBytesCum = 0;
|
|
||||||
|
|
||||||
if (numSplits == 0) {
|
if (numSplits == 0) {
|
||||||
return 0;
|
size_t cSizeSingleBlock = ZSTD_compressSequences_singleBlock(zc, &zc->seqStore, op, dstCapacity, ip, blockSize, lastBlock);
|
||||||
}
|
FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!");
|
||||||
while (i <= numSplits) {
|
return cSizeSingleBlock;
|
||||||
endIdx = partitions[i];
|
|
||||||
seqStore_t chunkSeqStore = zc->seqStore;
|
|
||||||
ZSTD_deriveSeqStoreChunk(&chunkSeqStore, &zc->seqStore, startIdx, endIdx);
|
|
||||||
size_t srcBytes = ZSTD_countSeqStoreLiteralsBytes(&chunkSeqStore) + ZSTD_countSeqStoreMatchBytes(&chunkSeqStore);
|
|
||||||
size_t lastBlock = lastBlock && (nbSeq == endIdx);
|
|
||||||
srcBytesCum += srcBytes;
|
|
||||||
if (endIdx == nbSeq) {
|
|
||||||
/* This is the final partition, need to account for last literals */
|
|
||||||
srcBytes += zc->blockSize - srcBytesCum;
|
|
||||||
srcBytesCum += zc->blockSize - srcBytesCum;
|
|
||||||
}
|
|
||||||
size_t cSizeChunk = ZSTD_compressSequences_singleBlock(zc, &chunkSeqStore, op, dstCapacity, ip, srcBytes, lastBlock);
|
|
||||||
{
|
|
||||||
ZSTD_memcpy(zc->blockState.nextCBlock->rep, zc->blockState.prevCBlock->rep, sizeof(U32)*ZSTD_REP_NUM);
|
|
||||||
ip += srcBytes;
|
|
||||||
op += cSizeChunk;
|
|
||||||
dstCapacity -= cSizeChunk;
|
|
||||||
}
|
|
||||||
startIdx = partitions[i];
|
|
||||||
cSize += cSizeChunk;
|
|
||||||
++i;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (i = 0; i <= numSplits; ++i) {
|
||||||
|
seqStore_t chunkSeqStore;
|
||||||
|
size_t srcBytes;
|
||||||
|
size_t cSizeChunk;
|
||||||
|
U32 lastBlockActual;
|
||||||
|
|
||||||
|
endIdx = partitions[i];
|
||||||
|
ZSTD_deriveSeqStoreChunk(&chunkSeqStore, &zc->seqStore, startIdx, endIdx);
|
||||||
|
srcBytes = ZSTD_countSeqStoreLiteralsBytes(&chunkSeqStore) + ZSTD_countSeqStoreMatchBytes(&chunkSeqStore);
|
||||||
|
lastBlockActual = lastBlock && (nbSeq == endIdx);
|
||||||
|
srcBytesTotal += srcBytes;
|
||||||
|
if (i == numSplits) {
|
||||||
|
/* This is the final partition, need to account for possible last literals */
|
||||||
|
srcBytes += blockSize - srcBytesTotal;
|
||||||
|
}
|
||||||
|
|
||||||
|
cSizeChunk = ZSTD_compressSequences_singleBlock(zc, &chunkSeqStore, op, dstCapacity, ip, srcBytes, lastBlockActual);
|
||||||
|
FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!");
|
||||||
|
ZSTD_memcpy(zc->blockState.nextCBlock->rep, zc->blockState.prevCBlock->rep, sizeof(U32)*ZSTD_REP_NUM);
|
||||||
|
|
||||||
|
ip += srcBytes;
|
||||||
|
op += cSizeChunk;
|
||||||
|
dstCapacity -= cSizeChunk;
|
||||||
|
cSize += cSizeChunk;
|
||||||
|
startIdx = partitions[i];
|
||||||
|
}
|
||||||
|
return cSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
|
||||||
|
void* dst, size_t dstCapacity,
|
||||||
|
const void* src, size_t srcSize, U32 lastBlock) {
|
||||||
|
const BYTE* ip = (const BYTE*)src;
|
||||||
|
BYTE* op = (BYTE*)dst;
|
||||||
|
size_t nbSeq;
|
||||||
|
size_t cSize;
|
||||||
|
DEBUGLOG(4, "ZSTD_compressBlock_splitBlock");
|
||||||
|
|
||||||
|
{ const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
|
||||||
|
FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
|
||||||
|
if (bss == ZSTDbss_noCompress) {
|
||||||
|
cSize = 0;
|
||||||
|
if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
|
||||||
|
zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
|
||||||
|
cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
|
||||||
|
FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
|
||||||
|
return cSize;
|
||||||
|
}
|
||||||
|
nbSeq = (size_t)(zc->seqStore.sequences - zc->seqStore.sequencesStart);
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(zc->appliedParams.splitBlocks == 1);
|
||||||
|
cSize = ZSTD_compressBlock_splitBlock_internal(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq);
|
||||||
|
FORWARD_IF_ERROR(cSize, "Splitting blocks failed!");
|
||||||
return cSize;
|
return cSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
|
static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
|
||||||
void* dst, size_t dstCapacity,
|
void* dst, size_t dstCapacity,
|
||||||
const void* src, size_t srcSize, U32 frame, U32 lastBlock)
|
const void* src, size_t srcSize, U32 frame)
|
||||||
{
|
{
|
||||||
/* This is the upper bound for the length of an rle block.
|
/* This is the upper bound for the length of an rle block.
|
||||||
* This isn't the actual upper bound. Finding the real threshold
|
* This isn't the actual upper bound. Finding the real threshold
|
||||||
@ -3322,7 +3348,6 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
|
|||||||
*/
|
*/
|
||||||
const U32 rleMaxLength = 25;
|
const U32 rleMaxLength = 25;
|
||||||
size_t cSize;
|
size_t cSize;
|
||||||
size_t nbSeq;
|
|
||||||
const BYTE* ip = (const BYTE*)src;
|
const BYTE* ip = (const BYTE*)src;
|
||||||
BYTE* op = (BYTE*)dst;
|
BYTE* op = (BYTE*)dst;
|
||||||
DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
|
DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
|
||||||
@ -3332,23 +3357,19 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
|
|||||||
{ const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
|
{ const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
|
||||||
FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
|
FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
|
||||||
if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
|
if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
|
||||||
nbSeq = (size_t)(zc->seqStore.sequences - zc->seqStore.sequencesStart);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
zc->appliedParams.splitBlocks = 1; /* remove */
|
if (zc->seqCollector.collectSequences) {
|
||||||
if (zc->appliedParams.splitBlocks && nbSeq >= 2) {
|
ZSTD_copyBlockSequences(zc);
|
||||||
size_t splitBlocksCompressedSize;
|
ZSTD_confirmRepcodesAndEntropyTables(zc);
|
||||||
splitBlocksCompressedSize = ZSTD_compressBlock_splitBlock(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq);
|
return 0;
|
||||||
if (splitBlocksCompressedSize != 0) {
|
|
||||||
return splitBlocksCompressedSize;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* encode sequences and literals */
|
/* encode sequences and literals */
|
||||||
cSize = ZSTD_entropyCompressSequences(&zc->seqStore,
|
cSize = ZSTD_entropyCompressSequences(&zc->seqStore,
|
||||||
&zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
|
&zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
|
||||||
&zc->appliedParams,
|
&zc->appliedParams,
|
||||||
op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
|
dst, dstCapacity,
|
||||||
srcSize,
|
srcSize,
|
||||||
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
|
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
|
||||||
zc->bmi2);
|
zc->bmi2);
|
||||||
@ -3382,13 +3403,7 @@ out:
|
|||||||
*/
|
*/
|
||||||
if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
|
if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
|
||||||
zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
|
zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
|
||||||
if (cSize == 0) {
|
|
||||||
cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
|
|
||||||
FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
|
|
||||||
} else {
|
|
||||||
writeBlockHeader(op, cSize, srcSize, lastBlock);
|
|
||||||
cSize += ZSTD_blockHeaderSize;
|
|
||||||
}
|
|
||||||
return cSize;
|
return cSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3498,7 +3513,7 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
|
|||||||
* Frame is supposed already started (header already produced)
|
* Frame is supposed already started (header already produced)
|
||||||
* @return : compressed size, or an error code
|
* @return : compressed size, or an error code
|
||||||
*/
|
*/
|
||||||
static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
|
static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
|
||||||
void* dst, size_t dstCapacity,
|
void* dst, size_t dstCapacity,
|
||||||
const void* src, size_t srcSize,
|
const void* src, size_t srcSize,
|
||||||
U32 lastFrameChunk)
|
U32 lastFrameChunk)
|
||||||
@ -3538,13 +3553,29 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
|
|||||||
FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed");
|
FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed");
|
||||||
assert(cSize > 0);
|
assert(cSize > 0);
|
||||||
assert(cSize <= blockSize + ZSTD_blockHeaderSize);
|
assert(cSize <= blockSize + ZSTD_blockHeaderSize);
|
||||||
|
} else if (ZSTD_useBlockSplitting(&cctx->appliedParams)) {
|
||||||
|
cSize = ZSTD_compressBlock_splitBlock(cctx, op, dstCapacity, ip, blockSize, lastBlock);
|
||||||
|
FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_splitBlock failed");
|
||||||
|
assert(cSize > 0);
|
||||||
} else {
|
} else {
|
||||||
cSize = ZSTD_compressBlock_internal(cctx,
|
cSize = ZSTD_compressBlock_internal(cctx,
|
||||||
op, dstCapacity,
|
op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
|
||||||
ip, blockSize, 1 /* frame */, lastBlock);
|
ip, blockSize, 1 /* frame */);
|
||||||
FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed");
|
FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed");
|
||||||
|
|
||||||
|
if (cSize == 0) { /* block is not compressible */
|
||||||
|
cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
|
||||||
|
FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
|
||||||
|
} else {
|
||||||
|
U32 const cBlockHeader = cSize == 1 ?
|
||||||
|
lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
|
||||||
|
lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
|
||||||
|
MEM_writeLE24(op, cBlockHeader);
|
||||||
|
cSize += ZSTD_blockHeaderSize;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
ip += blockSize;
|
ip += blockSize;
|
||||||
assert(remaining >= blockSize);
|
assert(remaining >= blockSize);
|
||||||
remaining -= blockSize;
|
remaining -= blockSize;
|
||||||
@ -3552,7 +3583,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
|
|||||||
assert(dstCapacity >= cSize);
|
assert(dstCapacity >= cSize);
|
||||||
dstCapacity -= cSize;
|
dstCapacity -= cSize;
|
||||||
cctx->isFirstBlock = 0;
|
cctx->isFirstBlock = 0;
|
||||||
DEBUGLOG(2, "ZSTD_compress_frameChunk: adding a block of size %u",
|
DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
|
||||||
(unsigned)cSize);
|
(unsigned)cSize);
|
||||||
} }
|
} }
|
||||||
|
|
||||||
@ -3698,7 +3729,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
|
|||||||
DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
|
DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
|
||||||
{ size_t const cSize = frame ?
|
{ size_t const cSize = frame ?
|
||||||
ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
|
ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
|
||||||
ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */, 0);
|
ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
|
||||||
FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed");
|
FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed");
|
||||||
cctx->consumedSrcSize += srcSize;
|
cctx->consumedSrcSize += srcSize;
|
||||||
cctx->producedCSize += (cSize + fhSize);
|
cctx->producedCSize += (cSize + fhSize);
|
||||||
|
@ -94,6 +94,7 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer
|
|||||||
setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, producer);
|
setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, producer);
|
||||||
setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, producer);
|
setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, producer);
|
||||||
setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer);
|
setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer);
|
||||||
|
setRand(cctx, ZSTD_c_splitBlocks, 0, 1, producer);
|
||||||
if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
|
if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
|
||||||
setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer);
|
setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer);
|
||||||
}
|
}
|
||||||
|
@ -1544,6 +1544,15 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
|||||||
ZSTD_freeCCtx(cctx);
|
ZSTD_freeCCtx(cctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DISPLAYLEVEL(3, "test%3i : compress with block splitting : ", testNb++)
|
||||||
|
{ ZSTD_CCtx* cctx = ZSTD_createCCtx();
|
||||||
|
CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_splitBlocks, 1) );
|
||||||
|
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
|
||||||
|
CHECK(cSize);
|
||||||
|
ZSTD_freeCCtx(cctx);
|
||||||
|
}
|
||||||
|
DISPLAYLEVEL(3, "OK \n");
|
||||||
|
|
||||||
DISPLAYLEVEL(3, "test%3i : compress -T2 with/without literals compression : ", testNb++)
|
DISPLAYLEVEL(3, "test%3i : compress -T2 with/without literals compression : ", testNb++)
|
||||||
{ ZSTD_CCtx* cctx = ZSTD_createCCtx();
|
{ ZSTD_CCtx* cctx = ZSTD_createCCtx();
|
||||||
size_t cSize1, cSize2;
|
size_t cSize1, cSize2;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user