Merge pull request #2780 from senhuang42/blocksplit_stack_reduce

Reduce stack usage of block splitter

commit c7afbec4c1
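
The change follows one pattern throughout: large objects that used to live on the stack of the block-splitting functions (several seqStore_t, a 196-entry partition array, and the entropy metadata) move into a ZSTD_blockSplitCtx embedded in the ZSTD_CCtx, and the functions take pointers into it. A minimal sketch of the idea, using hypothetical names rather than zstd's real types:

    #include <stdio.h>
    #include <string.h>

    /* Hypothetical scratch area, allocated once with its owning context
     * instead of on every (possibly recursive) call. */
    typedef struct {
        unsigned partitions[196];  /* was: a U32 array on the stack */
        char     seqScratch[512];  /* was: several struct locals */
    } SplitScratch;

    typedef struct {
        SplitScratch split;        /* lives as long as the context */
    } Ctx;

    static void deriveSplits(Ctx* ctx)
    {
        unsigned* partitions = ctx->split.partitions;  /* borrow, don't allocate */
        memset(partitions, 0, sizeof ctx->split.partitions);
        /* ... search logic writes into partitions ... */
    }

    int main(void)
    {
        static Ctx ctx;            /* static: keeps even the demo off the stack */
        deriveSplits(&ctx);
        printf("scratch size: %zu bytes\n", sizeof(SplitScratch));
        return 0;
    }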
@@ -93,4 +93,4 @@ test: libzstd
 
 .PHONY: clean
 clean:
-	$(RM) -rf linux
+	$(RM) -rf linux test/test test/static_test
@@ -1987,8 +1987,8 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
             zc->ldmState.loadedDictEnd = 0;
         }
 
-        assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));
         DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
+        assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));
 
         zc->initialized = 1;
 
@@ -3341,20 +3341,20 @@ static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,
  *
  * Returns the estimated compressed size of the seqStore, or a zstd error.
  */
-static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, const ZSTD_CCtx* zc) {
-    ZSTD_entropyCTablesMetadata_t entropyMetadata;
+static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc) {
+    ZSTD_entropyCTablesMetadata_t* entropyMetadata = &zc->blockSplitCtx.entropyMetadata;
     DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()");
     FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore,
                      &zc->blockState.prevCBlock->entropy,
                      &zc->blockState.nextCBlock->entropy,
                      &zc->appliedParams,
-                     &entropyMetadata,
+                     entropyMetadata,
                      zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
     return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
                     seqStore->ofCode, seqStore->llCode, seqStore->mlCode,
                     (size_t)(seqStore->sequences - seqStore->sequencesStart),
-                    &zc->blockState.nextCBlock->entropy, &entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
-                    (int)(entropyMetadata.hufMetadata.hType == set_compressed), 1);
+                    &zc->blockState.nextCBlock->entropy, entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
+                    (int)(entropyMetadata->hufMetadata.hType == set_compressed), 1);
 }
 
 /* Returns literals bytes represented in a seqStore */
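
A side effect visible in this hunk: `zc` loses its `const` qualifier, because `entropyMetadata` now lives inside `zc->blockSplitCtx` and the function writes through it. The reduced example below (hypothetical names, not zstd's API) shows why the old `const ZSTD_CCtx*` signature could no longer compile:

    typedef struct { int hType; } Metadata;
    typedef struct { Metadata entropyMetadata; } Ctx;

    static int estimate(Ctx* ctx)  /* with `const Ctx* ctx` this breaks */
    {
        /* const would propagate to the member: &ctx->entropyMetadata would be
         * `const Metadata*`, and the write below would be rejected. */
        Metadata* md = &ctx->entropyMetadata;
        md->hType = 1;
        return md->hType;
    }

    int main(void) { Ctx c = { {0} }; return estimate(&c) == 1 ? 0 : 1; }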
@@ -3553,7 +3553,6 @@ typedef struct {
 } seqStoreSplits;
 
 #define MIN_SEQUENCES_BLOCK_SPLITTING 300
-#define MAX_NB_SPLITS 196
 
 /* Helper function to perform the recursive search for block splits.
  * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half.
@@ -3564,30 +3563,31 @@ typedef struct {
  * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).
  * In practice, recursion depth usually doesn't go beyond 4.
  *
- * Furthermore, the number of splits is capped by MAX_NB_SPLITS. At MAX_NB_SPLITS == 196 with the current existing blockSize
+ * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS. At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize
  * maximum of 128 KB, this value is actually impossible to reach.
  */
 static void ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
-                                         const ZSTD_CCtx* zc, const seqStore_t* origSeqStore) {
-    seqStore_t fullSeqStoreChunk;
-    seqStore_t firstHalfSeqStore;
-    seqStore_t secondHalfSeqStore;
+                                         ZSTD_CCtx* zc, const seqStore_t* origSeqStore) {
+    seqStore_t* fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk;
+    seqStore_t* firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore;
+    seqStore_t* secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore;
     size_t estimatedOriginalSize;
     size_t estimatedFirstHalfSize;
     size_t estimatedSecondHalfSize;
     size_t midIdx = (startIdx + endIdx)/2;
 
-    if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= MAX_NB_SPLITS) {
+    if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) {
         DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences");
         return;
     }
-    ZSTD_deriveSeqStoreChunk(&fullSeqStoreChunk, origSeqStore, startIdx, endIdx);
-    ZSTD_deriveSeqStoreChunk(&firstHalfSeqStore, origSeqStore, startIdx, midIdx);
-    ZSTD_deriveSeqStoreChunk(&secondHalfSeqStore, origSeqStore, midIdx, endIdx);
-    estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&fullSeqStoreChunk, zc);
-    estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&firstHalfSeqStore, zc);
-    estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&secondHalfSeqStore, zc);
-    DEBUGLOG(5, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",
+    DEBUGLOG(4, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
+    ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx);
+    ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx);
+    ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx);
+    estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(fullSeqStoreChunk, zc);
+    estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(firstHalfSeqStore, zc);
+    estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(secondHalfSeqStore, zc);
+    DEBUGLOG(4, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",
              estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize);
     if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) {
         return;
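
The comment block above bounds the recursion: each level halves the sequence range, stopping once a range holds fewer than MIN_SEQUENCES_BLOCK_SPLITTING sequences or the split count reaches the cap. The self-contained toy below mirrors that control flow; its cost function is a made-up stand-in for ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize, not zstd's real estimator:

    #include <stdio.h>

    #define MIN_RANGE  300   /* cf. MIN_SEQUENCES_BLOCK_SPLITTING */
    #define MAX_SPLITS 196   /* cf. ZSTD_MAX_NB_BLOCK_SPLITS */

    typedef struct { size_t splits[MAX_SPLITS]; size_t idx; } Splits;

    /* Stand-in cost: superlinear, so splitting sometimes pays off. */
    static size_t estimateCost(size_t start, size_t end)
    {
        size_t n = end - start;
        return n * n / 1024 + 64;   /* the +64 models per-block overhead */
    }

    static void deriveSplitsHelper(Splits* s, size_t start, size_t end)
    {
        size_t mid = (start + end) / 2;
        if (end - start < MIN_RANGE || s->idx >= MAX_SPLITS)
            return;
        /* Keep the split only if the halves look cheaper than the whole. */
        if (estimateCost(start, mid) + estimateCost(mid, end) < estimateCost(start, end)) {
            deriveSplitsHelper(s, start, mid);
            s->splits[s->idx++] = mid;
            deriveSplitsHelper(s, mid, end);
        }
    }

    int main(void)
    {
        Splits s = { {0}, 0 };
        deriveSplitsHelper(&s, 0, 4096);
        printf("chose %zu split points\n", s.idx);
        return 0;
    }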
@@ -3627,12 +3627,12 @@ static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, s
     size_t cSize = 0;
     const BYTE* ip = (const BYTE*)src;
     BYTE* op = (BYTE*)dst;
-    U32 partitions[MAX_NB_SPLITS];
     size_t i = 0;
     size_t srcBytesTotal = 0;
+    U32* partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */
+    seqStore_t* nextSeqStore = &zc->blockSplitCtx.nextSeqStore;
+    seqStore_t* currSeqStore = &zc->blockSplitCtx.currSeqStore;
     size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
-    seqStore_t nextSeqStore;
-    seqStore_t currSeqStore;
 
     /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history
      * may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two
@@ -3652,7 +3652,7 @@ static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, s
     repcodes_t cRep;
     ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
     ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
-    ZSTD_memset(&nextSeqStore, 0, sizeof(seqStore_t));
+    ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t));
 
     DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
                 (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
@@ -3670,36 +3670,36 @@ static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, s
         return cSizeSingleBlock;
     }
 
-    ZSTD_deriveSeqStoreChunk(&currSeqStore, &zc->seqStore, 0, partitions[0]);
+    ZSTD_deriveSeqStoreChunk(currSeqStore, &zc->seqStore, 0, partitions[0]);
     for (i = 0; i <= numSplits; ++i) {
         size_t srcBytes;
         size_t cSizeChunk;
         U32 const lastPartition = (i == numSplits);
         U32 lastBlockEntireSrc = 0;
 
-        srcBytes = ZSTD_countSeqStoreLiteralsBytes(&currSeqStore) + ZSTD_countSeqStoreMatchBytes(&currSeqStore);
+        srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore);
         srcBytesTotal += srcBytes;
         if (lastPartition) {
             /* This is the final partition, need to account for possible last literals */
             srcBytes += blockSize - srcBytesTotal;
             lastBlockEntireSrc = lastBlock;
         } else {
-            ZSTD_deriveSeqStoreChunk(&nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]);
+            ZSTD_deriveSeqStoreChunk(nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]);
         }
 
-        cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, &currSeqStore,
+        cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, currSeqStore,
                                                        &dRep, &cRep,
                                                        op, dstCapacity,
                                                        ip, srcBytes,
                                                        lastBlockEntireSrc, 1 /* isPartition */);
-        DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&currSeqStore, zc), cSizeChunk);
+        DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk);
         FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!");
 
         ip += srcBytes;
         op += cSizeChunk;
        dstCapacity -= cSizeChunk;
         cSize += cSizeChunk;
-        currSeqStore = nextSeqStore;
+        *currSeqStore = *nextSeqStore;
         assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
     }
     /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes
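Worth pausing on `*currSeqStore = *nextSeqStore;` above: once the two seqStores become pointers to fixed slots in `zc->blockSplitCtx`, the rotation must copy values, not pointers. Reassigning the pointer (`currSeqStore = nextSeqStore;`) would make both names alias the `nextSeqStore` slot, so the next ZSTD_deriveSeqStoreChunk call would overwrite the chunk still being compressed. A reduced demonstration with toy types:

    #include <stdio.h>

    typedef struct { int start, end; } Chunk;
    typedef struct { Chunk curr, next; } Scratch;  /* fixed slots, like blockSplitCtx */

    int main(void)
    {
        Scratch s = { {0, 100}, {100, 250} };
        Chunk* curr = &s.curr;
        Chunk* next = &s.next;

        *curr = *next;       /* value copy: curr and next keep distinct storage */
        next->start = 250;   /* derive the following chunk into the next slot */
        next->end   = 400;

        /* curr still sees {100, 250}; with `curr = next;` it would see {250, 400}. */
        printf("curr = {%d, %d}\n", curr->start, curr->end);
        return 0;
    }

The remaining hunks are from the header that declares ZSTD_CCtx_s, where the new scratch struct is defined.
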
@@ -345,6 +345,22 @@ typedef enum {
     ZSTDb_buffered
 } ZSTD_buffered_policy_e;
 
+/**
+ * Struct that contains all elements of block splitter that should be allocated
+ * in a wksp.
+ */
+#define ZSTD_MAX_NB_BLOCK_SPLITS 196
+typedef struct {
+    seqStore_t fullSeqStoreChunk;
+    seqStore_t firstHalfSeqStore;
+    seqStore_t secondHalfSeqStore;
+    seqStore_t currSeqStore;
+    seqStore_t nextSeqStore;
+
+    U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
+    ZSTD_entropyCTablesMetadata_t entropyMetadata;
+} ZSTD_blockSplitCtx;
+
 struct ZSTD_CCtx_s {
     ZSTD_compressionStage_e stage;
     int cParamsChanged;  /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
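
For a sense of scale: the partitions array alone is 196 * sizeof(U32) = 784 bytes, and the old recursive helper kept three seqStore_t locals per recursion level (up to ~10 levels, per the comment earlier). Parking one shared copy of everything in the workspace makes the helper's stack frame essentially constant. A back-of-the-envelope check with stand-in types; real zstd struct sizes differ:

    #include <stdio.h>

    /* Rough stand-in for seqStore_t: a handful of pointers and size fields. */
    typedef struct { void* p[8]; size_t n[4]; } FakeSeqStore;

    typedef struct {
        FakeSeqStore stores[5];        /* full, firstHalf, secondHalf, curr, next */
        unsigned     partitions[196];  /* cf. ZSTD_MAX_NB_BLOCK_SPLITS */
    } FakeBlockSplitCtx;

    int main(void)
    {
        printf("partitions array:   %zu bytes\n", sizeof(unsigned[196]));
        printf("one shared scratch: %zu bytes\n", sizeof(FakeBlockSplitCtx));
        printf("old cost per level: %zu bytes (3 seqStores)\n",
               3 * sizeof(FakeSeqStore));
        return 0;
    }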
@@ -410,6 +426,9 @@ struct ZSTD_CCtx_s {
 #if ZSTD_TRACE
     ZSTD_TraceCtx traceCtx;
 #endif
+
+    /* Workspace for block splitter */
+    ZSTD_blockSplitCtx blockSplitCtx;
 };
 
 typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;