Merge pull request #1301 from terrelln/lit-size

[zstd] Fix seqStore growth
dev
Yann Collet 2018-08-28 17:10:25 -07:00 committed by GitHub
commit 31ebb26945
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 61 additions and 10 deletions

View File

@ -193,6 +193,7 @@ typedef struct {
BYTE* mlCode;
BYTE* ofCode;
size_t maxNbSeq;
size_t maxNbLit;
U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
U32 longLengthPos;
} seqStore_t;

View File

@ -805,7 +805,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
U32 const divider = (cParams.searchLength==3) ? 3 : 4;
size_t const maxNbSeq = blockSize / divider;
size_t const tokenSpace = blockSize + 11*maxNbSeq;
size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq;
size_t const entropySpace = HUF_WORKSPACE_SIZE;
size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t);
size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1);
@ -932,6 +932,7 @@ typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e;
* check internal buffers exist for streaming if buffPol == ZSTDb_buffered .
* Note : they are assumed to be correctly sized if ZSTD_equivalentCParams()==1 */
static U32 ZSTD_sufficientBuff(size_t bufferSize1, size_t maxNbSeq1,
size_t maxNbLit1,
ZSTD_buffered_policy_e buffPol2,
ZSTD_compressionParameters cParams2,
U64 pledgedSrcSize)
@ -939,18 +940,24 @@ static U32 ZSTD_sufficientBuff(size_t bufferSize1, size_t maxNbSeq1,
size_t const windowSize2 = MAX(1, (size_t)MIN(((U64)1 << cParams2.windowLog), pledgedSrcSize));
size_t const blockSize2 = MIN(ZSTD_BLOCKSIZE_MAX, windowSize2);
size_t const maxNbSeq2 = blockSize2 / ((cParams2.searchLength == 3) ? 3 : 4);
size_t const maxNbLit2 = blockSize2;
size_t const neededBufferSize2 = (buffPol2==ZSTDb_buffered) ? windowSize2 + blockSize2 : 0;
DEBUGLOG(4, "ZSTD_sufficientBuff: is neededBufferSize2=%u <= bufferSize1=%u",
(U32)neededBufferSize2, (U32)bufferSize1);
DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbSeq2=%u <= maxNbSeq1=%u",
(U32)maxNbSeq2, (U32)maxNbSeq1);
return (maxNbSeq2 <= maxNbSeq1) & (neededBufferSize2 <= bufferSize1);
DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbLit2=%u <= maxNbLit1=%u",
(U32)maxNbLit2, (U32)maxNbLit1);
return (maxNbLit2 <= maxNbLit1)
& (maxNbSeq2 <= maxNbSeq1)
& (neededBufferSize2 <= bufferSize1);
}
/** Equivalence for resetCCtx purposes */
static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1,
ZSTD_CCtx_params params2,
size_t buffSize1, size_t maxNbSeq1,
size_t buffSize1,
size_t maxNbSeq1, size_t maxNbLit1,
ZSTD_buffered_policy_e buffPol2,
U64 pledgedSrcSize)
{
@ -963,8 +970,8 @@ static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1,
DEBUGLOG(4, "ZSTD_equivalentLdmParams() == 0");
return 0;
}
if (!ZSTD_sufficientBuff(buffSize1, maxNbSeq1, buffPol2, params2.cParams,
pledgedSrcSize)) {
if (!ZSTD_sufficientBuff(buffSize1, maxNbSeq1, maxNbLit1, buffPol2,
params2.cParams, pledgedSrcSize)) {
DEBUGLOG(4, "ZSTD_sufficientBuff() == 0");
return 0;
}
@ -1096,7 +1103,8 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
if (crp == ZSTDcrp_continue) {
if (ZSTD_equivalentParams(zc->appliedParams, params,
zc->inBuffSize, zc->seqStore.maxNbSeq,
zc->inBuffSize,
zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit,
zbuff, pledgedSrcSize)) {
DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%zu)",
zc->appliedParams.cParams.windowLog, zc->blockSize);
@ -1118,7 +1126,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
U32 const divider = (params.cParams.searchLength==3) ? 3 : 4;
size_t const maxNbSeq = blockSize / divider;
size_t const tokenSpace = blockSize + 11*maxNbSeq;
size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq;
size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0;
size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0;
size_t const matchStateSize = ZSTD_sizeof_matchState(&params.cParams, /* forCCtx */ 1);
@ -1215,7 +1223,11 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq;
zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq;
zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq;
ptr = zc->seqStore.litStart + blockSize;
/* ZSTD_wildcopy() is used to copy into the literals buffer,
* so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
*/
zc->seqStore.maxNbLit = blockSize;
ptr = zc->seqStore.litStart + blockSize + WILDCOPY_OVERLENGTH;
/* ldm bucketOffsets table */
if (params.ldmParams.enableLdm) {

View File

@ -316,7 +316,8 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
#endif
assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
/* copy Literals */
assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + 128 KB);
assert(seqStorePtr->maxNbLit <= 128 KB);
assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
seqStorePtr->lit += litLength;

View File

@ -621,6 +621,7 @@ static size_t writeLiteralsBlock(U32* seed, frame_t* frame, size_t contentSize)
static inline void initSeqStore(seqStore_t *seqStore) {
seqStore->maxNbSeq = MAX_NB_SEQ;
seqStore->maxNbLit = ZSTD_BLOCKSIZE_MAX;
seqStore->sequencesStart = SEQUENCE_BUFFER;
seqStore->litStart = SEQUENCE_LITERAL_BUFFER;
seqStore->llCode = SEQUENCE_LLCODE;

View File

@ -1375,6 +1375,24 @@ static int basicUnitTests(U32 seed, double compressibility)
((BYTE*)CNBuffer)[i+1] = _3BytesSeqs[id][1];
((BYTE*)CNBuffer)[i+2] = _3BytesSeqs[id][2];
} } }
/* Test "growing nbSeq": compress progressively longer prefixes of CNBuffer
 * through ONE compression context that is reused for every call.
 * The code just above fills CNBuffer three bytes at a time from _3BytesSeqs,
 * and each iteration here compresses the first nbSeq*3 bytes at level 19.
 * NOTE(review): presumably this exercises the sequence store being re-sized
 * when a context is reused with a larger input — confirm against the commit
 * description ("Fix seqStore growth"). */
DISPLAYLEVEL(3, "test%3i : growing nbSeq : ", testNb++);
/* NOTE(review): the result of ZSTD_createCCtx() is not checked for NULL here. */
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
/* Loop upper limit: one unit per 3 bytes of the test region. */
size_t const maxNbSeq = _3BYTESTESTLENGTH / 3;
/* Output capacity is computed once for the largest input and reused for all calls. */
size_t const bound = ZSTD_compressBound(_3BYTESTESTLENGTH);
size_t nbSeq = 1;
while (nbSeq <= maxNbSeq) {
/* NOTE(review): CHECK is defined outside this view; if it jumps away on
 * error, cctx would not reach ZSTD_freeCCtx() below — confirm. */
CHECK(ZSTD_compressCCtx(cctx, compressedBuffer, bound, CNBuffer, nbSeq * 3, 19));
/* Check every sequence for the first 100, then skip more rapidly. */
if (nbSeq < 100) {
++nbSeq;
} else {
/* Grow by a quarter of the current value (integer shift, rounds down). */
nbSeq += (nbSeq >> 2);
}
}
ZSTD_freeCCtx(cctx);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : compress lots 3-bytes sequences : ", testNb++);
{ CHECK_V(r, ZSTD_compress(compressedBuffer, ZSTD_compressBound(_3BYTESTESTLENGTH),
CNBuffer, _3BYTESTESTLENGTH, 19) );
@ -1386,8 +1404,26 @@ static int basicUnitTests(U32 seed, double compressibility)
if (r != _3BYTESTESTLENGTH) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : incompressible data and ill suited dictionary : ", testNb++);
/* Test "growing literals buffer": regenerate CNBuffer, then compress
 * progressively longer prefixes of it (1 byte up to CNBuffSize) at level 3
 * through ONE compression context that is reused for every call. */
DISPLAYLEVEL(3, "test%3i : growing literals buffer : ", testNb++);
/* NOTE(review): the 0.0 / 0.1 arguments presumably request poorly
 * compressible data so most bytes end up as literals — confirm against
 * RDG_genBuffer's declaration, which is not visible here. */
RDG_genBuffer(CNBuffer, CNBuffSize, 0.0, 0.1, seed);
/* NOTE(review): the result of ZSTD_createCCtx() is not checked for NULL here. */
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
/* Output capacity is computed once for the largest input and reused for all calls. */
size_t const bound = ZSTD_compressBound(CNBuffSize);
size_t size = 1;
while (size <= CNBuffSize) {
/* NOTE(review): CHECK is defined outside this view; if it jumps away on
 * error, cctx would not reach ZSTD_freeCCtx() below — confirm. */
CHECK(ZSTD_compressCCtx(cctx, compressedBuffer, bound, CNBuffer, size, 3));
/* Check every size for the first 100, then skip more rapidly. */
if (size < 100) {
++size;
} else {
/* Grow by a quarter of the current value (integer shift, rounds down). */
size += (size >> 2);
}
}
ZSTD_freeCCtx(cctx);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : incompressible data and ill suited dictionary : ", testNb++);
{ /* Train a dictionary on low characters */
size_t dictSize = 16 KB;
void* const dictBuffer = malloc(dictSize);