Merge pull request #2405 from senhuang42/sequence_compression_minmatch_fix
Sequence Compression API - make validator use cctx minMatch instead of global MINMATCH
This commit is contained in:
commit
993baba315
@ -4524,7 +4524,7 @@ typedef struct {
|
|||||||
|
|
||||||
/* Returns a ZSTD error code if sequence is not valid */
|
/* Returns a ZSTD error code if sequence is not valid */
|
||||||
static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength,
|
static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength,
|
||||||
size_t posInSrc, U32 windowLog, size_t dictSize) {
|
size_t posInSrc, U32 windowLog, size_t dictSize, U32 minMatch) {
|
||||||
size_t offsetBound;
|
size_t offsetBound;
|
||||||
U32 windowSize = 1 << windowLog;
|
U32 windowSize = 1 << windowLog;
|
||||||
/* posInSrc represents the amount of data the the decoder would decode up to this point.
|
/* posInSrc represents the amount of data the the decoder would decode up to this point.
|
||||||
@ -4534,7 +4534,7 @@ static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength,
|
|||||||
*/
|
*/
|
||||||
offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
|
offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
|
||||||
RETURN_ERROR_IF(offCode > offsetBound + ZSTD_REP_MOVE, corruption_detected, "Offset too large!");
|
RETURN_ERROR_IF(offCode > offsetBound + ZSTD_REP_MOVE, corruption_detected, "Offset too large!");
|
||||||
RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small");
|
RETURN_ERROR_IF(matchLength < minMatch, corruption_detected, "Matchlength too small");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4594,7 +4594,8 @@ static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZS
|
|||||||
if (cctx->appliedParams.validateSequences) {
|
if (cctx->appliedParams.validateSequences) {
|
||||||
seqPos->posInSrc += litLength + matchLength;
|
seqPos->posInSrc += litLength + matchLength;
|
||||||
FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
|
FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
|
||||||
cctx->appliedParams.cParams.windowLog, dictSize),
|
cctx->appliedParams.cParams.windowLog, dictSize,
|
||||||
|
cctx->appliedParams.cParams.minMatch),
|
||||||
"Sequence validation failed");
|
"Sequence validation failed");
|
||||||
}
|
}
|
||||||
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
|
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
|
||||||
@ -4679,13 +4680,13 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
|
|||||||
U32 firstHalfMatchLength;
|
U32 firstHalfMatchLength;
|
||||||
litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence;
|
litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence;
|
||||||
firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength;
|
firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength;
|
||||||
if (matchLength > blockSize && firstHalfMatchLength >= MINMATCH) {
|
if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) {
|
||||||
/* Only ever split the match if it is larger than the block size */
|
/* Only ever split the match if it is larger than the block size */
|
||||||
U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence;
|
U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence;
|
||||||
if (secondHalfMatchLength < MINMATCH) {
|
if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) {
|
||||||
/* Move the endPosInSequence backward so that it creates match of MINMATCH length */
|
/* Move the endPosInSequence backward so that it creates match of minMatch length */
|
||||||
endPosInSequence -= MINMATCH - secondHalfMatchLength;
|
endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
|
||||||
bytesAdjustment = MINMATCH - secondHalfMatchLength;
|
bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
|
||||||
firstHalfMatchLength -= bytesAdjustment;
|
firstHalfMatchLength -= bytesAdjustment;
|
||||||
}
|
}
|
||||||
matchLength = firstHalfMatchLength;
|
matchLength = firstHalfMatchLength;
|
||||||
@ -4716,7 +4717,8 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
|
|||||||
if (cctx->appliedParams.validateSequences) {
|
if (cctx->appliedParams.validateSequences) {
|
||||||
seqPos->posInSrc += litLength + matchLength;
|
seqPos->posInSrc += litLength + matchLength;
|
||||||
FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
|
FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
|
||||||
cctx->appliedParams.cParams.windowLog, dictSize),
|
cctx->appliedParams.cParams.windowLog, dictSize,
|
||||||
|
cctx->appliedParams.cParams.minMatch),
|
||||||
"Sequence validation failed");
|
"Sequence validation failed");
|
||||||
}
|
}
|
||||||
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
|
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
|
||||||
@ -4884,6 +4886,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapaci
|
|||||||
DEBUGLOG(3, "ZSTD_compressSequences()");
|
DEBUGLOG(3, "ZSTD_compressSequences()");
|
||||||
assert(cctx != NULL);
|
assert(cctx != NULL);
|
||||||
FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
|
FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
|
||||||
|
RETURN_ERROR_IF(inSeqsSize > cctx->seqStore.maxNbSeq, memory_allocation, "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
|
||||||
/* Begin writing output, starting with frame header */
|
/* Begin writing output, starting with frame header */
|
||||||
frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID);
|
frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID);
|
||||||
op += frameHeaderSize;
|
op += frameHeaderSize;
|
||||||
|
@ -1356,9 +1356,10 @@ ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t se
|
|||||||
* behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for
|
* behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for
|
||||||
* specifics regarding offset/matchlength requirements) then the function will bail out and return an error.
|
* specifics regarding offset/matchlength requirements) then the function will bail out and return an error.
|
||||||
*
|
*
|
||||||
* In addition to the two adjustable experimental params, other noteworthy cctx parameters are the compression level and window log.
|
* In addition to the two adjustable experimental params, there are other important cctx params.
|
||||||
* - The compression level accordingly adjusts the strength of the entropy coder, as it would in typical compression.
|
* - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN.
|
||||||
* - The window log affects offset validation: this function will return an error at higher debug levels if a provided offset
|
* - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression.
|
||||||
|
* - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset
|
||||||
* is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
|
* is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
|
||||||
*
|
*
|
||||||
* Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
|
* Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
|
||||||
|
@ -199,6 +199,7 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
|
|||||||
ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel);
|
ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel);
|
||||||
ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, wLog);
|
ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, wLog);
|
||||||
ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, ZSTD_MINMATCH_MIN);
|
ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, ZSTD_MINMATCH_MIN);
|
||||||
|
ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
|
||||||
/* TODO: Add block delim mode fuzzing */
|
/* TODO: Add block delim mode fuzzing */
|
||||||
ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters);
|
ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters);
|
||||||
if (hasDict) {
|
if (hasDict) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user