Add experimental param for sequence validation

This commit is contained in:
senhuang42 2020-11-20 11:23:22 -05:00
parent 0e32928b7d
commit 7742f076b4
4 changed files with 70 additions and 30 deletions

View File

@ -462,6 +462,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters; bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters;
bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters; bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters;
return bounds; return bounds;
case ZSTD_c_validateSequences:
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
default: default:
bounds.error = ERROR(parameter_unsupported); bounds.error = ERROR(parameter_unsupported);
@ -523,6 +528,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
case ZSTD_c_stableInBuffer: case ZSTD_c_stableInBuffer:
case ZSTD_c_stableOutBuffer: case ZSTD_c_stableOutBuffer:
case ZSTD_c_blockDelimiters: case ZSTD_c_blockDelimiters:
case ZSTD_c_validateSequences:
default: default:
return 0; return 0;
} }
@ -574,6 +580,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
case ZSTD_c_stableInBuffer: case ZSTD_c_stableInBuffer:
case ZSTD_c_stableOutBuffer: case ZSTD_c_stableOutBuffer:
case ZSTD_c_blockDelimiters: case ZSTD_c_blockDelimiters:
case ZSTD_c_validateSequences:
break; break;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
@ -779,6 +786,11 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
BOUNDCHECK(ZSTD_c_blockDelimiters, value); BOUNDCHECK(ZSTD_c_blockDelimiters, value);
CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value; CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value;
return CCtxParams->blockDelimiters; return CCtxParams->blockDelimiters;
case ZSTD_c_validateSequences:
BOUNDCHECK(ZSTD_c_validateSequences, value);
CCtxParams->validateSequences = value;
return CCtxParams->validateSequences;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
} }
@ -900,6 +912,9 @@ size_t ZSTD_CCtxParams_getParameter(
case ZSTD_c_blockDelimiters : case ZSTD_c_blockDelimiters :
*value = (int)CCtxParams->blockDelimiters; *value = (int)CCtxParams->blockDelimiters;
break; break;
case ZSTD_c_validateSequences :
*value = (int)CCtxParams->validateSequences;
break;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
} }
return 0; return 0;
@ -4561,10 +4576,12 @@ static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZS
updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
seqPos->posInSrc += litLength + matchLength; if (cctx->appliedParams.validateSequences) {
FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, seqPos->posInSrc += litLength + matchLength;
cctx->appliedParams.cParams.windowLog, dictSize), FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
"Sequence validation failed"); cctx->appliedParams.cParams.windowLog, dictSize),
"Sequence validation failed");
}
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
ip += matchLength + litLength; ip += matchLength + litLength;
} }
@ -4681,10 +4698,12 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
} }
seqPos->posInSrc += litLength + matchLength; if (cctx->appliedParams.validateSequences) {
FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, seqPos->posInSrc += litLength + matchLength;
cctx->appliedParams.cParams.windowLog, dictSize), FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
"Sequence validation failed"); cctx->appliedParams.cParams.windowLog, dictSize),
"Sequence validation failed");
}
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
ip += matchLength + litLength; ip += matchLength + litLength;
@ -4712,17 +4731,12 @@ typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* s
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize); const void* src, size_t blockSize);
static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) { static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) {
ZSTD_sequenceCopier sequenceCopier = NULL;
assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode)); assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode));
ZSTD_sequenceCopier sequenceCopier; if (mode == ZSTD_sf_explicitBlockDelimiters) {
switch (mode) { return ZSTD_copySequencesToSeqStoreExplicitBlockDelim;
case ZSTD_sf_noBlockDelimiters: } else if (mode == ZSTD_sf_noBlockDelimiters) {
sequenceCopier = ZSTD_copySequencesToSeqStoreNoBlockDelim; return ZSTD_copySequencesToSeqStoreNoBlockDelim;
break;
case ZSTD_sf_explicitBlockDelimiters:
sequenceCopier = ZSTD_copySequencesToSeqStoreExplicitBlockDelim;
break;
default:
assert(0); /* Unreachable due to as param validated in bounds */
} }
assert(sequenceCopier != NULL); assert(sequenceCopier != NULL);
return sequenceCopier; return sequenceCopier;
@ -4745,7 +4759,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
BYTE const* ip = (BYTE const*)src; BYTE const* ip = (BYTE const*)src;
BYTE* op = (BYTE*)dst; BYTE* op = (BYTE*)dst;
const ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters); ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize); DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize);
/* Special case: empty frame */ /* Special case: empty frame */

View File

@ -244,6 +244,7 @@ struct ZSTD_CCtx_params_s {
/* Sequence compression API */ /* Sequence compression API */
ZSTD_sequenceFormat_e blockDelimiters; ZSTD_sequenceFormat_e blockDelimiters;
int validateSequences;
/* Internal use, for createCCtxParams() and freeCCtxParams() only */ /* Internal use, for createCCtxParams() and freeCCtxParams() only */
ZSTD_customMem customMem; ZSTD_customMem customMem;

View File

@ -418,6 +418,7 @@ typedef enum {
* ZSTD_c_stableInBuffer * ZSTD_c_stableInBuffer
* ZSTD_c_stableOutBuffer * ZSTD_c_stableOutBuffer
* ZSTD_c_blockDelimiters * ZSTD_c_blockDelimiters
* ZSTD_c_validateSequences
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly; * note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change. * also, the enums values themselves are unstable and can still change.
@ -432,7 +433,8 @@ typedef enum {
ZSTD_c_experimentalParam8=1005, ZSTD_c_experimentalParam8=1005,
ZSTD_c_experimentalParam9=1006, ZSTD_c_experimentalParam9=1006,
ZSTD_c_experimentalParam10=1007, ZSTD_c_experimentalParam10=1007,
ZSTD_c_experimentalParam11=1008 ZSTD_c_experimentalParam11=1008,
ZSTD_c_experimentalParam12=1009
} ZSTD_cParameter; } ZSTD_cParameter;
typedef struct { typedef struct {
@ -1316,8 +1318,8 @@ typedef enum {
* zc can be used to insert custom compression params. * zc can be used to insert custom compression params.
* This function invokes ZSTD_compress2 * This function invokes ZSTD_compress2
* *
* The output of this function can be fed into ZSTD_compressSequences() with ZSTD_c_blockDelimiters * The output of this function can be fed into ZSTD_compressSequences() with CCtx
* set to ZSTD_sf_explicitBlockDelimiters * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
* @return : number of sequences generated * @return : number of sequences generated
*/ */
@ -1331,8 +1333,8 @@ ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
* As such, the final generated result has no explicit representation of block boundaries, * As such, the final generated result has no explicit representation of block boundaries,
* and the final last literals segment is not represented in the sequences. * and the final last literals segment is not represented in the sequences.
* *
* The output of this function can be fed into ZSTD_compressSequences() with ZSTD_c_blockDelimiters * The output of this function can be fed into ZSTD_compressSequences() with CCtx
* set to ZSTD_sf_noBlockDelimiters * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters
* @return : number of sequences left after merging * @return : number of sequences left after merging
*/ */
ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
@ -1348,16 +1350,20 @@ ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t se
* the block size derived from the cctx, and sequences may be split. This is the default setting. * the block size derived from the cctx, and sequences may be split. This is the default setting.
* *
* If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain * If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain
* block delimiters (defined in ZSTD_Sequence). * block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided.
* *
* In addition to ZSTD_c_blockDelimiters, other noteworthy cctx parameters are the compression level and window log. * If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined
* behavior. If ZSTD_c_validateSequences == 1 and a sequence is invalid (see doc/zstd_compression_format.md for
* specifics regarding offset/matchlength requirements), the function will bail out and return an error.
*
* In addition to the two adjustable experimental params, other noteworthy cctx parameters are the compression level and window log.
* - The compression level accordingly adjusts the strength of the entropy coder, as it would in typical compression. * - The compression level accordingly adjusts the strength of the entropy coder, as it would in typical compression.
* - The window log affects offset validation: this function will return an error at higher debug levels if a provided offset * - The window log affects offset validation: this function will return an error at higher debug levels if a provided offset
* is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md * is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
* *
* Note: * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
* - Repcodes are, as of now, always re-calculated, so ZSTD_Sequence::rep is never used. * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly,
* * and cannot emit an RLE block that disagrees with the repcode history
* @return : final compressed size or a ZSTD error. * @return : final compressed size or a ZSTD error.
*/ */
ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize,
@ -1766,11 +1772,30 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
* Default is 0 == ZSTD_sf_noBlockDelimiters. * Default is 0 == ZSTD_sf_noBlockDelimiters.
* *
* For use with sequence compression API: ZSTD_compressSequences(). * For use with sequence compression API: ZSTD_compressSequences().
*
* Designates whether or not the given array of ZSTD_Sequence contains block delimiters * Designates whether or not the given array of ZSTD_Sequence contains block delimiters
* which are defined as sequences with offset == 0 and matchLength == 0. * and last literals, which are defined as sequences with offset == 0 and matchLength == 0.
* See the definition of ZSTD_Sequence for more specifics.
*/ */
#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11 #define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11
/* ZSTD_c_validateSequences
* Default is 0 == disabled. Set to 1 to enable sequence validation.
*
* For use with sequence compression API: ZSTD_compressSequences().
* Designates whether or not we validate sequences provided to ZSTD_compressSequences()
* during function execution.
*
* Without validation, providing a sequence that does not conform to the zstd spec will cause
* undefined behavior, and may produce a corrupted block.
*
* With validation enabled, if a sequence is invalid (see doc/zstd_compression_format.md for
* specifics regarding offset/matchlength requirements), the function will bail out and
* return an error.
*
*/
#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
/*! ZSTD_CCtx_getParameter() : /*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter, * Get the requested compression parameter value, selected by enum ZSTD_cParameter,
* and store it into int* value. * and store it into int* value.

Binary file not shown.