Add experimental param for sequence validation

dev
senhuang42 2020-11-20 11:23:22 -05:00
parent 0e32928b7d
commit 7742f076b4
4 changed files with 70 additions and 30 deletions

View File

@ -462,6 +462,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters;
bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters;
return bounds;
case ZSTD_c_validateSequences:
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
default:
bounds.error = ERROR(parameter_unsupported);
@ -523,6 +528,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
case ZSTD_c_stableInBuffer:
case ZSTD_c_stableOutBuffer:
case ZSTD_c_blockDelimiters:
case ZSTD_c_validateSequences:
default:
return 0;
}
@ -574,6 +580,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
case ZSTD_c_stableInBuffer:
case ZSTD_c_stableOutBuffer:
case ZSTD_c_blockDelimiters:
case ZSTD_c_validateSequences:
break;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
@ -779,6 +786,11 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
BOUNDCHECK(ZSTD_c_blockDelimiters, value);
CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value;
return CCtxParams->blockDelimiters;
case ZSTD_c_validateSequences:
BOUNDCHECK(ZSTD_c_validateSequences, value);
CCtxParams->validateSequences = value;
return CCtxParams->validateSequences;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
@ -900,6 +912,9 @@ size_t ZSTD_CCtxParams_getParameter(
case ZSTD_c_blockDelimiters :
*value = (int)CCtxParams->blockDelimiters;
break;
case ZSTD_c_validateSequences :
*value = (int)CCtxParams->validateSequences;
break;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
return 0;
@ -4561,10 +4576,12 @@ static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZS
updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
seqPos->posInSrc += litLength + matchLength;
FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
cctx->appliedParams.cParams.windowLog, dictSize),
"Sequence validation failed");
if (cctx->appliedParams.validateSequences) {
seqPos->posInSrc += litLength + matchLength;
FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
cctx->appliedParams.cParams.windowLog, dictSize),
"Sequence validation failed");
}
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
ip += matchLength + litLength;
}
@ -4681,10 +4698,12 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
}
seqPos->posInSrc += litLength + matchLength;
FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
cctx->appliedParams.cParams.windowLog, dictSize),
"Sequence validation failed");
if (cctx->appliedParams.validateSequences) {
seqPos->posInSrc += litLength + matchLength;
FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
cctx->appliedParams.cParams.windowLog, dictSize),
"Sequence validation failed");
}
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
ip += matchLength + litLength;
@ -4712,17 +4731,12 @@ typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* s
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize);
static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) {
ZSTD_sequenceCopier sequenceCopier = NULL;
assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode));
ZSTD_sequenceCopier sequenceCopier;
switch (mode) {
case ZSTD_sf_noBlockDelimiters:
sequenceCopier = ZSTD_copySequencesToSeqStoreNoBlockDelim;
break;
case ZSTD_sf_explicitBlockDelimiters:
sequenceCopier = ZSTD_copySequencesToSeqStoreExplicitBlockDelim;
break;
default:
assert(0); /* Unreachable due to as param validated in bounds */
if (mode == ZSTD_sf_explicitBlockDelimiters) {
return ZSTD_copySequencesToSeqStoreExplicitBlockDelim;
} else if (mode == ZSTD_sf_noBlockDelimiters) {
return ZSTD_copySequencesToSeqStoreNoBlockDelim;
}
assert(sequenceCopier != NULL);
return sequenceCopier;
@ -4745,7 +4759,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
BYTE const* ip = (BYTE const*)src;
BYTE* op = (BYTE*)dst;
const ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize);
/* Special case: empty frame */

View File

@ -244,6 +244,7 @@ struct ZSTD_CCtx_params_s {
/* Sequence compression API */
ZSTD_sequenceFormat_e blockDelimiters;
int validateSequences;
/* Internal use, for createCCtxParams() and freeCCtxParams() only */
ZSTD_customMem customMem;

View File

@ -418,6 +418,7 @@ typedef enum {
* ZSTD_c_stableInBuffer
* ZSTD_c_stableOutBuffer
* ZSTD_c_blockDelimiters
* ZSTD_c_validateSequences
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change.
@ -432,7 +433,8 @@ typedef enum {
ZSTD_c_experimentalParam8=1005,
ZSTD_c_experimentalParam9=1006,
ZSTD_c_experimentalParam10=1007,
ZSTD_c_experimentalParam11=1008
ZSTD_c_experimentalParam11=1008,
ZSTD_c_experimentalParam12=1009
} ZSTD_cParameter;
typedef struct {
@ -1316,8 +1318,8 @@ typedef enum {
* zc can be used to insert custom compression params.
* This function invokes ZSTD_compress2
*
* The output of this function can be fed into ZSTD_compressSequences() with ZSTD_c_blockDelimiters
* set to ZSTD_sf_explicitBlockDelimiters
* The output of this function can be fed into ZSTD_compressSequences() with CCtx
* setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
* @return : number of sequences generated
*/
@ -1331,8 +1333,8 @@ ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
* As such, the final generated result has no explicit representation of block boundaries,
* and the final last literals segment is not represented in the sequences.
*
* The output of this function can be fed into ZSTD_compressSequences() with ZSTD_c_blockDelimiters
* set to ZSTD_sf_noBlockDelimiters
* The output of this function can be fed into ZSTD_compressSequences() with CCtx
* setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters
* @return : number of sequences left after merging
*/
ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
@ -1348,16 +1350,20 @@ ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t se
* the block size derived from the cctx, and sequences may be split. This is the default setting.
*
* If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain
* block delimiters (defined in ZSTD_Sequence).
* block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided.
*
* In addition to ZSTD_c_blockDelimiters, other noteworthy cctx parameters are the compression level and window log.
* If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined
* behavior. If ZSTD_c_validateSequences == 1, then if a sequence is invalid (see doc/zstd_compression_format.md for
* specifics regarding offset/matchlength requirements), the function will bail out and return an error.
*
* In addition to the two adjustable experimental params, other noteworthy cctx parameters are the compression level and window log.
* - The compression level accordingly adjusts the strength of the entropy coder, as it would in typical compression.
* - The window log affects offset validation: this function will return an error at higher debug levels if a provided offset
* is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
*
* Note:
* - Repcodes are, as of now, always re-calculated, so ZSTD_Sequence::rep is never used.
*
* Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
* Note 2: Once we integrate the ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly,
* and cannot emit an RLE block that disagrees with the repcode history.
* @return : final compressed size or a ZSTD error.
*/
ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize,
@ -1766,11 +1772,30 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
* Default is 0 == ZSTD_sf_noBlockDelimiters.
*
* For use with sequence compression API: ZSTD_compressSequences().
*
* Designates whether or not the given array of ZSTD_Sequence contains block delimiters
* which are defined as sequences with offset == 0 and matchLength == 0.
* and last literals, which are defined as sequences with offset == 0 and matchLength == 0.
* See the definition of ZSTD_Sequence for more specifics.
*/
#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11
/* ZSTD_c_validateSequences
* Default is 0 == disabled. Set to 1 to enable sequence validation.
*
* For use with sequence compression API: ZSTD_compressSequences().
* Designates whether or not we validate sequences provided to ZSTD_compressSequences()
* during function execution.
*
* Without validation, providing a sequence that does not conform to the zstd spec will cause
* undefined behavior, and may produce a corrupted block.
*
* With validation enabled, if a sequence is invalid (see doc/zstd_compression_format.md for
* specifics regarding offset/matchlength requirements), the function will bail out and
* return an error.
*
*/
#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
/*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
* and store it into int* value.

Binary file not shown.