Add experimental param for sequence validation
parent
0e32928b7d
commit
7742f076b4
|
@ -462,6 +462,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
|
|||
bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters;
|
||||
bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters;
|
||||
return bounds;
|
||||
|
||||
case ZSTD_c_validateSequences:
|
||||
bounds.lowerBound = 0;
|
||||
bounds.upperBound = 1;
|
||||
return bounds;
|
||||
|
||||
default:
|
||||
bounds.error = ERROR(parameter_unsupported);
|
||||
|
@ -523,6 +528,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
|
|||
case ZSTD_c_stableInBuffer:
|
||||
case ZSTD_c_stableOutBuffer:
|
||||
case ZSTD_c_blockDelimiters:
|
||||
case ZSTD_c_validateSequences:
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
@ -574,6 +580,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
|
|||
case ZSTD_c_stableInBuffer:
|
||||
case ZSTD_c_stableOutBuffer:
|
||||
case ZSTD_c_blockDelimiters:
|
||||
case ZSTD_c_validateSequences:
|
||||
break;
|
||||
|
||||
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
||||
|
@ -779,6 +786,11 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
|||
BOUNDCHECK(ZSTD_c_blockDelimiters, value);
|
||||
CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value;
|
||||
return CCtxParams->blockDelimiters;
|
||||
|
||||
case ZSTD_c_validateSequences:
|
||||
BOUNDCHECK(ZSTD_c_validateSequences, value);
|
||||
CCtxParams->validateSequences = value;
|
||||
return CCtxParams->validateSequences;
|
||||
|
||||
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
||||
}
|
||||
|
@ -900,6 +912,9 @@ size_t ZSTD_CCtxParams_getParameter(
|
|||
case ZSTD_c_blockDelimiters :
|
||||
*value = (int)CCtxParams->blockDelimiters;
|
||||
break;
|
||||
case ZSTD_c_validateSequences :
|
||||
*value = (int)CCtxParams->validateSequences;
|
||||
break;
|
||||
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
||||
}
|
||||
return 0;
|
||||
|
@ -4561,10 +4576,12 @@ static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZS
|
|||
updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
|
||||
|
||||
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
|
||||
seqPos->posInSrc += litLength + matchLength;
|
||||
FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
|
||||
cctx->appliedParams.cParams.windowLog, dictSize),
|
||||
"Sequence validation failed");
|
||||
if (cctx->appliedParams.validateSequences) {
|
||||
seqPos->posInSrc += litLength + matchLength;
|
||||
FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
|
||||
cctx->appliedParams.cParams.windowLog, dictSize),
|
||||
"Sequence validation failed");
|
||||
}
|
||||
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
|
||||
ip += matchLength + litLength;
|
||||
}
|
||||
|
@ -4681,10 +4698,12 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
|
|||
updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
|
||||
}
|
||||
|
||||
seqPos->posInSrc += litLength + matchLength;
|
||||
FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
|
||||
cctx->appliedParams.cParams.windowLog, dictSize),
|
||||
"Sequence validation failed");
|
||||
if (cctx->appliedParams.validateSequences) {
|
||||
seqPos->posInSrc += litLength + matchLength;
|
||||
FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
|
||||
cctx->appliedParams.cParams.windowLog, dictSize),
|
||||
"Sequence validation failed");
|
||||
}
|
||||
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
|
||||
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
|
||||
ip += matchLength + litLength;
|
||||
|
@ -4712,17 +4731,12 @@ typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* s
|
|||
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
|
||||
const void* src, size_t blockSize);
|
||||
static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) {
|
||||
ZSTD_sequenceCopier sequenceCopier = NULL;
|
||||
assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode));
|
||||
ZSTD_sequenceCopier sequenceCopier;
|
||||
switch (mode) {
|
||||
case ZSTD_sf_noBlockDelimiters:
|
||||
sequenceCopier = ZSTD_copySequencesToSeqStoreNoBlockDelim;
|
||||
break;
|
||||
case ZSTD_sf_explicitBlockDelimiters:
|
||||
sequenceCopier = ZSTD_copySequencesToSeqStoreExplicitBlockDelim;
|
||||
break;
|
||||
default:
|
||||
assert(0); /* Unreachable due to as param validated in bounds */
|
||||
if (mode == ZSTD_sf_explicitBlockDelimiters) {
|
||||
return ZSTD_copySequencesToSeqStoreExplicitBlockDelim;
|
||||
} else if (mode == ZSTD_sf_noBlockDelimiters) {
|
||||
return ZSTD_copySequencesToSeqStoreNoBlockDelim;
|
||||
}
|
||||
assert(sequenceCopier != NULL);
|
||||
return sequenceCopier;
|
||||
|
@ -4745,7 +4759,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
|||
|
||||
BYTE const* ip = (BYTE const*)src;
|
||||
BYTE* op = (BYTE*)dst;
|
||||
const ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
|
||||
ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
|
||||
|
||||
DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize);
|
||||
/* Special case: empty frame */
|
||||
|
|
|
@ -244,6 +244,7 @@ struct ZSTD_CCtx_params_s {
|
|||
|
||||
/* Sequence compression API */
|
||||
ZSTD_sequenceFormat_e blockDelimiters;
|
||||
int validateSequences;
|
||||
|
||||
/* Internal use, for createCCtxParams() and freeCCtxParams() only */
|
||||
ZSTD_customMem customMem;
|
||||
|
|
47
lib/zstd.h
47
lib/zstd.h
|
@ -418,6 +418,7 @@ typedef enum {
|
|||
* ZSTD_c_stableInBuffer
|
||||
* ZSTD_c_stableOutBuffer
|
||||
* ZSTD_c_blockDelimiters
|
||||
* ZSTD_c_validateSequences
|
||||
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
|
||||
* note : never ever use experimentalParam? names directly;
|
||||
* also, the enums values themselves are unstable and can still change.
|
||||
|
@ -432,7 +433,8 @@ typedef enum {
|
|||
ZSTD_c_experimentalParam8=1005,
|
||||
ZSTD_c_experimentalParam9=1006,
|
||||
ZSTD_c_experimentalParam10=1007,
|
||||
ZSTD_c_experimentalParam11=1008
|
||||
ZSTD_c_experimentalParam11=1008,
|
||||
ZSTD_c_experimentalParam12=1009
|
||||
} ZSTD_cParameter;
|
||||
|
||||
typedef struct {
|
||||
|
@ -1316,8 +1318,8 @@ typedef enum {
|
|||
* zc can be used to insert custom compression params.
|
||||
* This function invokes ZSTD_compress2
|
||||
*
|
||||
* The output of this function can be fed into ZSTD_compressSequences() with ZSTD_c_blockDelimiters
|
||||
* set to ZSTD_sf_explicitBlockDelimiters
|
||||
* The output of this function can be fed into ZSTD_compressSequences() with CCtx
|
||||
* setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
|
||||
* @return : number of sequences generated
|
||||
*/
|
||||
|
||||
|
@ -1331,8 +1333,8 @@ ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
|
|||
* As such, the final generated result has no explicit representation of block boundaries,
|
||||
* and the final last literals segment is not represented in the sequences.
|
||||
*
|
||||
* The output of this function can be fed into ZSTD_compressSequences() with ZSTD_c_blockDelimiters
|
||||
* set to ZSTD_sf_noBlockDelimiters
|
||||
* The output of this function can be fed into ZSTD_compressSequences() with CCtx
|
||||
* setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters
|
||||
* @return : number of sequences left after merging
|
||||
*/
|
||||
ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
|
||||
|
@ -1348,16 +1350,20 @@ ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t se
|
|||
* the block size derived from the cctx, and sequences may be split. This is the default setting.
|
||||
*
|
||||
* If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain
|
||||
* block delimiters (defined in ZSTD_Sequence).
|
||||
* block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided.
|
||||
*
|
||||
* In addition to ZSTD_c_blockDelimiters, other noteworthy cctx parameters are the compression level and window log.
|
||||
* If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined
|
||||
* behavior. If ZSTD_c_validateSequences == 1, then if a sequence is invalid (see doc/zstd_compression_format.md for
|
||||
* specifics regarding offset/matchlength requirements) then the function will bail out and return an error.
|
||||
*
|
||||
* In addition to the two adjustable experimental params, other noteworthy cctx parameters are the compression level and window log.
|
||||
* - The compression level accordingly adjusts the strength of the entropy coder, as it would in typical compression.
|
||||
* - The window log affects offset validation: this function will return an error at higher debug levels if a provided offset
|
||||
* is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
|
||||
*
|
||||
* Note:
|
||||
* - Repcodes are, as of now, always re-calculated, so ZSTD_Sequence::rep is never used.
|
||||
*
|
||||
* Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
|
||||
* Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly,
|
||||
* and cannot emit an RLE block that disagrees with the repcode history
|
||||
* @return : final compressed size or a ZSTD error.
|
||||
*/
|
||||
ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize,
|
||||
|
@ -1766,11 +1772,30 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
|
|||
* Default is 0 == ZSTD_sf_noBlockDelimiters.
|
||||
*
|
||||
* For use with sequence compression API: ZSTD_compressSequences().
|
||||
*
|
||||
* Designates whether or not the given array of ZSTD_Sequence contains block delimiters
|
||||
* which are defined as sequences with offset == 0 and matchLength == 0.
|
||||
* and last literals, which are defined as sequences with offset == 0 and matchLength == 0.
|
||||
* See the definition of ZSTD_Sequence for more specifics.
|
||||
*/
|
||||
#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11
|
||||
|
||||
/* ZSTD_c_validateSequences
|
||||
* Default is 0 == disabled. Set to 1 to enable sequence validation.
|
||||
*
|
||||
* For use with sequence compression API: ZSTD_compressSequences().
|
||||
* Designates whether or not we validate sequences provided to ZSTD_compressSequences()
|
||||
* during function execution.
|
||||
*
|
||||
* Without validation, providing a sequence that does not conform to the zstd spec will cause
|
||||
* undefined behavior, and may produce a corrupted block.
|
||||
*
|
||||
* With validation enabled, if a sequence is invalid (see doc/zstd_compression_format.md for
|
||||
* specifics regarding offset/matchlength requirements) then the function will bail out and
|
||||
* return an error.
|
||||
*
|
||||
*/
|
||||
#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
|
||||
|
||||
/*! ZSTD_CCtx_getParameter() :
|
||||
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
|
||||
* and store it into int* value.
|
||||
|
|
Binary file not shown.
Loading…
Reference in New Issue