From 153bc1c0047f9c2bb503ced4f1b6f13b75e9c80a Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 21 Mar 2018 15:50:05 -0700 Subject: [PATCH 1/4] removed limit ZSTD_TARGETLENGTH_MAX this makes it possible to specify extremely large negative compression levels, achieving the side effect as "no compression". It will also be possible to define larger targetlength for ultra compression mode. There is no adverse side effect due to removing this limit. --- lib/compress/zstd_compress.c | 8 ++++---- lib/zstd.h | 1 - programs/zstdcli.c | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 52829177..2aa26da4 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -389,8 +389,7 @@ size_t ZSTD_CCtxParam_setParameter( return CCtxParams->cParams.searchLength; case ZSTD_p_targetLength : - if (value>0) /* 0 => use default */ - CLAMPCHECK(value, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); + /* all values are valid. 0 => use default */ CCtxParams->cParams.targetLength = value; return CCtxParams->cParams.targetLength; @@ -590,7 +589,8 @@ size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); - CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); + if ((U32)(cParams.targetLength) < ZSTD_TARGETLENGTH_MIN) + return ERROR(parameter_unsupported); if ((U32)(cParams.strategy) > (U32)ZSTD_btultra) return ERROR(parameter_unsupported); return 0; @@ -610,7 +610,7 @@ static ZSTD_compressionParameters ZSTD_clampCParams(ZSTD_compressionParameters c CLAMP(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); CLAMP(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); CLAMP(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); - CLAMP(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); + if ((U32)(cParams.targetLength) < ZSTD_TARGETLENGTH_MIN) cParams.targetLength = ZSTD_TARGETLENGTH_MIN; if ((U32)(cParams.strategy) > (U32)ZSTD_btultra) cParams.strategy = ZSTD_btultra; return cParams; } diff --git a/lib/zstd.h b/lib/zstd.h index a35091a5..6405da60 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -391,7 +391,6 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output #define ZSTD_SEARCHLENGTH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ #define ZSTD_SEARCHLENGTH_MIN 3 /* only for ZSTD_btopt, other strategies are limited to 4 */ #define ZSTD_TARGETLENGTH_MIN 1 /* only used by btopt, btultra and btfast */ -#define ZSTD_TARGETLENGTH_MAX 999 /* only used by btopt, btultra and btfast */ #define ZSTD_LDM_MINMATCH_MIN 4 #define ZSTD_LDM_MINMATCH_MAX 4096 #define ZSTD_LDM_BUCKETSIZELOG_MAX 8 diff --git a/programs/zstdcli.c b/programs/zstdcli.c index bf82843a..c35de7cc 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -388,7 +388,7 @@ int main(int argCount, const char* argv[]) zstd_operation_mode operation = zom_compress; ZSTD_compressionParameters compressionParams; int cLevel = ZSTDCLI_CLEVEL_DEFAULT; - int cLevelLast = -10000; + int cLevelLast = -1000000000; unsigned recursive = 0; unsigned memLimit = 0; const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*)); /* argCount >= 1 */ From 93b8262c021c89f8b6204c7b27aca0e23ade9786 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 21 Mar 2018 15:54:44 -0700 Subject: [PATCH 2/4] added large negative level test case --- tests/playTests.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/playTests.sh b/tests/playTests.sh index 1847f57d..41d8263b 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -106,6 +106,7 @@ $ZSTD -99 -f tmp # too large compression level, automatic sized down $ECHO "test : --fast aka negative compression levels" $ZSTD --fast -f tmp # == -1 $ZSTD --fast=3 -f tmp # == -3 +$ZSTD --fast=200000 -f tmp # == no compression $ECHO "test : compress to stdout" $ZSTD tmp -c > tmpCompressed $ZSTD tmp --stdout > tmpCompressed # long command format From b7b80e156903750946f9bc9e99570a57d47529f5 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 21 Mar 2018 16:09:15 -0700 Subject: [PATCH 3/4] fixed paramgrill --- doc/zstd_manual.html | 102 +++++++++++++++++++++---------------------- tests/paramgrill.c | 12 ++--- 2 files changed, 55 insertions(+), 59 deletions(-) diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index 66dd8de1..623fd611 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -730,12 +730,12 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long

New advanced API (experimental)


 
 
typedef enum {
-    /* Question : should we have a format ZSTD_f_auto ?
-     * For the time being, it would mean exactly the same as ZSTD_f_zstd1.
-     * But, in the future, should several formats be supported,
+    /* Opened question : should we have a format ZSTD_f_auto ?
+     * Today, it would mean exactly the same as ZSTD_f_zstd1.
+     * But, in the future, should several formats become supported,
      * on the compression side, it would mean "default format".
-     * On the decompression side, it would mean "multi format",
-     * and ZSTD_f_zstd1 could be reserved to mean "accept *only* zstd frames".
+     * On the decompression side, it would mean "automatic format detection",
+     * so that ZSTD_f_zstd1 would mean "accept *only* zstd frames".
      * Since meaning is a little different, another option could be to define different enums for compression and decompression.
      * This question could be kept for later, when there are actually multiple formats to support,
      * but there is also the question of pinning enum values, and pinning value `0` is especially important */
@@ -755,33 +755,34 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
                               * Default level is ZSTD_CLEVEL_DEFAULT==3.
                               * Special: value 0 means "do not change cLevel".
                               * Note 1 : it's possible to pass a negative compression level by casting it to unsigned type.
-                              * Note 2 : setting compressionLevel automatically updates ZSTD_p_compressLiterals. */
+                              * Note 2 : setting a level sets all default values of other compression parameters.
+                              * Note 3 : setting compressionLevel automatically updates ZSTD_p_compressLiterals. */
     ZSTD_p_windowLog,        /* Maximum allowed back-reference distance, expressed as power of 2.
                               * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
-                              * Special: value 0 means "do not change windowLog".
+                              * Special: value 0 means "use default windowLog".
                               * Note: Using a window size greater than ZSTD_MAXWINDOWSIZE_DEFAULT (default: 2^27)
-                              * requires setting the maximum window size at least as large during decompression. */
+                              *       requires explicitly allowing such window size during decompression stage. */
     ZSTD_p_hashLog,          /* Size of the probe table, as a power of 2.
                               * Resulting table size is (1 << (hashLog+2)).
                               * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
                               * Larger tables improve compression ratio of strategies <= dFast,
                               * and improve speed of strategies > dFast.
-                              * Special: value 0 means "do not change hashLog". */
+                              * Special: value 0 means "use default hashLog". */
     ZSTD_p_chainLog,         /* Size of the full-search table, as a power of 2.
                               * Resulting table size is (1 << (chainLog+2)).
                               * Larger tables result in better and slower compression.
                               * This parameter is useless when using "fast" strategy.
-                              * Special: value 0 means "do not change chainLog". */
+                              * Special: value 0 means "use default chainLog". */
     ZSTD_p_searchLog,        /* Number of search attempts, as a power of 2.
                               * More attempts result in better and slower compression.
                               * This parameter is useless when using "fast" and "dFast" strategies.
-                              * Special: value 0 means "do not change searchLog". */
+                              * Special: value 0 means "use default searchLog". */
     ZSTD_p_minMatch,         /* Minimum size of searched matches (note : repCode matches can be smaller).
                               * Larger values make faster compression and decompression, but decrease ratio.
                               * Must be clamped between ZSTD_SEARCHLENGTH_MIN and ZSTD_SEARCHLENGTH_MAX.
                               * Note that currently, for all strategies < btopt, effective minimum is 4.
-                              * Note that currently, for all strategies > fast, effective maximum is 6.
-                              * Special: value 0 means "do not change minMatchLength". */
+                              *                    , for all strategies > fast, effective maximum is 6.
+                              * Special: value 0 means "use default minMatchLength". */
     ZSTD_p_targetLength,     /* Impact of this field depends on strategy.
                               * For strategies btopt & btultra:
                               *     Length of Match considered "good enough" to stop search.
@@ -789,12 +790,39 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
                               * For strategy fast:
                               *     Distance between match sampling.
                               *     Larger values make compression faster, and weaker.
-                              * Special: value 0 means "do not change targetLength". */
+                              * Special: value 0 means "use default targetLength". */
     ZSTD_p_compressionStrategy, /* See ZSTD_strategy enum definition.
                               * Cast selected strategy as unsigned for ZSTD_CCtx_setParameter() compatibility.
                               * The higher the value of selected strategy, the more complex it is,
                               * resulting in stronger and slower compression.
-                              * Special: value 0 means "do not change strategy". */
+                              * Special: value 0 means "use default strategy". */
+
+    ZSTD_p_enableLongDistanceMatching=160, /* Enable long distance matching.
+                                         * This parameter is designed to improve compression ratio
+                                         * for large inputs, by finding large matches at long distance.
+                                         * It increases memory usage and window size.
+                                         * Note: enabling this parameter increases ZSTD_p_windowLog to 128 MB
+                                         * except when expressly set to a different value. */
+    ZSTD_p_ldmHashLog,       /* Size of the table for long distance matching, as a power of 2.
+                              * Larger values increase memory usage and compression ratio,
+                              * but decrease compression speed.
+                              * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX
+                              * default: windowlog - 7.
+                              * Special: value 0 means "automatically determine hashlog". */
+    ZSTD_p_ldmMinMatch,      /* Minimum match size for long distance matcher.
+                              * Larger/too small values usually decrease compression ratio.
+                              * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX.
+                              * Special: value 0 means "use default value" (default: 64). */
+    ZSTD_p_ldmBucketSizeLog, /* Log size of each bucket in the LDM hash table for collision resolution.
+                              * Larger values improve collision resolution but decrease compression speed.
+                              * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX .
+                              * Special: value 0 means "use default value" (default: 3). */
+    ZSTD_p_ldmHashEveryLog,  /* Frequency of inserting/looking up entries in the LDM hash table.
+                              * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN).
+                              * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage.
+                              * Larger values improve compression speed.
+                              * Deviating far from default value will likely result in a compression ratio decrease.
+                              * Special: value 0 means "automatically determine hashEveryLog". */
 
     /* frame parameters */
     ZSTD_p_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1)
@@ -821,7 +849,9 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
     ZSTD_p_overlapSizeLog,   /* Size of previous input reloaded at the beginning of each job.
                               * 0 => no overlap, 6(default) => use 1/8th of windowSize, >=9 => use full windowSize */
 
-    /* advanced parameters - may not remain available after API update */
+    /* =================================================================== */
+    /* experimental parameters - no stability guaranteed                   */
+    /* =================================================================== */
 
     ZSTD_p_compressLiterals=1000, /* control huffman compression of literals (enabled) by default.
                               * disabling it improves speed and decreases compression ratio by a large amount.
@@ -832,40 +862,6 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
     ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize,
                               * even when referencing into Dictionary content (default:0) */
 
-    ZSTD_p_enableLongDistanceMatching=1200, /* Enable long distance matching.
-                                         * This parameter is designed to improve the compression
-                                         * ratio for large inputs with long distance matches.
-                                         * This increases the memory usage as well as window size.
-                                         * Note: setting this parameter sets all the LDM parameters
-                                         * as well as ZSTD_p_windowLog. It should be set after
-                                         * ZSTD_p_compressionLevel and before ZSTD_p_windowLog and
-                                         * other LDM parameters. Setting the compression level
-                                         * after this parameter overrides the window log, though LDM
-                                         * will remain enabled until explicitly disabled. */
-    ZSTD_p_ldmHashLog,       /* Size of the table for long distance matching, as a power of 2.
-                              * Larger values increase memory usage and compression ratio, but decrease
-                              * compression speed.
-                              * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX
-                              * (default: windowlog - 7).
-                              * Special: value 0 means "do not change ldmHashLog". */
-    ZSTD_p_ldmMinMatch,      /* Minimum size of searched matches for long distance matcher.
-                              * Larger/too small values usually decrease compression ratio.
-                              * Must be clamped between ZSTD_LDM_MINMATCH_MIN
-                              * and ZSTD_LDM_MINMATCH_MAX (default: 64).
-                              * Special: value 0 means "do not change ldmMinMatch". */
-    ZSTD_p_ldmBucketSizeLog, /* Log size of each bucket in the LDM hash table for collision resolution.
-                              * Larger values usually improve collision resolution but may decrease
-                              * compression speed.
-                              * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX (default: 3).
-                              * note : 0 is a valid value */
-    ZSTD_p_ldmHashEveryLog,  /* Frequency of inserting/looking up entries in the LDM hash table.
-                              * The default is MAX(0, (windowLog - ldmHashLog)) to
-                              * optimize hash table usage.
-                              * Larger values improve compression speed. Deviating far from the
-                              * default value will likely result in a decrease in compression ratio.
-                              * Must be clamped between 0 and ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN.
-                              * note : 0 is a valid value */
-
 } ZSTD_cParameter;
 

size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value);
@@ -1067,7 +1063,7 @@ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size
  
 


-
size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);   /* not implemented */
+
size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
 

Reference a prepared dictionary, to be used to decompress next frames. The dictionary remains active for decompression of future frames using same DCtx. @result : 0, or an error code (which can be tested with ZSTD_isError()). @@ -1078,8 +1074,8 @@ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size


-
size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize);   /* not implemented */
-size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);   /* not implemented */
+
size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize);
+size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
 

Reference a prefix (single-usage dictionary) for next compression job. Prefix is **only used once**. It must be explicitly referenced before each frame. If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_DDict instead. diff --git a/tests/paramgrill.c b/tests/paramgrill.c index ae14aa07..13b102b2 100644 --- a/tests/paramgrill.c +++ b/tests/paramgrill.c @@ -547,12 +547,12 @@ static ZSTD_compressionParameters randomParams(void) U32 validated = 0; while (!validated) { /* totally random entry */ - p.chainLog = FUZ_rand(&g_rand) % (ZSTD_CHAINLOG_MAX+1 - ZSTD_CHAINLOG_MIN) + ZSTD_CHAINLOG_MIN; - p.hashLog = FUZ_rand(&g_rand) % (ZSTD_HASHLOG_MAX+1 - ZSTD_HASHLOG_MIN) + ZSTD_HASHLOG_MIN; - p.searchLog = FUZ_rand(&g_rand) % (ZSTD_SEARCHLOG_MAX+1 - ZSTD_SEARCHLOG_MIN) + ZSTD_SEARCHLOG_MIN; - p.windowLog = FUZ_rand(&g_rand) % (ZSTD_WINDOWLOG_MAX+1 - ZSTD_WINDOWLOG_MIN) + ZSTD_WINDOWLOG_MIN; - p.searchLength=FUZ_rand(&g_rand) % (ZSTD_SEARCHLENGTH_MAX+1 - ZSTD_SEARCHLENGTH_MIN) + ZSTD_SEARCHLENGTH_MIN; - p.targetLength=FUZ_rand(&g_rand) % (ZSTD_TARGETLENGTH_MAX+1 - ZSTD_TARGETLENGTH_MIN) + ZSTD_TARGETLENGTH_MIN; + p.chainLog = (FUZ_rand(&g_rand) % (ZSTD_CHAINLOG_MAX+1 - ZSTD_CHAINLOG_MIN)) + ZSTD_CHAINLOG_MIN; + p.hashLog = (FUZ_rand(&g_rand) % (ZSTD_HASHLOG_MAX+1 - ZSTD_HASHLOG_MIN)) + ZSTD_HASHLOG_MIN; + p.searchLog = (FUZ_rand(&g_rand) % (ZSTD_SEARCHLOG_MAX+1 - ZSTD_SEARCHLOG_MIN)) + ZSTD_SEARCHLOG_MIN; + p.windowLog = (FUZ_rand(&g_rand) % (ZSTD_WINDOWLOG_MAX+1 - ZSTD_WINDOWLOG_MIN)) + ZSTD_WINDOWLOG_MIN; + p.searchLength=(FUZ_rand(&g_rand) % (ZSTD_SEARCHLENGTH_MAX+1 - ZSTD_SEARCHLENGTH_MIN)) + ZSTD_SEARCHLENGTH_MIN; + p.targetLength=(FUZ_rand(&g_rand) % (512)) + ZSTD_TARGETLENGTH_MIN; p.strategy = (ZSTD_strategy) (FUZ_rand(&g_rand) % (ZSTD_btultra +1)); validated = !ZSTD_isError(ZSTD_checkCParams(p)); } From f15e2fd55ed632d947b93b3acc9ef5ad34658b46 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 21 Mar 2018 16:45:15 -0700 Subject: [PATCH 4/4] fixed fuzz test --- tests/fuzz/zstd_helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c index 60289847..6fc38361 100644 --- a/tests/fuzz/zstd_helpers.c +++ b/tests/fuzz/zstd_helpers.c @@ -35,7 +35,7 @@ ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, uint32_t *state) cParams.searchLength = FUZZ_rand32(state, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); cParams.targetLength = FUZZ_rand32(state, ZSTD_TARGETLENGTH_MIN, - ZSTD_TARGETLENGTH_MAX); + 512); cParams.strategy = FUZZ_rand32(state, ZSTD_fast, ZSTD_btultra); return ZSTD_adjustCParams(cParams, srcSize, 0); }