fixed remaining searchLength invocations

This commit is contained in:
Yann Collet 2018-11-20 15:13:27 -08:00
parent e874dacc08
commit 2e7fd6a2cb
10 changed files with 76 additions and 40 deletions

View File

@ -772,7 +772,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
unsigned chainLog; </b>/**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */<b> unsigned chainLog; </b>/**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */<b>
unsigned hashLog; </b>/**< dispatch table : larger == faster, more memory */<b> unsigned hashLog; </b>/**< dispatch table : larger == faster, more memory */<b>
unsigned searchLog; </b>/**< nb of searches : larger == more compression, slower */<b> unsigned searchLog; </b>/**< nb of searches : larger == more compression, slower */<b>
unsigned minMatch; </b>/**< match length searched : larger == faster decompression, sometimes less compression */<b> unsigned minMatch; </b>/**< match length searched : larger == faster decompression, sometimes less compression */<b>
unsigned targetLength; </b>/**< acceptable match size for optimal parser (only) : larger == more compression, slower */<b> unsigned targetLength; </b>/**< acceptable match size for optimal parser (only) : larger == more compression, slower */<b>
ZSTD_strategy strategy; </b>/**< see ZSTD_strategy definition above */<b> ZSTD_strategy strategy; </b>/**< see ZSTD_strategy definition above */<b>
} ZSTD_compressionParameters; } ZSTD_compressionParameters;

View File

@ -263,13 +263,39 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
return bounds; return bounds;
case ZSTD_p_targetLength: case ZSTD_p_targetLength:
bounds.lowerBound = ZSTD_TARGETLENGTH_MIN;
bounds.upperBound = ZSTD_TARGETLENGTH_MAX;
return bounds;
case ZSTD_p_compressionStrategy: case ZSTD_p_compressionStrategy:
case ZSTD_p_format: bounds.lowerBound = (int)ZSTD_fast;
bounds.upperBound = (int)ZSTD_btultra;
return bounds;
case ZSTD_p_contentSizeFlag: case ZSTD_p_contentSizeFlag:
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
case ZSTD_p_checksumFlag: case ZSTD_p_checksumFlag:
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
case ZSTD_p_dictIDFlag: case ZSTD_p_dictIDFlag:
case ZSTD_p_forceMaxWindow : bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
case ZSTD_p_nbWorkers: case ZSTD_p_nbWorkers:
bounds.lowerBound = 0;
#ifdef ZSTD_MULTITHREAD
bounds.upperBound = ZSTDMT_NBWORKERS_MAX;
#else
bounds.upperBound = 0;
#endif
return bounds;
case ZSTD_p_jobSize: case ZSTD_p_jobSize:
case ZSTD_p_overlapSizeLog: case ZSTD_p_overlapSizeLog:
case ZSTD_p_rsyncable: case ZSTD_p_rsyncable:
@ -278,6 +304,13 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
case ZSTD_p_ldmMinMatch: case ZSTD_p_ldmMinMatch:
case ZSTD_p_ldmBucketSizeLog: case ZSTD_p_ldmBucketSizeLog:
case ZSTD_p_ldmHashEveryLog: case ZSTD_p_ldmHashEveryLog:
/* experimental parameters */
case ZSTD_p_forceMaxWindow :
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
case ZSTD_p_format:
case ZSTD_p_forceAttachDict: case ZSTD_p_forceAttachDict:
default: default:
{ ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 }; { ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 };

View File

@ -10,7 +10,6 @@
/* ====== Tuning parameters ====== */ /* ====== Tuning parameters ====== */
#define ZSTDMT_NBWORKERS_MAX 200
#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (2 GB)) /* note : limited by `jobSize` type, which is `unsigned` */ #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (2 GB)) /* note : limited by `jobSize` type, which is `unsigned` */
#define ZSTDMT_OVERLAPLOG_DEFAULT 6 #define ZSTDMT_OVERLAPLOG_DEFAULT 6

View File

@ -28,6 +28,10 @@
#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */ #include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
/* === Constants === */
#define ZSTDMT_NBWORKERS_MAX 200
/* === Memory management === */ /* === Memory management === */
typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx; typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers); ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);

View File

@ -255,7 +255,7 @@ static dictItem ZDICT_analyzePos(
} }
{ int i; { int i;
U32 searchLength; U32 mml;
U32 refinedStart = start; U32 refinedStart = start;
U32 refinedEnd = end; U32 refinedEnd = end;
@ -263,7 +263,7 @@ static dictItem ZDICT_analyzePos(
DISPLAYLEVEL(4, "found %3u matches of length >= %i at pos %7u ", (U32)(end-start), MINMATCHLENGTH, (U32)pos); DISPLAYLEVEL(4, "found %3u matches of length >= %i at pos %7u ", (U32)(end-start), MINMATCHLENGTH, (U32)pos);
DISPLAYLEVEL(4, "\n"); DISPLAYLEVEL(4, "\n");
for (searchLength = MINMATCHLENGTH ; ; searchLength++) { for (mml = MINMATCHLENGTH ; ; mml++) {
BYTE currentChar = 0; BYTE currentChar = 0;
U32 currentCount = 0; U32 currentCount = 0;
U32 currentID = refinedStart; U32 currentID = refinedStart;
@ -271,13 +271,13 @@ static dictItem ZDICT_analyzePos(
U32 selectedCount = 0; U32 selectedCount = 0;
U32 selectedID = currentID; U32 selectedID = currentID;
for (id =refinedStart; id < refinedEnd; id++) { for (id =refinedStart; id < refinedEnd; id++) {
if (b[suffix[id] + searchLength] != currentChar) { if (b[suffix[id] + mml] != currentChar) {
if (currentCount > selectedCount) { if (currentCount > selectedCount) {
selectedCount = currentCount; selectedCount = currentCount;
selectedID = currentID; selectedID = currentID;
} }
currentID = id; currentID = id;
currentChar = b[ suffix[id] + searchLength]; currentChar = b[ suffix[id] + mml];
currentCount = 0; currentCount = 0;
} }
currentCount ++; currentCount ++;

View File

@ -895,22 +895,22 @@ ZSTDLIB_API size_t ZSTD_decompress_generic(ZSTD_DCtx* dctx,
* This limit does not apply to one-pass decoders (such as ZSTD_decompress()), since no additional memory is allocated */ * This limit does not apply to one-pass decoders (such as ZSTD_decompress()), since no additional memory is allocated */
/* compression parameter bounds */ /* compression parameter bounds */
#define ZSTD_WINDOWLOG_MAX_32 30 #define ZSTD_WINDOWLOG_MAX_32 30
#define ZSTD_WINDOWLOG_MAX_64 31 #define ZSTD_WINDOWLOG_MAX_64 31
#define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) #define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
#define ZSTD_WINDOWLOG_MIN 10 #define ZSTD_WINDOWLOG_MIN 10
#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30) #define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30)
#define ZSTD_HASHLOG_MIN 6 #define ZSTD_HASHLOG_MIN 6
#define ZSTD_CHAINLOG_MAX_32 29 #define ZSTD_CHAINLOG_MAX_32 29
#define ZSTD_CHAINLOG_MAX_64 30 #define ZSTD_CHAINLOG_MAX_64 30
#define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64)) #define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64))
#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN #define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN
#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) #define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1)
#define ZSTD_SEARCHLOG_MIN 1 #define ZSTD_SEARCHLOG_MIN 1
#define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ #define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */
#define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */ #define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */
#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX #define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX
#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */ #define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */
/* LDM parameter bounds */ /* LDM parameter bounds */
#define ZSTD_LDM_MINMATCH_MAX 4096 #define ZSTD_LDM_MINMATCH_MAX 4096
@ -927,7 +927,7 @@ typedef struct {
unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
unsigned hashLog; /**< dispatch table : larger == faster, more memory */ unsigned hashLog; /**< dispatch table : larger == faster, more memory */
unsigned searchLog; /**< nb of searches : larger == more compression, slower */ unsigned searchLog; /**< nb of searches : larger == more compression, slower */
unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */ unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */
unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */ unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */
ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */ ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */
} ZSTD_compressionParameters; } ZSTD_compressionParameters;

View File

@ -41,7 +41,7 @@ Additional remarks:
The example usage with two test files, one e-mail address, and with an additional message: The example usage with two test files, one e-mail address, and with an additional message:
``` ```
./test-zstd-speed.py "silesia.tar calgary.tar" "email@gmail.com" --message "tested on my laptop" --sleepTime 60 ./test-zstd-speed.py "silesia.tar calgary.tar" "email@gmail.com" --message "tested on my laptop" --sleepTime 60
``` ```
To run the script in background please use: To run the script in background please use:
``` ```
@ -100,19 +100,19 @@ Full list of arguments
h# - hashLog h# - hashLog
c# - chainLog c# - chainLog
s# - searchLog s# - searchLog
l# - searchLength l# - minMatch
t# - targetLength t# - targetLength
S# - strategy S# - strategy
L# - level L# - level
--zstd= : Single run, parameter selection syntax same as zstdcli with more parameters --zstd= : Single run, parameter selection syntax same as zstdcli with more parameters
(Added forceAttachDictionary / fadt) (Added forceAttachDictionary / fadt)
When invoked with --optimize, this represents the sample to exceed. When invoked with --optimize, this represents the sample to exceed.
--optimize= : find parameters to maximize compression ratio given parameters --optimize= : find parameters to maximize compression ratio given parameters
Can use all --zstd= commands to constrain the type of solution found in addition to the following constraints Can use all --zstd= commands to constrain the type of solution found in addition to the following constraints
cSpeed= : Minimum compression speed cSpeed= : Minimum compression speed
dSpeed= : Minimum decompression speed dSpeed= : Minimum decompression speed
cMem= : Maximum compression memory cMem= : Maximum compression memory
lvl= : Searches for solutions which are strictly better than that compression lvl in ratio and cSpeed, lvl= : Searches for solutions which are strictly better than that compression lvl in ratio and cSpeed,
stc= : When invoked with lvl=, represents percentage slack in ratio/cSpeed allowed for a solution to be considered (Default 100%) stc= : When invoked with lvl=, represents percentage slack in ratio/cSpeed allowed for a solution to be considered (Default 100%)
: In normal operation, represents percentage slack used when selecting a viable starting strategy while choosing the default parameters : In normal operation, represents percentage slack used when selecting a viable starting strategy while choosing the default parameters
(Lower value will begin with stronger strategies) (Default 90%) (Lower value will begin with stronger strategies) (Default 90%)
@ -121,13 +121,13 @@ Full list of arguments
when determining overall winner (default 5 (1% ratio = 5% speed)). when determining overall winner (default 5 (1% ratio = 5% speed)).
tries= : Maximum number of random restarts on a single strategy before switching (Default 5) tries= : Maximum number of random restarts on a single strategy before switching (Default 5)
Higher values will make optimizer run longer, more chances to find better solution. Higher values will make optimizer run longer, more chances to find better solution.
memLog : Limits the log of the size of each memotable (1 per strategy). Will use hash tables when state space is larger than max size. memLog : Limits the log of the size of each memotable (1 per strategy). Will use hash tables when state space is larger than max size.
Setting memLog = 0 turns off memoization Setting memLog = 0 turns off memoization
--display= : specify which parameters are included in the output --display= : specify which parameters are included in the output
can use all --zstd parameter names and 'cParams' as a shorthand for all parameters used in ZSTD_compressionParameters can use all --zstd parameter names and 'cParams' as a shorthand for all parameters used in ZSTD_compressionParameters
(Default: display all params available) (Default: display all params available)
-P# : generated sample compressibility (when no file is provided) -P# : generated sample compressibility (when no file is provided)
-t# : Caps runtime of operation in seconds (default : 99999 seconds (about 27 hours )) -t# : Caps runtime of operation in seconds (default : 99999 seconds (about 27 hours ))
-v : Prints Benchmarking output -v : Prints Benchmarking output
-D : Next argument dictionary file -D : Next argument dictionary file
-s : Benchmark all files separately -s : Benchmark all files separately

View File

@ -32,8 +32,8 @@ ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, uint32_t *state)
cParams.hashLog = FUZZ_rand32(state, ZSTD_HASHLOG_MIN, 15); cParams.hashLog = FUZZ_rand32(state, ZSTD_HASHLOG_MIN, 15);
cParams.chainLog = FUZZ_rand32(state, ZSTD_CHAINLOG_MIN, 16); cParams.chainLog = FUZZ_rand32(state, ZSTD_CHAINLOG_MIN, 16);
cParams.searchLog = FUZZ_rand32(state, ZSTD_SEARCHLOG_MIN, 9); cParams.searchLog = FUZZ_rand32(state, ZSTD_SEARCHLOG_MIN, 9);
cParams.searchLength = FUZZ_rand32(state, ZSTD_SEARCHLENGTH_MIN, cParams.minMatch = FUZZ_rand32(state, ZSTD_MINMATCH_MIN,
ZSTD_SEARCHLENGTH_MAX); ZSTD_MINMATCH_MAX);
cParams.targetLength = FUZZ_rand32(state, 0, 512); cParams.targetLength = FUZZ_rand32(state, 0, 512);
cParams.strategy = FUZZ_rand32(state, ZSTD_fast, ZSTD_btultra); cParams.strategy = FUZZ_rand32(state, ZSTD_fast, ZSTD_btultra);
return ZSTD_adjustCParams(cParams, srcSize, 0); return ZSTD_adjustCParams(cParams, srcSize, 0);
@ -64,7 +64,7 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state)
set(cctx, ZSTD_p_hashLog, cParams.hashLog); set(cctx, ZSTD_p_hashLog, cParams.hashLog);
set(cctx, ZSTD_p_chainLog, cParams.chainLog); set(cctx, ZSTD_p_chainLog, cParams.chainLog);
set(cctx, ZSTD_p_searchLog, cParams.searchLog); set(cctx, ZSTD_p_searchLog, cParams.searchLog);
set(cctx, ZSTD_p_minMatch, cParams.searchLength); set(cctx, ZSTD_p_minMatch, cParams.minMatch);
set(cctx, ZSTD_p_targetLength, cParams.targetLength); set(cctx, ZSTD_p_targetLength, cParams.targetLength);
set(cctx, ZSTD_p_compressionStrategy, cParams.strategy); set(cctx, ZSTD_p_compressionStrategy, cParams.strategy);
/* Select frame parameters */ /* Select frame parameters */

View File

@ -50,7 +50,7 @@ int main(int argc, const char** argv)
params.cParams.chainLog = 13; params.cParams.chainLog = 13;
params.cParams.hashLog = 14; params.cParams.hashLog = 14;
params.cParams.searchLog = 1; params.cParams.searchLog = 1;
params.cParams.searchLength = 7; params.cParams.minMatch = 7;
params.cParams.targetLength = 16; params.cParams.targetLength = 16;
params.cParams.strategy = ZSTD_fast; params.cParams.strategy = ZSTD_fast;
windowLog = params.cParams.windowLog; windowLog = params.cParams.windowLog;

View File

@ -140,8 +140,8 @@ static int ZWRAP_initializeCStream(ZWRAP_CCtx* zwc, const void* dict, size_t dic
if (!pledgedSrcSize) pledgedSrcSize = zwc->pledgedSrcSize; if (!pledgedSrcSize) pledgedSrcSize = zwc->pledgedSrcSize;
{ ZSTD_parameters const params = ZSTD_getParams(zwc->compressionLevel, pledgedSrcSize, dictSize); { ZSTD_parameters const params = ZSTD_getParams(zwc->compressionLevel, pledgedSrcSize, dictSize);
size_t initErr; size_t initErr;
LOG_WRAPPERC("pledgedSrcSize=%d windowLog=%d chainLog=%d hashLog=%d searchLog=%d searchLength=%d strategy=%d\n", LOG_WRAPPERC("pledgedSrcSize=%d windowLog=%d chainLog=%d hashLog=%d searchLog=%d minMatch=%d strategy=%d\n",
(int)pledgedSrcSize, params.cParams.windowLog, params.cParams.chainLog, params.cParams.hashLog, params.cParams.searchLog, params.cParams.searchLength, params.cParams.strategy); (int)pledgedSrcSize, params.cParams.windowLog, params.cParams.chainLog, params.cParams.hashLog, params.cParams.searchLog, params.cParams.minMatch, params.cParams.strategy);
initErr = ZSTD_initCStream_advanced(zwc->zbc, dict, dictSize, params, pledgedSrcSize); initErr = ZSTD_initCStream_advanced(zwc->zbc, dict, dictSize, params, pledgedSrcSize);
if (ZSTD_isError(initErr)) return Z_STREAM_ERROR; if (ZSTD_isError(initErr)) return Z_STREAM_ERROR;
} }