fixed remaining searchLength invocations

This commit is contained in:
Yann Collet 2018-11-20 15:13:27 -08:00
parent e874dacc08
commit 2e7fd6a2cb
10 changed files with 76 additions and 40 deletions

View File

@ -772,7 +772,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
unsigned chainLog; </b>/**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */<b>
unsigned hashLog; </b>/**< dispatch table : larger == faster, more memory */<b>
unsigned searchLog; </b>/**< nb of searches : larger == more compression, slower */<b>
unsigned minMatch; </b>/**< match length searched : larger == faster decompression, sometimes less compression */<b>
unsigned minMatch; </b>/**< match length searched : larger == faster decompression, sometimes less compression */<b>
unsigned targetLength; </b>/**< acceptable match size for optimal parser (only) : larger == more compression, slower */<b>
ZSTD_strategy strategy; </b>/**< see ZSTD_strategy definition above */<b>
} ZSTD_compressionParameters;

View File

@ -263,13 +263,39 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
return bounds;
case ZSTD_p_targetLength:
bounds.lowerBound = ZSTD_TARGETLENGTH_MIN;
bounds.upperBound = ZSTD_TARGETLENGTH_MAX;
return bounds;
case ZSTD_p_compressionStrategy:
case ZSTD_p_format:
bounds.lowerBound = (int)ZSTD_fast;
bounds.upperBound = (int)ZSTD_btultra;
return bounds;
case ZSTD_p_contentSizeFlag:
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
case ZSTD_p_checksumFlag:
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
case ZSTD_p_dictIDFlag:
case ZSTD_p_forceMaxWindow :
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
case ZSTD_p_nbWorkers:
bounds.lowerBound = 0;
#ifdef ZSTD_MULTITHREAD
bounds.upperBound = ZSTDMT_NBWORKERS_MAX;
#else
bounds.upperBound = 0;
#endif
return bounds;
case ZSTD_p_jobSize:
case ZSTD_p_overlapSizeLog:
case ZSTD_p_rsyncable:
@ -278,6 +304,13 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
case ZSTD_p_ldmMinMatch:
case ZSTD_p_ldmBucketSizeLog:
case ZSTD_p_ldmHashEveryLog:
/* experimental parameters */
case ZSTD_p_forceMaxWindow :
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
case ZSTD_p_format:
case ZSTD_p_forceAttachDict:
default:
{ ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 };

View File

@ -10,7 +10,6 @@
/* ====== Tuning parameters ====== */
#define ZSTDMT_NBWORKERS_MAX 200
#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (2 GB)) /* note : limited by `jobSize` type, which is `unsigned` */
#define ZSTDMT_OVERLAPLOG_DEFAULT 6

View File

@ -28,6 +28,10 @@
#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
/* === Constants === */
#define ZSTDMT_NBWORKERS_MAX 200
/* === Memory management === */
typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);

View File

@ -255,7 +255,7 @@ static dictItem ZDICT_analyzePos(
}
{ int i;
U32 searchLength;
U32 mml;
U32 refinedStart = start;
U32 refinedEnd = end;
@ -263,7 +263,7 @@ static dictItem ZDICT_analyzePos(
DISPLAYLEVEL(4, "found %3u matches of length >= %i at pos %7u ", (U32)(end-start), MINMATCHLENGTH, (U32)pos);
DISPLAYLEVEL(4, "\n");
for (searchLength = MINMATCHLENGTH ; ; searchLength++) {
for (mml = MINMATCHLENGTH ; ; mml++) {
BYTE currentChar = 0;
U32 currentCount = 0;
U32 currentID = refinedStart;
@ -271,13 +271,13 @@ static dictItem ZDICT_analyzePos(
U32 selectedCount = 0;
U32 selectedID = currentID;
for (id =refinedStart; id < refinedEnd; id++) {
if (b[suffix[id] + searchLength] != currentChar) {
if (b[suffix[id] + mml] != currentChar) {
if (currentCount > selectedCount) {
selectedCount = currentCount;
selectedID = currentID;
}
currentID = id;
currentChar = b[ suffix[id] + searchLength];
currentChar = b[ suffix[id] + mml];
currentCount = 0;
}
currentCount ++;

View File

@ -895,22 +895,22 @@ ZSTDLIB_API size_t ZSTD_decompress_generic(ZSTD_DCtx* dctx,
* This limit does not apply to one-pass decoders (such as ZSTD_decompress()), since no additional memory is allocated */
/* compression parameter bounds */
#define ZSTD_WINDOWLOG_MAX_32 30
#define ZSTD_WINDOWLOG_MAX_64 31
#define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
#define ZSTD_WINDOWLOG_MIN 10
#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30)
#define ZSTD_HASHLOG_MIN 6
#define ZSTD_CHAINLOG_MAX_32 29
#define ZSTD_CHAINLOG_MAX_64 30
#define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64))
#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN
#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1)
#define ZSTD_SEARCHLOG_MIN 1
#define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */
#define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */
#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX
#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */
#define ZSTD_WINDOWLOG_MAX_32 30
#define ZSTD_WINDOWLOG_MAX_64 31
#define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
#define ZSTD_WINDOWLOG_MIN 10
#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30)
#define ZSTD_HASHLOG_MIN 6
#define ZSTD_CHAINLOG_MAX_32 29
#define ZSTD_CHAINLOG_MAX_64 30
#define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64))
#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN
#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1)
#define ZSTD_SEARCHLOG_MIN 1
#define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */
#define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */
#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX
#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */
/* LDM parameter bounds */
#define ZSTD_LDM_MINMATCH_MAX 4096
@ -927,7 +927,7 @@ typedef struct {
unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
unsigned hashLog; /**< dispatch table : larger == faster, more memory */
unsigned searchLog; /**< nb of searches : larger == more compression, slower */
unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */
unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */
unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */
ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */
} ZSTD_compressionParameters;

View File

@ -41,7 +41,7 @@ Additional remarks:
The example usage with two test files, one e-mail address, and with an additional message:
```
./test-zstd-speed.py "silesia.tar calgary.tar" "email@gmail.com" --message "tested on my laptop" --sleepTime 60
```
```
To run the script in background please use:
```
@ -100,19 +100,19 @@ Full list of arguments
h# - hashLog
c# - chainLog
s# - searchLog
l# - searchLength
l# - minMatch
t# - targetLength
S# - strategy
L# - level
--zstd= : Single run, parameter selection syntax same as zstdcli with more parameters
(Added forceAttachDictionary / fadt)
When invoked with --optimize, this represents the sample to exceed.
(Added forceAttachDictionary / fadt)
When invoked with --optimize, this represents the sample to exceed.
--optimize= : find parameters to maximize compression ratio given parameters
Can use all --zstd= commands to constrain the type of solution found in addition to the following constraints
cSpeed= : Minimum compression speed
dSpeed= : Minimum decompression speed
cMem= : Maximum compression memory
lvl= : Searches for solutions which are strictly better than that compression lvl in ratio and cSpeed,
lvl= : Searches for solutions which are strictly better than that compression lvl in ratio and cSpeed,
stc= : When invoked with lvl=, represents percentage slack in ratio/cSpeed allowed for a solution to be considered (Default 100%)
: In normal operation, represents percentage slack in choosing viable starting strategy selection in choosing the default parameters
(Lower value will begin with stronger strategies) (Default 90%)
@ -121,13 +121,13 @@ Full list of arguments
when determining overall winner (default 5 (1% ratio = 5% speed)).
tries= : Maximum number of random restarts on a single strategy before switching (Default 5)
Higher values will make optimizer run longer, more chances to find better solution.
memLog : Limits the log of the size of each memotable (1 per strategy). Will use hash tables when state space is larger than max size.
Setting memLog = 0 turns off memoization
memLog : Limits the log of the size of each memotable (1 per strategy). Will use hash tables when state space is larger than max size.
Setting memLog = 0 turns off memoization
--display= : specifiy which parameters are included in the output
can use all --zstd parameter names and 'cParams' as a shorthand for all parameters used in ZSTD_compressionParameters
can use all --zstd parameter names and 'cParams' as a shorthand for all parameters used in ZSTD_compressionParameters
(Default: display all params available)
-P# : generated sample compressibility (when no file is provided)
-t# : Caps runtime of operation in seconds (default : 99999 seconds (about 27 hours ))
-t# : Caps runtime of operation in seconds (default : 99999 seconds (about 27 hours ))
-v : Prints Benchmarking output
-D : Next argument dictionary file
-s : Benchmark all files separately

View File

@ -32,8 +32,8 @@ ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, uint32_t *state)
cParams.hashLog = FUZZ_rand32(state, ZSTD_HASHLOG_MIN, 15);
cParams.chainLog = FUZZ_rand32(state, ZSTD_CHAINLOG_MIN, 16);
cParams.searchLog = FUZZ_rand32(state, ZSTD_SEARCHLOG_MIN, 9);
cParams.searchLength = FUZZ_rand32(state, ZSTD_SEARCHLENGTH_MIN,
ZSTD_SEARCHLENGTH_MAX);
cParams.minMatch = FUZZ_rand32(state, ZSTD_MINMATCH_MIN,
ZSTD_MINMATCH_MAX);
cParams.targetLength = FUZZ_rand32(state, 0, 512);
cParams.strategy = FUZZ_rand32(state, ZSTD_fast, ZSTD_btultra);
return ZSTD_adjustCParams(cParams, srcSize, 0);
@ -64,7 +64,7 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state)
set(cctx, ZSTD_p_hashLog, cParams.hashLog);
set(cctx, ZSTD_p_chainLog, cParams.chainLog);
set(cctx, ZSTD_p_searchLog, cParams.searchLog);
set(cctx, ZSTD_p_minMatch, cParams.searchLength);
set(cctx, ZSTD_p_minMatch, cParams.minMatch);
set(cctx, ZSTD_p_targetLength, cParams.targetLength);
set(cctx, ZSTD_p_compressionStrategy, cParams.strategy);
/* Select frame parameters */

View File

@ -50,7 +50,7 @@ int main(int argc, const char** argv)
params.cParams.chainLog = 13;
params.cParams.hashLog = 14;
params.cParams.searchLog = 1;
params.cParams.searchLength = 7;
params.cParams.minMatch = 7;
params.cParams.targetLength = 16;
params.cParams.strategy = ZSTD_fast;
windowLog = params.cParams.windowLog;

View File

@ -140,8 +140,8 @@ static int ZWRAP_initializeCStream(ZWRAP_CCtx* zwc, const void* dict, size_t dic
if (!pledgedSrcSize) pledgedSrcSize = zwc->pledgedSrcSize;
{ ZSTD_parameters const params = ZSTD_getParams(zwc->compressionLevel, pledgedSrcSize, dictSize);
size_t initErr;
LOG_WRAPPERC("pledgedSrcSize=%d windowLog=%d chainLog=%d hashLog=%d searchLog=%d searchLength=%d strategy=%d\n",
(int)pledgedSrcSize, params.cParams.windowLog, params.cParams.chainLog, params.cParams.hashLog, params.cParams.searchLog, params.cParams.searchLength, params.cParams.strategy);
LOG_WRAPPERC("pledgedSrcSize=%d windowLog=%d chainLog=%d hashLog=%d searchLog=%d minMatch=%d strategy=%d\n",
(int)pledgedSrcSize, params.cParams.windowLog, params.cParams.chainLog, params.cParams.hashLog, params.cParams.searchLog, params.cParams.minMatch, params.cParams.strategy);
initErr = ZSTD_initCStream_advanced(zwc->zbc, dict, dictSize, params, pledgedSrcSize);
if (ZSTD_isError(initErr)) return Z_STREAM_ERROR;
}