From 17c19fbbb5930beb470b3db3afdafeb822d750db Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 11 May 2018 17:32:26 -0700 Subject: [PATCH 1/7] generalized use of readU32FromChar() and check input overflow --- tests/paramgrill.c | 143 ++++++++++++++++++++++++--------------------- 1 file changed, 76 insertions(+), 67 deletions(-) diff --git a/tests/paramgrill.c b/tests/paramgrill.c index 13b102b2..13ab7a28 100644 --- a/tests/paramgrill.c +++ b/tests/paramgrill.c @@ -18,6 +18,7 @@ #include /* strcmp */ #include /* log */ #include +#include #include "mem.h" #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters, ZSTD_estimateCCtxSize */ @@ -32,7 +33,7 @@ **************************************/ #define PROGRAM_DESCRIPTION "ZSTD parameters tester" #define AUTHOR "Yann Collet" -#define WELCOME_MESSAGE "*** %s %s %i-bits, by %s (%s) ***\n", PROGRAM_DESCRIPTION, ZSTD_VERSION_STRING, (int)(sizeof(void*)*8), AUTHOR, __DATE__ +#define WELCOME_MESSAGE "*** %s %s %i-bits, by %s ***\n", PROGRAM_DESCRIPTION, ZSTD_VERSION_STRING, (int)(sizeof(void*)*8), AUTHOR #define KB *(1<<10) @@ -47,7 +48,6 @@ static const size_t maxMemory = (sizeof(size_t)==4) ? 
(2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31)); #define COMPRESSIBILITY_DEFAULT 0.50 -static const size_t sampleSize = 10000000; static const double g_grillDuration_s = 90000; /* about 24 hours */ static const U64 g_maxParamTime = 15 * SEC_TO_MICRO; @@ -566,18 +566,17 @@ static void BMK_selectRandomStart( { U32 const id = (FUZ_rand(&g_rand) % (ZSTD_maxCLevel()+1)); if ((id==0) || (winners[id].params.windowLog==0)) { - /* totally random entry */ + /* use some random entry */ ZSTD_compressionParameters const p = ZSTD_adjustCParams(randomParams(), srcSize, 0); playAround(f, winners, p, srcBuffer, srcSize, ctx); - } - else + } else { playAround(f, winners, winners[id].params, srcBuffer, srcSize, ctx); + } } -static void BMK_benchMem(void* srcBuffer, size_t srcSize) +static void BMK_benchMem_usingCCtx(ZSTD_CCtx* cctx, const void* srcBuffer, size_t srcSize) { - ZSTD_CCtx* const ctx = ZSTD_createCCtx(); ZSTD_compressionParameters params; winnerInfo_t winners[NB_LEVELS_TRACKED]; const char* const rfName = "grillResults.txt"; @@ -585,25 +584,24 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize) const size_t blockSize = g_blockSize ? 
g_blockSize : srcSize; /* init */ - if (ctx==NULL) { DISPLAY("ZSTD_createCCtx() failed \n"); exit(1); } memset(winners, 0, sizeof(winners)); if (f==NULL) { DISPLAY("error opening %s \n", rfName); exit(1); } if (g_singleRun) { BMK_result_t testResult; g_params = ZSTD_adjustCParams(g_params, srcSize, 0); - BMK_benchParam(&testResult, srcBuffer, srcSize, ctx, g_params); + BMK_benchParam(&testResult, srcBuffer, srcSize, cctx, g_params); DISPLAY("\n"); return; } - if (g_target) + if (g_target) { g_cSpeedTarget[1] = g_target * 1000000; - else { + } else { /* baseline config for level 1 */ BMK_result_t testResult; params = ZSTD_getCParams(1, blockSize, 0); - BMK_benchParam(&testResult, srcBuffer, srcSize, ctx, params); + BMK_benchParam(&testResult, srcBuffer, srcSize, cctx, params); g_cSpeedTarget[1] = (testResult.cSpeed * 31) / 32; } @@ -618,14 +616,14 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize) int i; for (i=0; i<=maxSeeds; i++) { params = ZSTD_getCParams(i, blockSize, 0); - BMK_seed(winners, params, srcBuffer, srcSize, ctx); + BMK_seed(winners, params, srcBuffer, srcSize, cctx); } } BMK_printWinners(f, winners, srcSize); /* start tests */ { const time_t grillStart = time(NULL); do { - BMK_selectRandomStart(f, winners, srcBuffer, srcSize, ctx); + BMK_selectRandomStart(f, winners, srcBuffer, srcSize, cctx); } while (BMK_timeSpan(grillStart) < g_grillDuration_s); } @@ -635,19 +633,24 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize) /* clean up*/ fclose(f); - ZSTD_freeCCtx(ctx); +} + +static void BMK_benchMem(const void* srcBuffer, size_t srcSize) +{ + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + if (cctx==NULL) { DISPLAY("ZSTD_createCCtx() failed \n"); exit(1); } + BMK_benchMem_usingCCtx(cctx, srcBuffer, srcSize); + ZSTD_freeCCtx(cctx); } static int benchSample(void) { - void* origBuff; - size_t const benchedSize = sampleSize; - const char* const name = "Sample 10MiB"; + const char* const name = "Sample 10MB"; + size_t const benchedSize = 
10000000; - /* Allocation */ - origBuff = malloc(benchedSize); - if (!origBuff) { DISPLAY("\nError: not enough memory!\n"); return 12; } + void* origBuff = malloc(benchedSize); + if (!origBuff) { perror("not enough memory"); return 12; } /* Fill buffer */ RDG_genBuffer(origBuff, benchedSize, g_compressibility, 0.0, 0); @@ -662,6 +665,9 @@ static int benchSample(void) } +/* benchFiles() : + * note: while this function takes a table of filenames, + * in practice, only the first filename will be used */ int benchFiles(const char** fileNamesTable, int nbFiles) { int fileIdx=0; @@ -680,7 +686,7 @@ int benchFiles(const char** fileNamesTable, int nbFiles) return 11; } if (inFileSize == UTIL_FILESIZE_UNKNOWN) { - DISPLAY("Pb evaluatin size of %s \n", inFileName); + DISPLAY("Pb evaluating size of %s \n", inFileName); fclose(inFile); return 11; } @@ -843,6 +849,38 @@ int optimizeForSize(const char* inFileName, U32 targetSpeed) return 0; } +/*! readU32FromChar() : + * @return : unsigned integer value read from input in `char` format. + * allows and interprets K, KB, KiB, M, MB and MiB suffix. + * Will also modify `*stringPtr`, advancing it to position where it stopped reading. 
+ * Note : function will exit() program if digit sequence overflows */ +static unsigned readU32FromChar(const char** stringPtr) +{ + unsigned result = 0; + while ((**stringPtr >='0') && (**stringPtr <='9')) { + unsigned const max = (((unsigned)(-1)) / 10) - 1; + if (result > max) { + DISPLAY("error: numeric value too large \n"); + exit(1); + } + result *= 10, result += **stringPtr - '0', (*stringPtr)++ ; + } + if ((**stringPtr=='K') || (**stringPtr=='M')) { + unsigned const maxK = ((unsigned)(-1)) >> 10; + result <<= 10; + if (**stringPtr=='M') { + if (result > maxK) { + DISPLAY("error: numeric value too large \n"); + exit(1); + } + result <<= 10; + } + (*stringPtr)++; /* skip `K` or `M` */ + if (**stringPtr=='i') (*stringPtr)++; + if (**stringPtr=='B') (*stringPtr)++; + } + return result; +} static int usage(const char* exename) { @@ -893,12 +931,11 @@ int main(int argc, const char** argv) /* Welcome message */ DISPLAY(WELCOME_MESSAGE); - if (argc<1) { badusage(exename); return 1; } + if (argc<2) { assert(argc==1); badusage(exename); return 1; } for(i=1; i='0') & (argument[0] <='9')) - g_nbIterations = *argument++ - '0'; + g_nbIterations = readU32FromChar(&argument); break; /* Sample compressibility (when no file provided) */ case 'P': argument++; - { U32 proba32 = 0; - while ((argument[0]>= '0') & (argument[0]<= '9')) - proba32 = (proba32*10) + (*argument++ - '0'); + { U32 const proba32 = readU32FromChar(&argument); g_compressibility = (double)proba32 / 100.; } break; case 'O': argument++; - optimizer=1; - targetSpeed = 0; - while ((*argument >= '0') & (*argument <= '9')) - targetSpeed = (targetSpeed*10) + (*argument++ - '0'); + optimizer = 1; + targetSpeed = readU32FromChar(&argument); break; /* Run Single conf */ @@ -951,51 +983,35 @@ int main(int argc, const char** argv) switch(*argument) { case 'w': - g_params.windowLog = 0; argument++; - while ((*argument>= '0') && (*argument<='9')) - g_params.windowLog *= 10, g_params.windowLog += *argument++ - '0'; + 
g_params.windowLog = readU32FromChar(&argument); continue; case 'c': - g_params.chainLog = 0; argument++; - while ((*argument>= '0') && (*argument<='9')) - g_params.chainLog *= 10, g_params.chainLog += *argument++ - '0'; + g_params.chainLog = readU32FromChar(&argument); continue; case 'h': - g_params.hashLog = 0; argument++; - while ((*argument>= '0') && (*argument<='9')) - g_params.hashLog *= 10, g_params.hashLog += *argument++ - '0'; + g_params.hashLog = readU32FromChar(&argument); continue; case 's': - g_params.searchLog = 0; argument++; - while ((*argument>= '0') && (*argument<='9')) - g_params.searchLog *= 10, g_params.searchLog += *argument++ - '0'; + g_params.searchLog = readU32FromChar(&argument); continue; case 'l': /* search length */ - g_params.searchLength = 0; argument++; - while ((*argument>= '0') && (*argument<='9')) - g_params.searchLength *= 10, g_params.searchLength += *argument++ - '0'; + g_params.searchLength = readU32FromChar(&argument); continue; case 't': /* target length */ - g_params.targetLength = 0; argument++; - while ((*argument>= '0') && (*argument<='9')) - g_params.targetLength *= 10, g_params.targetLength += *argument++ - '0'; + g_params.targetLength = readU32FromChar(&argument); continue; case 'S': /* strategy */ argument++; - while ((*argument>= '0') && (*argument<='9')) - g_params.strategy = (ZSTD_strategy)(*argument++ - '0'); + g_params.strategy = readU32FromChar(&argument); continue; case 'L': - { int cLevel = 0; - argument++; - while ((*argument>= '0') && (*argument<='9')) - cLevel *= 10, cLevel += *argument++ - '0'; + { int const cLevel = readU32FromChar(&argument); g_params = ZSTD_getCParams(cLevel, g_blockSize, 0); continue; } @@ -1008,20 +1024,13 @@ int main(int argc, const char** argv) /* target level1 speed objective, in MB/s */ case 'T': argument++; - g_target = 0; - while ((*argument >= '0') && (*argument <= '9')) - g_target = (g_target*10) + (*argument++ - '0'); + g_target = readU32FromChar(&argument); break; /* cut 
input into blocks */ case 'B': - g_blockSize = 0; argument++; - while ((*argument >='0') & (*argument <='9')) - g_blockSize = (g_blockSize*10) + (*argument++ - '0'); - if (*argument=='K') g_blockSize<<=10, argument++; /* allows using KB notation */ - if (*argument=='M') g_blockSize<<=20, argument++; - if (*argument=='B') argument++; + g_blockSize = readU32FromChar(&argument); DISPLAY("using %u KB block size \n", g_blockSize>>10); break; From a3f2e84a37a1b8c228aae0a4cb1de629f12e070a Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 11 May 2018 19:43:08 -0700 Subject: [PATCH 2/7] added programmable constraints --- tests/paramgrill.c | 119 ++++++++++++++++++++++++++++----------------- 1 file changed, 75 insertions(+), 44 deletions(-) diff --git a/tests/paramgrill.c b/tests/paramgrill.c index 13ab7a28..d318696e 100644 --- a/tests/paramgrill.c +++ b/tests/paramgrill.c @@ -43,8 +43,6 @@ #define NBLOOPS 2 #define TIMELOOP (2 * SEC_TO_MICRO) -#define NB_LEVELS_TRACKED 30 - static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31)); #define COMPRESSIBILITY_DEFAULT 0.50 @@ -150,10 +148,11 @@ typedef struct } blockParam_t; -static size_t BMK_benchParam(BMK_result_t* resultPtr, - const void* srcBuffer, size_t srcSize, - ZSTD_CCtx* ctx, - const ZSTD_compressionParameters cParams) +static size_t +BMK_benchParam(BMK_result_t* resultPtr, + const void* srcBuffer, size_t srcSize, + ZSTD_CCtx* ctx, + const ZSTD_compressionParameters cParams) { const size_t blockSize = g_blockSize ? 
g_blockSize : srcSize; const U32 nbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize); @@ -191,8 +190,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr, crcOrig = XXH64(srcBuffer, srcSize, 0); /* Init blockTable data */ - { - U32 i; + { U32 i; size_t remaining = srcSize; const char* srcPtr = (const char*)srcBuffer; char* cPtr = (char*)compressedBuffer; @@ -323,8 +321,6 @@ static void BMK_printWinner(FILE* f, U32 cLevel, BMK_result_t result, ZSTD_compr } -static double g_cSpeedTarget[NB_LEVELS_TRACKED] = { 0. }; /* NB_LEVELS_TRACKED : checked at main() */ - typedef struct { BMK_result_t result; ZSTD_compressionParameters params; @@ -350,6 +346,36 @@ static void BMK_printWinners(FILE* f, const winnerInfo_t* winners, size_t srcSiz BMK_printWinners2(stdout, winners, srcSize); } + +typedef struct { + double cSpeed_min; + double dSpeed_min; + U32 windowLog_max; + ZSTD_strategy strategy_max; +} level_constraints_t; + +#define NB_LEVELS_TRACKED 23 +static level_constraints_t g_level_constraint[NB_LEVELS_TRACKED]; + +static void BMK_init_level_constraints(int bytePerSec_level1) +{ + assert(NB_LEVELS_TRACKED == ZSTD_maxCLevel()+1); + memset(g_level_constraint, 0, sizeof(g_level_constraint)); + g_level_constraint[1].cSpeed_min = bytePerSec_level1; + g_level_constraint[1].dSpeed_min = 0.; + g_level_constraint[1].windowLog_max = 19; + g_level_constraint[1].strategy_max = ZSTD_fast; + + /* establish speed objectives (relative to level 1) */ + { int l; + for (l=2; l= 20 may use windowlog > 23 */ + g_level_constraint[l].strategy_max = (l<19) ? 
ZSTD_btopt : ZSTD_btultra; /* level 19 is allowed to use btultra */ + } } +} + static int BMK_seed(winnerInfo_t* winners, const ZSTD_compressionParameters params, const void* srcBuffer, size_t srcSize, ZSTD_CCtx* ctx) @@ -360,9 +386,16 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_compressionParameters para BMK_benchParam(&testResult, srcBuffer, srcSize, ctx, params); + for (cLevel = 1; cLevel <= ZSTD_maxCLevel(); cLevel++) { - if (testResult.cSpeed < g_cSpeedTarget[cLevel]) + if (testResult.cSpeed < g_level_constraint[cLevel].cSpeed_min) continue; /* not fast enough for this level */ + if (testResult.dSpeed < g_level_constraint[cLevel].dSpeed_min) + continue; /* not fast enough for this level */ + if (params.windowLog > g_level_constraint[cLevel].windowLog_max) + continue; /* too much memory for this level */ + if (params.strategy > g_level_constraint[cLevel].strategy_max) + continue; /* forbidden strategy for this level */ if (winners[cLevel].result.cSize==0) { /* first solution for this cLevel */ winners[cLevel].result = testResult; @@ -575,40 +608,36 @@ static void BMK_selectRandomStart( } -static void BMK_benchMem_usingCCtx(ZSTD_CCtx* cctx, const void* srcBuffer, size_t srcSize) +static void BMK_benchOnce(ZSTD_CCtx* cctx, const void* srcBuffer, size_t srcSize) +{ + BMK_result_t testResult; + g_params = ZSTD_adjustCParams(g_params, srcSize, 0); + BMK_benchParam(&testResult, srcBuffer, srcSize, cctx, g_params); + DISPLAY("\n"); + return; +} + +static void BMK_benchFullTable(ZSTD_CCtx* cctx, const void* srcBuffer, size_t srcSize) { ZSTD_compressionParameters params; winnerInfo_t winners[NB_LEVELS_TRACKED]; const char* const rfName = "grillResults.txt"; FILE* const f = fopen(rfName, "w"); - const size_t blockSize = g_blockSize ? g_blockSize : srcSize; + const size_t blockSize = g_blockSize ? g_blockSize : srcSize; /* cut by block or not ? 
*/ /* init */ + assert(g_singleRun==0); memset(winners, 0, sizeof(winners)); if (f==NULL) { DISPLAY("error opening %s \n", rfName); exit(1); } - if (g_singleRun) { - BMK_result_t testResult; - g_params = ZSTD_adjustCParams(g_params, srcSize, 0); - BMK_benchParam(&testResult, srcBuffer, srcSize, cctx, g_params); - DISPLAY("\n"); - return; - } - if (g_target) { - g_cSpeedTarget[1] = g_target * 1000000; + BMK_init_level_constraints(g_target*1000000); } else { /* baseline config for level 1 */ + ZSTD_compressionParameters const l1params = ZSTD_getCParams(1, blockSize, 0); BMK_result_t testResult; - params = ZSTD_getCParams(1, blockSize, 0); - BMK_benchParam(&testResult, srcBuffer, srcSize, cctx, params); - g_cSpeedTarget[1] = (testResult.cSpeed * 31) / 32; - } - - /* establish speed objectives (relative to level 1) */ - { int i; - for (i=2; i<=ZSTD_maxCLevel(); i++) - g_cSpeedTarget[i] = (g_cSpeedTarget[i-1] * 25) / 32; + BMK_benchParam(&testResult, srcBuffer, srcSize, cctx, l1params); + BMK_init_level_constraints((int)((testResult.cSpeed * 31) / 32)); } /* populate initial solution */ @@ -635,6 +664,14 @@ static void BMK_benchMem_usingCCtx(ZSTD_CCtx* cctx, const void* srcBuffer, size_ fclose(f); } +static void BMK_benchMem_usingCCtx(ZSTD_CCtx* cctx, const void* srcBuffer, size_t srcSize) +{ + if (g_singleRun) + return BMK_benchOnce(cctx, srcBuffer, srcSize); + else + return BMK_benchFullTable(cctx, srcBuffer, srcSize); +} + static void BMK_benchMem(const void* srcBuffer, size_t srcSize) { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); @@ -922,17 +959,11 @@ int main(int argc, const char** argv) U32 main_pause = 0; U32 targetSpeed = 0; - /* checks */ - if (NB_LEVELS_TRACKED <= ZSTD_maxCLevel()) { - DISPLAY("Error : NB_LEVELS_TRACKED <= ZSTD_maxCLevel() \n"); - exit(1); - } + assert(argc>=1); /* for exename */ /* Welcome message */ DISPLAY(WELCOME_MESSAGE); - if (argc<2) { assert(argc==1); badusage(exename); return 1; } - for(i=1; i Date: Sat, 12 May 2018 09:40:04 -0700 
Subject: [PATCH 3/7] paramgrill: subtle change in level spacing distance between levels is slightly increased to compensate for level 1 speed improvements and the will to have stronger level 19 extending the range of speed to cover. --- tests/paramgrill.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/paramgrill.c b/tests/paramgrill.c index d318696e..984fb308 100644 --- a/tests/paramgrill.c +++ b/tests/paramgrill.c @@ -47,7 +47,7 @@ static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t #define COMPRESSIBILITY_DEFAULT 0.50 -static const double g_grillDuration_s = 90000; /* about 24 hours */ +static const double g_grillDuration_s = 99999; /* about 27 hours */ static const U64 g_maxParamTime = 15 * SEC_TO_MICRO; static const U64 g_maxVariationTime = 60 * SEC_TO_MICRO; static const int g_maxNbVariations = 64; @@ -369,9 +369,9 @@ static void BMK_init_level_constraints(int bytePerSec_level1) /* establish speed objectives (relative to level 1) */ { int l; for (l=2; l= 20 may use windowlog > 23 */ + g_level_constraint[l].windowLog_max = (l<20) ? 23 : l+5; /* only --ultra levels >= 20 can use windowlog > 23 */ g_level_constraint[l].strategy_max = (l<19) ? ZSTD_btopt : ZSTD_btultra; /* level 19 is allowed to use btultra */ } } } From b824d213cbdeeed846e4085da33ab0e47ab7d227 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 12 May 2018 10:21:30 -0700 Subject: [PATCH 4/7] fix #1115 --- programs/zstd.1 | 36 +++++++++++++++++++++++++----------- programs/zstd.1.md | 14 +++++++------- 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/programs/zstd.1 b/programs/zstd.1 index ccc6cacb..507933c9 100644 --- a/programs/zstd.1 +++ b/programs/zstd.1 @@ -1,5 +1,5 @@ . -.TH "ZSTD" "1" "2018-01-27" "zstd 1.3.4" "User Commands" +.TH "ZSTD" "1" "2018-05-12" "zstd 1.3.4" "User Commands" . 
.SH "NAME" \fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files @@ -111,8 +111,16 @@ enables long distance matching with \fB#\fR \fBwindowLog\fR, if not \fB#\fR is n Note: If \fBwindowLog\fR is set to larger than 27, \fB\-\-long=windowLog\fR or \fB\-\-memory=windowSize\fR needs to be passed to the decompressor\. . .TP +\fB\-\-fast[=#]\fR +switch to ultra\-fast compression levels\. If \fB=#\fR is not present, it defaults to \fB1\fR\. The higher the value, the faster the compression speed, at the cost of some compression ratio\. This setting overwrites compression level if one was set previously\. Similarly, if a compression level is set after \fB\-\-fast\fR, it overrides it\. +. +.TP \fB\-T#\fR, \fB\-\-threads=#\fR -Compress using \fB#\fR threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to ZSTDMT_NBTHREADS_MAX==256\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\. +Compress using \fB#\fR working threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to ZSTDMT_NBTHREADS_MAX==200\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\. +. +.TP +\fB\-\-single\-thread\fR +Does not spawn a thread for compression, use caller thread instead\. This is the only available mode when multithread support is disabled\. In this mode, compression is serialized with I/O\. (This is different from \fB\-T1\fR, which spawns 1 compression thread in parallel of I/O)\. Single\-thread mode also features lower memory usage\. . .TP \fB\-D file\fR @@ -335,13 +343,19 @@ The minimum \fIslen\fR is 3 and the maximum is 7\. . .TP \fBtargetLen\fR=\fItlen\fR, \fBtlen\fR=\fItlen\fR -Specify the minimum match length that causes a match finder to stop searching for better matches\. 
+The impact of this field varies depending on selected strategy\. . .IP -A larger minimum match length usually improves compression ratio but decreases compression speed\. This option is only used with strategies ZSTD_btopt and ZSTD_btultra\. +For ZSTD_btopt and ZSTD_btultra, it specifies the minimum match length that causes the match finder to stop searching for better matches\. A larger \fBtargetLen\fR usually improves compression ratio but decreases compression speed\. . .IP -The minimum \fItlen\fR is 4 and the maximum is 999\. +For ZSTD_fast, it specifies the amount of data skipped between match sampling\. Impact is reversed: a larger \fBtargetLen\fR increases compression speed but decreases compression ratio\. +. +.IP +For all other strategies, this field has no impact\. +. +.IP +The minimum \fItlen\fR is 1 and the maximum is 999\. . .TP \fBoverlapLog\fR=\fIovlog\fR, \fBovlog\fR=\fIovlog\fR @@ -374,7 +388,7 @@ This option is ignored unless long distance matching is enabled\. Larger/very small values usually decrease compression ratio\. . .IP -The minumum \fIldmslen\fR is 4 and the maximum is 4096 (default: 64)\. +The minimum \fIldmslen\fR is 4 and the maximum is 4096 (default: 64)\. . .TP \fBldmBucketSizeLog\fR=\fIldmblog\fR, \fBldmblog\fR=\fIldmblog\fR @@ -402,14 +416,14 @@ Larger values will improve compression speed\. Deviating far from the default va .IP The default value is \fBwlog \- ldmhlog\fR\. . -.SS "\-B#:" -Select the size of each compression job\. This parameter is available only when multi\-threading is enabled\. Default value is \fB4 * windowSize\fR, which means it varies depending on compression level\. \fB\-B#\fR makes it possible to select a custom value\. Note that job size must respect a minimum value which is enforced transparently\. This minimum is either 1 MB, or \fBoverlapSize\fR, whichever is largest\. -. 
.SS "Example" -The following parameters sets advanced compression options to those of predefined level 19 for files bigger than 256 KB: +The following parameters set advanced compression options to something similar to predefined level 19 for files bigger than 256 KB: . .P -\fB\-\-zstd\fR=windowLog=23,chainLog=23,hashLog=22,searchLog=6,searchLength=3,targetLength=48,strategy=6 +\fB\-\-zstd\fR=wlog=23,clog=23,hlog=22,slog=6,slen=3,tlen=48,strat=6 +. +.SS "\-B#:" +Select the size of each compression job\. This parameter is available only when multi\-threading is enabled\. Default value is \fB4 * windowSize\fR, which means it varies depending on compression level\. \fB\-B#\fR makes it possible to select a custom value\. Note that job size must respect a minimum value which is enforced transparently\. This minimum is either 1 MB, or \fBoverlapSize\fR, whichever is largest\. . .SH "BUGS" Report bugs at: https://github\.com/facebook/zstd/issues diff --git a/programs/zstd.1.md b/programs/zstd.1.md index 8a9d18da..22f7d042 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -392,7 +392,7 @@ The list of available _options_: Larger/very small values usually decrease compression ratio. - The minumum _ldmslen_ is 4 and the maximum is 4096 (default: 64). + The minimum _ldmslen_ is 4 and the maximum is 4096 (default: 64). - `ldmBucketSizeLog`=_ldmblog_, `ldmblog`=_ldmblog_: Specify the size of each bucket for the hash table used for long distance @@ -416,6 +416,12 @@ The list of available _options_: The default value is `wlog - ldmhlog`. +### Example +The following parameters set advanced compression options to something +similar to predefined level 19 for files bigger than 256 KB: + +`--zstd`=wlog=23,clog=23,hlog=22,slog=6,slen=3,tlen=48,strat=6 + ### -B#: Select the size of each compression job. This parameter is available only when multi-threading is enabled. 
@@ -424,12 +430,6 @@ Default value is `4 * windowSize`, which means it varies depending on compressio Note that job size must respect a minimum value which is enforced transparently. This minimum is either 1 MB, or `overlapSize`, whichever is largest. -### Example -The following parameters sets advanced compression options to those of -predefined level 19 for files bigger than 256 KB: - -`--zstd`=windowLog=23,chainLog=23,hashLog=22,searchLog=6,searchLength=3,targetLength=48,strategy=6 - BUGS ---- Report bugs at: https://github.com/facebook/zstd/issues From 3f89cd108109d413fc576dca7b81fcbf5ff6d28e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 12 May 2018 12:34:34 -0700 Subject: [PATCH 5/7] minor : factor out errorOut() --- tests/paramgrill.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/paramgrill.c b/tests/paramgrill.c index 984fb308..7e4c6b75 100644 --- a/tests/paramgrill.c +++ b/tests/paramgrill.c @@ -886,6 +886,11 @@ int optimizeForSize(const char* inFileName, U32 targetSpeed) return 0; } +static void errorOut(const char* msg) +{ + DISPLAY("%s \n", msg); exit(1); +} + /*! readU32FromChar() : * @return : unsigned integer value read from input in `char` format. * allows and interprets K, KB, KiB, M, MB and MiB suffix. 
@@ -893,23 +898,19 @@ int optimizeForSize(const char* inFileName, U32 targetSpeed) * Note : function will exit() program if digit sequence overflows */ static unsigned readU32FromChar(const char** stringPtr) { + const char errorMsg[] = "error: numeric value too large"; unsigned result = 0; while ((**stringPtr >='0') && (**stringPtr <='9')) { unsigned const max = (((unsigned)(-1)) / 10) - 1; - if (result > max) { - DISPLAY("error: numeric value too large \n"); - exit(1); - } + if (result > max) errorOut(errorMsg); result *= 10, result += **stringPtr - '0', (*stringPtr)++ ; } if ((**stringPtr=='K') || (**stringPtr=='M')) { unsigned const maxK = ((unsigned)(-1)) >> 10; + if (result > maxK) errorOut(errorMsg); result <<= 10; if (**stringPtr=='M') { - if (result > maxK) { - DISPLAY("error: numeric value too large \n"); - exit(1); - } + if (result > maxK) errorOut(errorMsg); result <<= 10; } (*stringPtr)++; /* skip `K` or `M` */ From 9cd5c63771a21c5769366e058d1d8bf1cea89970 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 12 May 2018 14:29:33 -0700 Subject: [PATCH 6/7] cli: control numeric argument overflow exit on overflow backported from paramgrill added associated test case --- programs/zstdcli.c | 22 ++++++++++++++++++---- tests/playTests.sh | 2 ++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 24488191..28bfdc53 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -219,20 +219,34 @@ static int exeNameMatch(const char* exeName, const char* test) (exeName[strlen(test)] == '\0' || exeName[strlen(test)] == '.'); } +static void errorOut(const char* msg) +{ + DISPLAY("%s \n", msg); exit(1); +} + /*! readU32FromChar() : * @return : unsigned integer value read from input in `char` format. * allows and interprets K, KB, KiB, M, MB and MiB suffix. * Will also modify `*stringPtr`, advancing it to position where it stopped reading. 
- * Note : function result can overflow if digit string > MAX_UINT */ + * Note : function will exit() program if digit sequence overflows */ static unsigned readU32FromChar(const char** stringPtr) { + const char errorMsg[] = "error: numeric value too large"; unsigned result = 0; - while ((**stringPtr >='0') && (**stringPtr <='9')) + while ((**stringPtr >='0') && (**stringPtr <='9')) { + unsigned const max = (((unsigned)(-1)) / 10) - 1; + if (result > max) errorOut(errorMsg); result *= 10, result += **stringPtr - '0', (*stringPtr)++ ; + } if ((**stringPtr=='K') || (**stringPtr=='M')) { + unsigned const maxK = ((unsigned)(-1)) >> 10; + if (result > maxK) errorOut(errorMsg); result <<= 10; - if (**stringPtr=='M') result <<= 10; - (*stringPtr)++ ; + if (**stringPtr=='M') { + if (result > maxK) errorOut(errorMsg); + result <<= 10; + } + (*stringPtr)++; /* skip `K` or `M` */ if (**stringPtr=='i') (*stringPtr)++; if (**stringPtr=='B') (*stringPtr)++; } diff --git a/tests/playTests.sh b/tests/playTests.sh index c8e27f23..200de4bd 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -108,6 +108,8 @@ $ECHO "test : --fast aka negative compression levels" $ZSTD --fast -f tmp # == -1 $ZSTD --fast=3 -f tmp # == -3 $ZSTD --fast=200000 -f tmp # == no compression +$ECHO "test : too large numeric argument" +$ZSTD --fast=9999999999 -f tmp && die "should have refused numeric value" $ECHO "test : compress to stdout" $ZSTD tmp -c > tmpCompressed $ZSTD tmp --stdout > tmpCompressed # long command format From 2c392952f994bd05d130a62cc8fbe667e3fc2815 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 13 May 2018 17:25:53 -0700 Subject: [PATCH 7/7] paramgrill: use NB_LEVELS_TRACKED in loop make it easier to generate/track more levels than ZSTD_maxClevel() --- tests/paramgrill.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/paramgrill.c b/tests/paramgrill.c index 7e4c6b75..3172ab06 100644 --- a/tests/paramgrill.c +++ b/tests/paramgrill.c @@ 
-42,6 +42,7 @@ #define NBLOOPS 2 #define TIMELOOP (2 * SEC_TO_MICRO) +#define NB_LEVELS_TRACKED 22 /* ensured being >= ZSTD_maxCLevel() in BMK_init_level_constraints() */ static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31)); @@ -333,7 +334,7 @@ static void BMK_printWinners2(FILE* f, const winnerInfo_t* winners, size_t srcSi fprintf(f, "\n /* Proposed configurations : */ \n"); fprintf(f, " /* W, C, H, S, L, T, strat */ \n"); - for (cLevel=0; cLevel <= ZSTD_maxCLevel(); cLevel++) + for (cLevel=0; cLevel <= NB_LEVELS_TRACKED; cLevel++) BMK_printWinner(f, cLevel, winners[cLevel].result, winners[cLevel].params, srcSize); } @@ -354,12 +355,11 @@ typedef struct { ZSTD_strategy strategy_max; } level_constraints_t; -#define NB_LEVELS_TRACKED 23 -static level_constraints_t g_level_constraint[NB_LEVELS_TRACKED]; +static level_constraints_t g_level_constraint[NB_LEVELS_TRACKED+1]; static void BMK_init_level_constraints(int bytePerSec_level1) { - assert(NB_LEVELS_TRACKED == ZSTD_maxCLevel()+1); + assert(NB_LEVELS_TRACKED >= ZSTD_maxCLevel()); memset(g_level_constraint, 0, sizeof(g_level_constraint)); g_level_constraint[1].cSpeed_min = bytePerSec_level1; g_level_constraint[1].dSpeed_min = 0.; @@ -368,7 +368,7 @@ static void BMK_init_level_constraints(int bytePerSec_level1) /* establish speed objectives (relative to level 1) */ { int l; - for (l=2; l= 20 can use windowlog > 23 */ @@ -387,7 +387,7 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_compressionParameters para BMK_benchParam(&testResult, srcBuffer, srcSize, ctx, params); - for (cLevel = 1; cLevel <= ZSTD_maxCLevel(); cLevel++) { + for (cLevel = 1; cLevel <= NB_LEVELS_TRACKED; cLevel++) { if (testResult.cSpeed < g_level_constraint[cLevel].cSpeed_min) continue; /* not fast enough for this level */ if (testResult.dSpeed < g_level_constraint[cLevel].dSpeed_min) @@ -597,7 +597,7 @@ static void BMK_selectRandomStart( const void* srcBuffer, size_t srcSize, 
ZSTD_CCtx* ctx) { - U32 const id = (FUZ_rand(&g_rand) % (ZSTD_maxCLevel()+1)); + U32 const id = FUZ_rand(&g_rand) % (NB_LEVELS_TRACKED+1); if ((id==0) || (winners[id].params.windowLog==0)) { /* use some random entry */ ZSTD_compressionParameters const p = ZSTD_adjustCParams(randomParams(), srcSize, 0); @@ -620,7 +620,7 @@ static void BMK_benchOnce(ZSTD_CCtx* cctx, const void* srcBuffer, size_t srcSize static void BMK_benchFullTable(ZSTD_CCtx* cctx, const void* srcBuffer, size_t srcSize) { ZSTD_compressionParameters params; - winnerInfo_t winners[NB_LEVELS_TRACKED]; + winnerInfo_t winners[NB_LEVELS_TRACKED+1]; const char* const rfName = "grillResults.txt"; FILE* const f = fopen(rfName, "w"); const size_t blockSize = g_blockSize ? g_blockSize : srcSize; /* cut by block or not ? */