From 6c51bf420c51246ec51a55f801f4853eaf167d8a Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 24 Sep 2018 18:16:08 -0700 Subject: [PATCH] bounds for --adapt mode can supply min and max compression level through advanced command : --adapt=min=#,max=# --- programs/fileio.c | 18 +++++++++++++++++- programs/fileio.h | 31 ++++++++++++++++++++----------- programs/zstd.1.md | 3 ++- programs/zstdcli.c | 42 +++++++++++++++++++++++++++++++++++++++++- tests/playTests.sh | 14 ++++++++++---- 5 files changed, 90 insertions(+), 18 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index e4ad6c4f..53f72aa7 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -292,6 +292,20 @@ void FIO_setAdaptiveMode(unsigned adapt) { EXM_THROW(1, "Adaptive mode is not compatible with single thread mode \n"); g_adaptiveMode = adapt; } +static int g_minAdaptLevel = -50; /* initializing this value requires a constant, so ZSTD_minCLevel() doesn't work */ +void FIO_setAdaptMin(int minCLevel) +{ +#ifndef ZSTD_NOCOMPRESS + assert(minCLevel >= ZSTD_minCLevel()); +#endif + g_minAdaptLevel = minCLevel; +} +static int g_maxAdaptLevel = 22; /* initializing this value requires a constant, so ZSTD_maxCLevel() doesn't work */ +void FIO_setAdaptMax(int maxCLevel) +{ + g_maxAdaptLevel = maxCLevel; +} + static U32 g_ldmFlag = 0; void FIO_setLdmFlag(unsigned ldmFlag) { g_ldmFlag = (ldmFlag>0); @@ -954,13 +968,15 @@ FIO_compressZstdFrame(const cRess_t* ressPtr, if (speedChange == slower) { DISPLAYLEVEL(6, "slower speed , higher compression \n") compressionLevel ++; - compressionLevel += (compressionLevel == 0); /* skip 0 */ if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel(); + if (compressionLevel > g_maxAdaptLevel) compressionLevel = g_maxAdaptLevel; + compressionLevel += (compressionLevel == 0); /* skip 0 */ ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, (unsigned)compressionLevel); } if (speedChange == faster) { DISPLAYLEVEL(6, "faster speed , lighter 
compression \n") compressionLevel --; + if (compressionLevel < g_minAdaptLevel) compressionLevel = g_minAdaptLevel; compressionLevel -= (compressionLevel == 0); /* skip 0 */ ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, (unsigned)compressionLevel); } diff --git a/programs/fileio.h b/programs/fileio.h index 94789929..4c7049cb 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -48,21 +48,23 @@ typedef enum { FIO_zstdCompression, FIO_gzipCompression, FIO_xzCompression, FIO_ ***************************************/ void FIO_setCompressionType(FIO_compressionType_t compressionType); void FIO_overwriteMode(void); -void FIO_setNotificationLevel(unsigned level); -void FIO_setSparseWrite(unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */ -void FIO_setDictIDFlag(unsigned dictIDFlag); -void FIO_setChecksumFlag(unsigned checksumFlag); -void FIO_setRemoveSrcFile(unsigned flag); -void FIO_setMemLimit(unsigned memLimit); -void FIO_setNbWorkers(unsigned nbWorkers); -void FIO_setBlockSize(unsigned blockSize); -void FIO_setOverlapLog(unsigned overlapLog); void FIO_setAdaptiveMode(unsigned adapt); +void FIO_setAdaptMin(int minCLevel); +void FIO_setAdaptMax(int maxCLevel); +void FIO_setBlockSize(unsigned blockSize); +void FIO_setChecksumFlag(unsigned checksumFlag); +void FIO_setDictIDFlag(unsigned dictIDFlag); +void FIO_setLdmBucketSizeLog(unsigned ldmBucketSizeLog); void FIO_setLdmFlag(unsigned ldmFlag); +void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog); void FIO_setLdmHashLog(unsigned ldmHashLog); void FIO_setLdmMinMatch(unsigned ldmMinMatch); -void FIO_setLdmBucketSizeLog(unsigned ldmBucketSizeLog); -void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog); +void FIO_setMemLimit(unsigned memLimit); +void FIO_setNbWorkers(unsigned nbWorkers); +void FIO_setNotificationLevel(unsigned level); +void FIO_setOverlapLog(unsigned overlapLog); +void FIO_setRemoveSrcFile(unsigned flag); +void FIO_setSparseWrite(unsigned sparse); /**< 0: no 
sparse; 1: disable on stdout; 2: always enabled */ /*-************************************* @@ -79,6 +81,7 @@ int FIO_decompressFilename (const char* outfilename, const char* infilename, con int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel); + /*-************************************* * Multiple File functions ***************************************/ @@ -96,9 +99,15 @@ int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles const char* dictFileName); +/*-************************************* +* Advanced stuff (should actually be hosted elsewhere) +***************************************/ + /* custom crash signal handler */ void FIO_addAbortHandler(void); + + #if defined (__cplusplus) } #endif diff --git a/programs/zstd.1.md b/programs/zstd.1.md index fcc1944b..c0c04698 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -134,9 +134,10 @@ the last one takes effect. This mode is the only one available when multithread support is disabled. Single-thread mode features lower memory usage. Final compressed result is slightly different from `-T1`. -* `--adapt` : +* `--adapt[=min=#,max=#]` : `zstd` will dynamically adapt compression level to perceived I/O conditions. Compression level adaptation can be observed live by using command `-v`. + Adaptation can be constrained between supplied `min` and `max` levels. The feature works when combined with multi-threading and `--long` mode. It does not work with `--single-thread`. It sets window size to 8 MB by default (can be changed manually, see `wlog`). 
diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 9ace49c4..1545d1ca 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -138,8 +138,8 @@ static int usage_advanced(const char* programName) #ifndef ZSTD_NOCOMPRESS DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel()); DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog); - DISPLAY( "--adapt : automatically adapt compression level to I/O conditions \n"); DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1); + DISPLAY( "--adapt : dynamically adapt compression level to I/O conditions \n"); #ifdef ZSTD_MULTITHREAD DISPLAY( " -T# : spawns # compression threads (default: 1, 0==# cores) \n"); DISPLAY( " -B# : select size of each job (default: 0==automatic) \n"); @@ -366,6 +366,30 @@ static ZDICT_fastCover_params_t defaultFastCoverParams(void) #endif +/** parseAdaptParameters() : + * reads adapt parameters from *stringPtr (e.g. "--adapt=min=1,max=19") and stores them into adaptMinPtr and adaptMaxPtr. + * Both adaptMinPtr and adaptMaxPtr must be already allocated and correctly initialized. + * There is no guarantee that any of these values will be updated. 
+ * @return 1 means that parsing was successful, + * @return 0 in case of malformed parameters + */ +static unsigned parseAdaptParameters(const char* stringPtr, int* adaptMinPtr, int* adaptMaxPtr) +{ + for ( ; ;) { + if (longCommandWArg(&stringPtr, "min=")) { *adaptMinPtr = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "max=")) { *adaptMaxPtr = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + DISPLAYLEVEL(4, "invalid compression parameter \n"); + return 0; + } + if (stringPtr[0] != 0) return 0; /* check the end of string */ + if (*adaptMinPtr > *adaptMaxPtr) { + DISPLAYLEVEL(4, "incoherent adaptation limits \n"); + return 0; + } + return 1; +} + + /** parseCompressionParameters() : * reads compression parameters from *stringPtr (e.g. "--zstd=wlog=23,clog=23,hlog=22,slog=6,slen=3,tlen=48,strat=6") into *params * @return 1 means that compression parameters were correct @@ -430,6 +454,15 @@ typedef enum { zom_compress, zom_decompress, zom_test, zom_bench, zom_train, zom #define CLEAN_RETURN(i) { operationResult = (i); goto _end; } +#ifdef ZSTD_NOCOMPRESS +/* symbols from compression library are not defined and should not be invoked */ +# define MINCLEVEL -50 +# define MAXCLEVEL 22 +#else +# define MINCLEVEL ZSTD_minCLevel() +# define MAXCLEVEL ZSTD_maxCLevel() +#endif + int main(int argCount, const char* argv[]) { int argNb, @@ -440,6 +473,8 @@ int main(int argCount, const char* argv[]) main_pause = 0, nbWorkers = 0, adapt = 0, + adaptMin = MINCLEVEL, + adaptMax = MAXCLEVEL, nextArgumentIsOutFileName = 0, nextArgumentIsMaxDict = 0, nextArgumentIsDictID = 0, @@ -559,6 +594,7 @@ int main(int argCount, const char* argv[]) if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(1); continue; } if (!strcmp(argument, "--priority=rt")) { setRealTimePrio = 1; continue; } if (!strcmp(argument, "--adapt")) { adapt = 1; continue; } + if 
(longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) CLEAN_RETURN(badusage(programName)); continue; } if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; } if (!strcmp(argument, "--format=zstd")) { suffix = ZSTD_EXTENSION; FIO_setCompressionType(FIO_zstdCompression); continue; } #ifdef ZSTD_GZCOMPRESS @@ -1014,6 +1050,10 @@ int main(int argCount, const char* argv[]) if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) FIO_setLdmBucketSizeLog(g_ldmBucketSizeLog); if (g_ldmHashEveryLog != LDM_PARAM_DEFAULT) FIO_setLdmHashEveryLog(g_ldmHashEveryLog); FIO_setAdaptiveMode(adapt); + FIO_setAdaptMin(adaptMin); + FIO_setAdaptMax(adaptMax); + if (adaptMin > cLevel) cLevel = adaptMin; + if (adaptMax < cLevel) cLevel = adaptMax; if ((filenameIdx==1) && outFileName) operationResult = FIO_compressFilename(outFileName, filenameTable[0], dictFileName, cLevel, compressionParams); diff --git a/tests/playTests.sh b/tests/playTests.sh index 5898d183..e49b2344 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -103,6 +103,7 @@ else fi + $ECHO "\n===> simple tests " ./datagen > tmp @@ -811,11 +812,20 @@ roundTripTest -g1M -P50 "1 --single-thread --long=29" " --long=28 --memory=512MB roundTripTest -g1M -P50 "1 --single-thread --long=29" " --zstd=wlog=28 --memory=512MB" +$ECHO "\n===> adaptive mode " +roundTripTest -g270000000 " --adapt" +roundTripTest -g27000000 " --adapt=min=1,max=4" +./datagen > tmp +$ZSTD -f -vv --adapt=min=10,max=9 tmp && die "--adapt must fail on incoherent bounds" + + if [ "$1" != "--test-large-data" ]; then $ECHO "Skipping large data tests" exit 0 fi + + $ECHO "\n===> large files tests " roundTripTest -g270000000 1 @@ -858,10 +868,6 @@ roundTripTest -g700M -P50 "1 --single-thread --long=29" roundTripTest -g600M -P50 "1 --single-thread --long --zstd=wlog=29,clog=28" -$ECHO "\n===> adaptive mode " -roundTripTest -g270000000 " --adapt" - - if [ -n "$hasMT" ] then $ECHO 
"\n===> zstdmt long round-trip tests "