diff --git a/Makefile b/Makefile index d8e740bd..6e83f33a 100644 --- a/Makefile +++ b/Makefile @@ -120,7 +120,7 @@ asan: clean $(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=address" msan: clean - $(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=memory" # datagen.c used to fail this test for no obvious reason + $(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=memory -fno-omit-frame-pointer" # datagen.c fails this test for no obvious reason asan32: clean $(MAKE) -C $(PRGDIR) test32 CC=clang MOREFLAGS="-g -fsanitize=address" diff --git a/NEWS b/NEWS index 5e46e0a7..20765a3d 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,8 @@ v0.8.1 +Changed : --ultra now enables levels beyond 19 Changed : -i# now selects benchmark time in second Fixed : ZSTD_compress* can now compress > 4 GB in a single pass, reported by Nick Terrell +Fixed : speed regression on specific patterns (#272) v0.8.0 Improved : better speed on clang and gcc -O2, thanks to Eric Biggers diff --git a/images/Dspeed4.png b/images/Dspeed4.png index e75091dd..eba485d0 100644 Binary files a/images/Dspeed4.png and b/images/Dspeed4.png differ diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index a18ab446..3416a979 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -611,18 +611,17 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params, { const BYTE* codePtr = seqStorePtr->llCode; U32 u; for (u=0; usequences; - U32 offset1 = seq[0].offset - 3; - U32 offset2 = seq[1].offset - 3; - if (offset1 >= MAXREPOFFSET) offset1 = 0; - if (offset2 >= MAXREPOFFSET) offset2 = 0; - repOffsets[offset1] += 3; - repOffsets[offset2] += 1; - } - } + if (nbSeq >= 2) { /* rep offsets */ + const seqDef* const seq = seqStorePtr->sequencesStart; + U32 offset1 = seq[0].offset - 3; + U32 offset2 = seq[1].offset - 3; + if (offset1 >= MAXREPOFFSET) offset1 = 0; + if (offset2 >= MAXREPOFFSET) offset2 = 0; + repOffsets[offset1] += 3; + repOffsets[offset2] += 1; + } } } } /* @@ -676,7 +675,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, short matchLengthNCount[MaxML+1]; U32 litLengthCount[MaxLL+1]; short litLengthNCount[MaxLL+1]; - U32 repOffset[MAXREPOFFSET] = { 0 }; + U32 repOffset[MAXREPOFFSET]; offsetCount_t bestRepOffset[ZSTD_REP_NUM+1]; EStats_ress_t esr; ZSTD_parameters params; @@ -701,6 +700,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, for (u=0; u<=offcodeMax; u++) offcodeCount[u]=1; for (u=0; u<=MaxML; u++) matchLengthCount[u]=1; for (u=0; u<=MaxLL; u++) litLengthCount[u]=1; + memset(repOffset, 0, sizeof(repOffset)); repOffset[1] = repOffset[4] = repOffset[8] = 1; memset(bestRepOffset, 0, sizeof(bestRepOffset)); if (compressionLevel==0) compressionLevel=g_compressionLevel_default; diff --git a/programs/playTests.sh b/programs/playTests.sh index 1fc508f9..70ac35ec 100755 --- a/programs/playTests.sh +++ b/programs/playTests.sh @@ -47,7 +47,7 @@ $ECHO "\n**** simple tests **** " $ZSTD -f tmp # trivial compression case, creates tmp.zst $ZSTD -df tmp.zst # trivial decompression case (overwrites tmp) $ECHO "test : too large compression level (must fail)" -$ZSTD -99 tmp && die "too large compression level undetected" +$ZSTD -99 -f tmp # too large compression level, automatic sized down $ECHO "test : compress to stdout" $ZSTD tmp -c > tmpCompressed $ZSTD tmp --stdout > tmpCompressed # long command format diff --git a/programs/zstdcli.c b/programs/zstdcli.c index f4fbba53..faf3bb7b 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -35,6 +35,10 @@ # define ZSTDCLI_CLEVEL_DEFAULT 3 #endif +#ifndef ZSTDCLI_CLEVEL_MAX +# define ZSTDCLI_CLEVEL_MAX 19 +#endif + /*-************************************ * Includes @@ -88,6 +92,8 @@ #define MB *(1 <<20) #define GB *(1U<<30) +#define DEFAULT_DISPLAY_LEVEL 2 + static const char* g_defaultDictName = "dictionary"; static const unsigned g_defaultMaxDictSize = 110 KB; static const int g_defaultDictCLevel = 5; @@ -100,7 +106,7 @@ static const unsigned g_defaultSelectivityLevel = 9; #define DISPLAY(...) fprintf(displayOut, __VA_ARGS__) #define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } static FILE* displayOut; -static unsigned displayLevel = 2; /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */ +static unsigned displayLevel = DEFAULT_DISPLAY_LEVEL; /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */ /*-************************************ @@ -115,7 +121,7 @@ static int usage(const char* programName) DISPLAY( " with no FILE, or when FILE is - , read standard input\n"); DISPLAY( "Arguments :\n"); #ifndef ZSTD_NOCOMPRESS - DISPLAY( " -# : # compression level (1-%u, default:%u) \n", ZSTD_maxCLevel(), ZSTDCLI_CLEVEL_DEFAULT); + DISPLAY( " -# : # compression level (1-%u, default:%u) \n", ZSTDCLI_CLEVEL_MAX, ZSTDCLI_CLEVEL_DEFAULT); #endif #ifndef ZSTD_NODECOMPRESS DISPLAY( " -d : decompression \n"); @@ -136,14 +142,14 @@ static int usage_advanced(const char* programName) DISPLAY( "\n"); DISPLAY( "Advanced arguments :\n"); DISPLAY( " -V : display Version number and exit\n"); - DISPLAY( " -v : verbose mode\n"); + DISPLAY( " -v : verbose mode; specify multiple times to increase log level (default:%d)\n", DEFAULT_DISPLAY_LEVEL); DISPLAY( " -q : suppress warnings; specify twice to suppress errors too\n"); DISPLAY( " -c : force write to standard output, even if it is the console\n"); #ifdef UTIL_HAS_CREATEFILELIST DISPLAY( " -r : operate recursively on directories\n"); #endif #ifndef ZSTD_NOCOMPRESS - DISPLAY( "--ultra : enable ultra modes (requires more memory to decompress)\n"); + DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel()); DISPLAY( "--no-dictID : don't write dictID into header (dictionary compression)\n"); DISPLAY( "--[no-]check : integrity check (default:enabled)\n"); #endif @@ -215,7 +221,8 @@ int main(int argCount, const char** argv) nextArgumentIsOutFileName=0, nextArgumentIsMaxDict=0, nextArgumentIsDictID=0, - nextArgumentIsFile=0; + nextArgumentIsFile=0, + ultra=0; int cLevel = ZSTDCLI_CLEVEL_DEFAULT; int cLevelLast = 1; unsigned recursive = 0; @@ -268,7 +275,7 @@ int main(int argCount, const char** argv) if (!strcmp(argument, "--verbose")) { displayLevel++; continue; } if (!strcmp(argument, "--quiet")) { displayLevel--; continue; } if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; displayLevel-=(displayLevel==2); continue; } - if (!strcmp(argument, "--ultra")) { FIO_setMaxWLog(0); continue; } + if (!strcmp(argument, "--ultra")) { ultra=1; FIO_setMaxWLog(0); continue; } if (!strcmp(argument, "--check")) { FIO_setChecksumFlag(2); continue; } if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(0); continue; } if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(0); continue; } @@ -298,10 +305,7 @@ int main(int argCount, const char** argv) #ifndef ZSTD_NOCOMPRESS /* compression Level */ if ((*argument>='0') && (*argument<='9')) { - cLevel = readU32FromChar(&argument); - dictCLevel = cLevel; - if (dictCLevel > ZSTD_maxCLevel()) - CLEAN_RETURN(badusage(programName)); + dictCLevel = cLevel = readU32FromChar(&argument); continue; } #endif @@ -317,7 +321,7 @@ int main(int argCount, const char** argv) case 'd': decode=1; argument++; break; /* Force stdout, even if stdout==console */ - case 'c': forceStdout=1; outFileName=stdoutmark; displayLevel-=(displayLevel==2); argument++; break; + case 'c': forceStdout=1; outFileName=stdoutmark; argument++; break; /* Use file content as dictionary */ case 'D': nextEntryIsDictionary = 1; argument++; break; @@ -488,7 +492,14 @@ int main(int argCount, const char** argv) CLEAN_RETURN(filenameIdx); } - /* No warning message in pipe mode (stdin + stdout) or multiple mode */ + /* check compression level limits */ + { int const maxCLevel = ultra ? ZSTD_maxCLevel() : ZSTDCLI_CLEVEL_MAX; + if (cLevel > maxCLevel) { + DISPLAYLEVEL(2, "Warning : compression level higher than max, reduced to %i \n", maxCLevel); + cLevel = maxCLevel; + } } + + /* No warning message in pipe mode (stdin + stdout) or multi-files mode */ if (!strcmp(filenameTable[0], stdinmark) && outFileName && !strcmp(outFileName,stdoutmark) && (displayLevel==2)) displayLevel=1; if ((filenameIdx>1) & (displayLevel==2)) displayLevel=1;