diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 80772592..207fd84b 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -154,7 +154,7 @@ MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pI return (size_t)(pIn - pStart); } - if (MEM_32bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } + if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; } if ((pIn 0 */ +void ZSTD_HC_validateParams(ZSTD_HC_parameters* params, size_t srcSize) +{ + const U32 chainplus = (params->strategy == ZSTD_HC_btlazy2); + + /* validate params */ + if (params->windowLog > ZSTD_HC_WINDOWLOG_MAX) params->windowLog = ZSTD_HC_WINDOWLOG_MAX; + if (params->windowLog < ZSTD_HC_WINDOWLOG_MIN) params->windowLog = ZSTD_HC_WINDOWLOG_MIN; + + /* correct params, to use less memory */ + if (srcSize > 0) + { + U32 srcLog = ZSTD_highbit((U32)srcSize-1) + 1; + if (params->windowLog > srcLog) params->windowLog = srcLog; + } + + if (params->chainLog > params->windowLog + chainplus) params->chainLog = params->windowLog+chainplus; /* <= ZSTD_HC_CHAINLOG_MAX */ + if (params->chainLog < ZSTD_HC_CHAINLOG_MIN) params->chainLog = ZSTD_HC_CHAINLOG_MIN; + if (params->hashLog > ZSTD_HC_HASHLOG_MAX) params->hashLog = ZSTD_HC_HASHLOG_MAX; + if (params->hashLog < ZSTD_HC_HASHLOG_MIN) params->hashLog = ZSTD_HC_HASHLOG_MIN; + if (params->searchLog > ZSTD_HC_SEARCHLOG_MAX) params->searchLog = ZSTD_HC_SEARCHLOG_MAX; + if (params->searchLog < ZSTD_HC_SEARCHLOG_MIN) params->searchLog = ZSTD_HC_SEARCHLOG_MIN; + if (params->searchLength> ZSTD_HC_SEARCHLENGTH_MAX) params->searchLength = ZSTD_HC_SEARCHLENGTH_MAX; + if (params->searchLength< ZSTD_HC_SEARCHLENGTH_MIN) params->searchLength = ZSTD_HC_SEARCHLENGTH_MIN; + if ((U32)params->strategy>(U32)ZSTD_HC_btlazy2) params->strategy = ZSTD_HC_btlazy2; + 
if ((int)params->strategy<(int)ZSTD_HC_greedy) params->strategy = ZSTD_HC_greedy; +} + + static size_t ZSTD_HC_resetCCtx_advanced (ZSTD_HC_CCtx* zc, ZSTD_HC_parameters params) { - /* validate params */ - if (params.windowLog > ZSTD_HC_WINDOWLOG_MAX) params.windowLog = ZSTD_HC_WINDOWLOG_MAX; - if (params.windowLog < ZSTD_HC_WINDOWLOG_MIN) params.windowLog = ZSTD_HC_WINDOWLOG_MIN; - if (params.chainLog > params.windowLog) params.chainLog = params.windowLog; /* <= ZSTD_HC_CHAINLOG_MAX */ - if (params.chainLog < ZSTD_HC_CHAINLOG_MIN) params.chainLog = ZSTD_HC_CHAINLOG_MIN; - if (params.hashLog > ZSTD_HC_HASHLOG_MAX) params.hashLog = ZSTD_HC_HASHLOG_MAX; - if (params.hashLog < ZSTD_HC_HASHLOG_MIN) params.hashLog = ZSTD_HC_HASHLOG_MIN; - if (params.searchLog > ZSTD_HC_SEARCHLOG_MAX) params.searchLog = ZSTD_HC_SEARCHLOG_MAX; - if (params.searchLog < ZSTD_HC_SEARCHLOG_MIN) params.searchLog = ZSTD_HC_SEARCHLOG_MIN; - if (params.searchLength> ZSTD_HC_SEARCHLENGTH_MAX) params.searchLength = ZSTD_HC_SEARCHLENGTH_MAX; - if (params.searchLength< ZSTD_HC_SEARCHLENGTH_MIN) params.searchLength = ZSTD_HC_SEARCHLENGTH_MIN; + ZSTD_HC_validateParams(&params, 0); /* reserve table memory */ { @@ -190,8 +212,6 @@ static size_t ZSTD_HC_hashPtr(const void* p, U32 hBits, U32 mls) /* ************************************* * Binary Tree search ***************************************/ -#define BT_SHORTCUT 256 - /** ZSTD_HC_insertBt1 : add one ptr to tree @ip : assumed <= iend-8 */ static void ZSTD_HC_insertBt1(ZSTD_HC_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares) @@ -209,7 +229,7 @@ static void ZSTD_HC_insertBt1(ZSTD_HC_CCtx* zc, const BYTE* const ip, const U32 const U32 btLow = btMask >= current ? 
0 : current - btMask; U32* smallerPtr = bt + 2*(current&btMask); U32* largerPtr = bt + 2*(current&btMask) + 1; - U32 dummy32; /* to be nullified at the end */ + U32 dummy32; /* to be nullified at the end */ const U32 windowSize = 1 << zc->params.windowLog; const U32 windowLow = windowSize >= current ? 0 : current - windowSize; @@ -223,30 +243,30 @@ static void ZSTD_HC_insertBt1(ZSTD_HC_CCtx* zc, const BYTE* const ip, const U32 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); - if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ - break; /* just drop , to guarantee consistency (miss a bit of compression; if someone knows better, please tell) */ + if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ + break; /* just drop , to guarantee consistency (miss a bit of compression; if someone knows better, please tell) */ if (match[matchLength] < ip[matchLength]) - { + { /* match is smaller than current */ *smallerPtr = matchIndex; /* update smaller idx */ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - } + } else - { + { /* match is larger than current */ *largerPtr = matchIndex; commonLengthLarger = matchLength; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ largerPtr = nextPtr; matchIndex = nextPtr[0]; - } + } } - *smallerPtr = *largerPtr = 0; + *smallerPtr = *largerPtr = 0; } @@ -273,9 +293,9 @@ size_t ZSTD_HC_insertBtAndFindBestMatch ( U32* smallerPtr = bt + 
2*(current&btMask); U32* largerPtr = bt + 2*(current&btMask) + 1; U32 bestLength = 0; - U32 dummy32; /* to be nullified at the end */ + U32 dummy32; /* to be nullified at the end */ - hashTable[h] = (U32)(ip-base); /* Update Hash Table */ + hashTable[h] = (U32)(ip-base); /* Update Hash Table */ while (nbCompares-- && (matchIndex > windowLow)) { @@ -289,34 +309,34 @@ size_t ZSTD_HC_insertBtAndFindBestMatch ( { bestLength = (U32)matchLength; *offsetPtr = current - matchIndex; - if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ - break; /* drop, next to null, to guarantee consistency (is there a way to do better ?) */ + if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ + break; /* drop, next to null, to guarantee consistency (is there a way to do better ?) */ } if (match[matchLength] < ip[matchLength]) - { + { /* match is smaller than current */ *smallerPtr = matchIndex; /* update smaller idx */ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - } + } else - { + { /* match is larger than current */ *largerPtr = matchIndex; commonLengthLarger = matchLength; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ largerPtr = nextPtr; matchIndex = nextPtr[0]; - } + } } - *smallerPtr = *largerPtr = 0; + *smallerPtr = *largerPtr = 0; zc->nextToUpdate = current+1; /* current has been inserted */ - if (bestLength < MINMATCH) return 
0; + if (bestLength < MINMATCH) return 0; return bestLength; } @@ -326,7 +346,7 @@ static void ZSTD_HC_updateTree(ZSTD_HC_CCtx* zc, const BYTE* const ip, const BYT const BYTE* const base = zc->base; const U32 target = (U32)(ip - base); U32 idx = zc->nextToUpdate; - //size_t dummy; + //size_t dummy; for( ; idx < target ; idx++) ZSTD_HC_insertBt1(zc, base+idx, mls, iend, nbCompares); @@ -510,7 +530,7 @@ static U32 ZSTD_HC_insertAndFindFirstIndex (ZSTD_HC_CCtx* zc, const BYTE* ip, U } zc->nextToUpdate = target; - return hashTable[ZSTD_HC_hashPtr(ip, hashLog, mls)]; + return hashTable[ZSTD_HC_hashPtr(ip, hashLog, mls)]; } @@ -891,21 +911,30 @@ size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstS } +typedef size_t (*ZSTD_HC_blockCompressor) (ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); + + +static ZSTD_HC_blockCompressor ZSTD_HC_selectBlockCompressor(ZSTD_HC_strategy strat) +{ + switch(strat) + { + default : + case ZSTD_HC_greedy: + return ZSTD_HC_compressBlock_greedy; + case ZSTD_HC_lazy: + return ZSTD_HC_compressBlock_lazy; + case ZSTD_HC_lazydeep: + return ZSTD_HC_compressBlock_lazydeep; + case ZSTD_HC_btlazy2: + return ZSTD_HC_compressBlock_btLazy2; + } +} + + size_t ZSTD_HC_compressBlock(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) { - switch(ctx->params.strategy) - { - case ZSTD_HC_greedy: - return ZSTD_HC_compressBlock_greedy(ctx, dst, maxDstSize, src, srcSize); - case ZSTD_HC_lazy: - return ZSTD_HC_compressBlock_lazy(ctx, dst, maxDstSize, src, srcSize); - case ZSTD_HC_lazydeep: - return ZSTD_HC_compressBlock_lazydeep(ctx, dst, maxDstSize, src, srcSize); - case ZSTD_HC_btlazy2: - return ZSTD_HC_compressBlock_btLazy2(ctx, dst, maxDstSize, src, srcSize); - default : - return ERROR(GENERIC); /* unknown block compressor */ - } + ZSTD_HC_blockCompressor blockCompressor = ZSTD_HC_selectBlockCompressor(ctx->params.strategy); + return blockCompressor(ctx, dst, 
maxDstSize, src, srcSize); } @@ -919,25 +948,8 @@ static size_t ZSTD_HC_compress_generic (ZSTD_HC_CCtx* ctxPtr, BYTE* const ostart = (BYTE*)dst; BYTE* op = ostart; BYTE* const oend = op + maxDstSize; - size_t (*blockCompressor) (ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); + const ZSTD_HC_blockCompressor blockCompressor = ZSTD_HC_selectBlockCompressor(ctxPtr->params.strategy); - switch(ctxPtr->params.strategy) - { - case ZSTD_HC_greedy: - blockCompressor = ZSTD_HC_compressBlock_greedy; - break; - case ZSTD_HC_lazy: - blockCompressor = ZSTD_HC_compressBlock_lazy; - break; - case ZSTD_HC_lazydeep: - blockCompressor = ZSTD_HC_compressBlock_lazydeep; - break; - case ZSTD_HC_btlazy2: - blockCompressor = ZSTD_HC_compressBlock_btLazy2; - break; - default : - return ERROR(GENERIC); /* unknown block compressor */ - } while (remaining > blockSize) { diff --git a/lib/zstdhc_static.h b/lib/zstdhc_static.h index 36e0781f..06174fcd 100644 --- a/lib/zstdhc_static.h +++ b/lib/zstdhc_static.h @@ -59,7 +59,7 @@ typedef struct /* parameters boundaries */ #define ZSTD_HC_WINDOWLOG_MAX 26 #define ZSTD_HC_WINDOWLOG_MIN 18 -#define ZSTD_HC_CHAINLOG_MAX ZSTD_HC_WINDOWLOG_MAX +#define ZSTD_HC_CHAINLOG_MAX (ZSTD_HC_WINDOWLOG_MAX+1) #define ZSTD_HC_CHAINLOG_MIN 4 #define ZSTD_HC_HASHLOG_MAX 28 #define ZSTD_HC_HASHLOG_MIN 4 @@ -79,6 +79,11 @@ size_t ZSTD_HC_compress_advanced (ZSTD_HC_CCtx* ctx, const void* src, size_t srcSize, ZSTD_HC_parameters params); +/** ZSTD_HC_validateParams + correct params value to remain within authorized range + optimize for srcSize if srcSize > 0 */ +void ZSTD_HC_validateParams(ZSTD_HC_parameters* params, size_t srcSize); + /* ************************************* * Streaming functions @@ -97,33 +102,32 @@ static const ZSTD_HC_parameters ZSTD_HC_defaultParameters[ZSTD_HC_MAX_CLEVEL+1] { 18, 12, 14, 1, 4, ZSTD_HC_greedy }, /* level 0 - never used */ { 18, 12, 14, 1, 4, ZSTD_HC_greedy }, /* level 1 - in fact redirected towards 
zstd fast */ { 18, 12, 15, 2, 4, ZSTD_HC_greedy }, /* level 2 */ - { 19, 13, 17, 3, 5, ZSTD_HC_greedy }, /* level 3 */ - { 20, 18, 19, 2, 5, ZSTD_HC_greedy }, /* level 4 */ + { 19, 14, 18, 2, 5, ZSTD_HC_greedy }, /* level 3 */ + { 20, 17, 19, 3, 5, ZSTD_HC_greedy }, /* level 4 */ { 20, 18, 19, 2, 5, ZSTD_HC_lazy }, /* level 5 */ - { 20, 18, 19, 2, 5, ZSTD_HC_lazydeep }, //{ 20, 18, 20, 3, 5, ZSTD_HC_lazy }, /* level 6 */ - { 20, 18, 19, 2, 5, ZSTD_HC_btlazy2 }, //{ 20, 18, 20, 4, 5, ZSTD_HC_lazy }, /* level 7 */ + { 21, 18, 20, 3, 5, ZSTD_HC_lazy }, /* level 6 */ + { 21, 18, 20, 4, 5, ZSTD_HC_lazy }, /* level 7 */ { 21, 19, 20, 4, 5, ZSTD_HC_lazy }, /* level 8 */ { 21, 19, 20, 5, 5, ZSTD_HC_lazy }, /* level 9 */ { 21, 20, 20, 5, 5, ZSTD_HC_lazy }, /* level 10 */ { 21, 20, 20, 5, 5, ZSTD_HC_lazydeep }, /* level 11 */ - { 21, 20, 20, 5, 5, ZSTD_HC_btlazy2 }, //{ 22, 20, 22, 5, 5, ZSTD_HC_lazydeep }, /* level 12 */ + { 22, 20, 22, 5, 5, ZSTD_HC_lazydeep }, /* level 12 */ { 22, 20, 22, 6, 5, ZSTD_HC_lazydeep }, /* level 13 */ { 21, 21, 22, 6, 5, ZSTD_HC_lazydeep }, /* level 14 */ { 22, 21, 22, 6, 5, ZSTD_HC_lazydeep }, /* level 15 */ - { 22, 21, 23, 7, 5, ZSTD_HC_lazydeep }, /* level 16 */ - { 23, 21, 23, 7, 5, ZSTD_HC_lazydeep }, /* level 17 */ - { 23, 22, 23, 7, 5, ZSTD_HC_lazydeep }, /* level 18 */ - { 23, 22, 23, 7, 5, ZSTD_HC_lazydeep }, /* level 19 */ - { 23, 22, 23, 8, 5, ZSTD_HC_lazydeep }, /* level 20 */ - { 23, 22, 23, 8, 5, ZSTD_HC_lazydeep }, /* level 21 */ - { 23, 23, 24, 8, 5, ZSTD_HC_lazydeep }, /* level 22 */ - { 24, 24, 24, 8, 5, ZSTD_HC_lazydeep }, /* level 23 */ - { 23, 23, 23, 9, 5, ZSTD_HC_lazydeep }, /* level 24 */ - { 24, 23, 23, 9, 5, ZSTD_HC_lazydeep }, /* level 25 */ - { 24, 24, 24, 9, 5, ZSTD_HC_lazydeep }, /* level 26 */ + { 22, 21, 22, 5, 5, ZSTD_HC_btlazy2 }, /* level 16 */ + { 22, 22, 23, 5, 5, ZSTD_HC_btlazy2 }, /* level 17 */ + { 22, 22, 23, 7, 5, ZSTD_HC_btlazy2 }, /* level 18 */ + { 24, 24, 22, 7, 5, ZSTD_HC_btlazy2 }, /* level 19 */ 
+ { 25, 25, 23, 8, 5, ZSTD_HC_btlazy2 }, /* level 20 */ + { 25, 25, 23, 8, 5, ZSTD_HC_btlazy2 }, /* level 21 */ + { 25, 25, 23, 8, 5, ZSTD_HC_btlazy2 }, /* level 22 */ + { 25, 25, 23, 8, 5, ZSTD_HC_btlazy2 }, /* level 23 */ + { 25, 25, 23, 8, 5, ZSTD_HC_btlazy2 }, /* level 24 */ + { 25, 25, 23, 8, 5, ZSTD_HC_btlazy2 }, /* level 25 */ + { 25, 25, 24, 9, 5, ZSTD_HC_btlazy2 }, /* level 26 */ }; - #if defined (__cplusplus) } #endif diff --git a/programs/paramgrill.c b/programs/paramgrill.c index 78f2b195..f7cfef98 100644 --- a/programs/paramgrill.c +++ b/programs/paramgrill.c @@ -348,7 +348,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr, if (totalTime > g_maxParamTime) break; /* Compression */ - DISPLAY("%1u-%s : %9u ->\r", loopNb, name, (U32)srcSize); + DISPLAY("\r%1u-%s : %9u ->", loopNb, name, (U32)srcSize); memset(compressedBuffer, 0xE5, maxCompressedSize); nbLoops = 0; @@ -371,8 +371,9 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr, cSize += blockTable[blockNb].cSize; if ((double)milliTime < fastestC*nbLoops) fastestC = (double)milliTime / nbLoops; ratio = (double)srcSize / (double)cSize; + DISPLAY("\r"); DISPLAY("%1u-%s : %9u ->", loopNb, name, (U32)srcSize); - DISPLAY(" %9u (%4.3f),%7.1f MB/s\r", (U32)cSize, ratio, (double)srcSize / fastestC / 1000.); + DISPLAY(" %9u (%4.3f),%7.1f MB/s", (U32)cSize, ratio, (double)srcSize / fastestC / 1000.); resultPtr->cSize = cSize; resultPtr->cSpeed = (U32)((double)srcSize / fastestC); @@ -393,9 +394,10 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr, milliTime = BMK_GetMilliSpan(milliTime); if ((double)milliTime < fastestD*nbLoops) fastestD = (double)milliTime / nbLoops; + DISPLAY("\r"); DISPLAY("%1u-%s : %9u -> ", loopNb, name, (U32)srcSize); DISPLAY("%9u (%4.3f),%7.1f MB/s, ", (U32)cSize, ratio, (double)srcSize / fastestC / 1000.); - DISPLAY("%7.1f MB/s\r", (double)srcSize / fastestD / 1000.); + DISPLAY("%7.1f MB/s", (double)srcSize / fastestD / 1000.); resultPtr->dSpeed = (U32)((double)srcSize 
/ fastestD); /* CRC Checking */ @@ -420,11 +422,13 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr, } /* End cleaning */ + DISPLAY("\r"); free(compressedBuffer); free(resultBuffer); return 0; } + const char* g_stratName[] = { "ZSTD_HC_greedy ", "ZSTD_HC_lazy ", "ZSTD_HC_lazydeep", "ZSTD_HC_btlazy2 " }; static void BMK_printWinner(FILE* f, U32 cLevel, BMK_result_t result, ZSTD_HC_parameters params, size_t srcSize) @@ -592,7 +596,6 @@ static void playAround(FILE* f, winnerInfo_t* winners, const void* srcBuffer, size_t srcSize, ZSTD_HC_CCtx* ctx) { - const U32 srcLog = BMK_highbit((U32)( (g_blockSize ? g_blockSize : srcSize) -1))+1; int nbVariations = 0; const int startTime = BMK_GetMilliStart(); @@ -635,19 +638,11 @@ static void playAround(FILE* f, winnerInfo_t* winners, } /* validate new conf */ - if (p.windowLog > srcLog) continue; - if (p.windowLog > ZSTD_HC_WINDOWLOG_MAX) continue; - if (p.windowLog < MAX(ZSTD_HC_WINDOWLOG_MIN, p.chainLog)) continue; - if (p.chainLog > p.windowLog) continue; - if (p.chainLog < ZSTD_HC_CHAINLOG_MIN) continue; - if (p.hashLog > ZSTD_HC_HASHLOG_MAX) continue; - if (p.hashLog < ZSTD_HC_HASHLOG_MIN) continue; - if (p.searchLog > p.chainLog) continue; - if (p.searchLog < ZSTD_HC_SEARCHLOG_MIN) continue; - if (p.searchLength > ZSTD_HC_SEARCHLENGTH_MAX) continue; - if (p.searchLength < ZSTD_HC_SEARCHLENGTH_MIN) continue; - if (p.strategy < ZSTD_HC_greedy) continue; - if (p.strategy > ZSTD_HC_btlazy2) continue; + { + ZSTD_HC_parameters saved = p; + ZSTD_HC_validateParams(&p, g_blockSize ? g_blockSize : srcSize); + if (memcmp(&p, &saved, sizeof(p))) continue; /* p was invalid */ + } /* exclude faster if already played params */ if (FUZ_rand(&g_rand) & ((1 << NB_TESTS_PLAYED(p))-1))