diff --git a/build/VS2008/fullbench/fullbench.vcproj b/build/VS2008/fullbench/fullbench.vcproj index 05ec5ca0..715ea257 100644 --- a/build/VS2008/fullbench/fullbench.vcproj +++ b/build/VS2008/fullbench/fullbench.vcproj @@ -403,6 +403,10 @@ + + + + diff --git a/build/VS2008/fuzzer/fuzzer.vcproj b/build/VS2008/fuzzer/fuzzer.vcproj index 700dd7eb..1421619a 100644 --- a/build/VS2008/fuzzer/fuzzer.vcproj +++ b/build/VS2008/fuzzer/fuzzer.vcproj @@ -415,6 +415,10 @@ + + + + diff --git a/build/VS2008/zstd/zstd.vcproj b/build/VS2008/zstd/zstd.vcproj index 86dd3a25..dbd211c0 100644 --- a/build/VS2008/zstd/zstd.vcproj +++ b/build/VS2008/zstd/zstd.vcproj @@ -459,6 +459,10 @@ + + + + diff --git a/build/VS2008/zstdlib/zstdlib.vcproj b/build/VS2008/zstdlib/zstdlib.vcproj index ac8f896c..340a4cd8 100644 --- a/build/VS2008/zstdlib/zstdlib.vcproj +++ b/build/VS2008/zstdlib/zstdlib.vcproj @@ -403,6 +403,10 @@ + + + + + @@ -189,6 +190,7 @@ + diff --git a/build/VS2010/fuzzer/fuzzer.vcxproj b/build/VS2010/fuzzer/fuzzer.vcxproj index 9f00899d..6fe32720 100644 --- a/build/VS2010/fuzzer/fuzzer.vcxproj +++ b/build/VS2010/fuzzer/fuzzer.vcxproj @@ -169,6 +169,7 @@ + @@ -192,6 +193,7 @@ + diff --git a/build/VS2010/libzstd-dll/libzstd-dll.vcxproj b/build/VS2010/libzstd-dll/libzstd-dll.vcxproj index 0a4be69d..2d04c693 100644 --- a/build/VS2010/libzstd-dll/libzstd-dll.vcxproj +++ b/build/VS2010/libzstd-dll/libzstd-dll.vcxproj @@ -33,6 +33,7 @@ + @@ -76,6 +77,7 @@ + diff --git a/build/VS2010/libzstd/libzstd.vcxproj b/build/VS2010/libzstd/libzstd.vcxproj index 51b84067..c01a5d17 100644 --- a/build/VS2010/libzstd/libzstd.vcxproj +++ b/build/VS2010/libzstd/libzstd.vcxproj @@ -33,6 +33,7 @@ + @@ -76,6 +77,7 @@ + diff --git a/build/VS2010/zstd/zstd.vcxproj b/build/VS2010/zstd/zstd.vcxproj index 90470180..ace34346 100644 --- a/build/VS2010/zstd/zstd.vcxproj +++ b/build/VS2010/zstd/zstd.vcxproj @@ -34,6 +34,7 @@ + @@ -69,6 +70,7 @@ + diff --git a/build/cmake/lib/CMakeLists.txt 
b/build/cmake/lib/CMakeLists.txt index f4b7e375..f5d2eff9 100644 --- a/build/cmake/lib/CMakeLists.txt +++ b/build/cmake/lib/CMakeLists.txt @@ -42,6 +42,7 @@ SET(Sources ${LIBRARY_DIR}/compress/zstd_double_fast.c ${LIBRARY_DIR}/compress/zstd_lazy.c ${LIBRARY_DIR}/compress/zstd_opt.c + ${LIBRARY_DIR}/compress/zstd_ldm.c ${LIBRARY_DIR}/decompress/huf_decompress.c ${LIBRARY_DIR}/decompress/zstd_decompress.c ${LIBRARY_DIR}/dictBuilder/cover.c @@ -67,6 +68,7 @@ SET(Headers ${LIBRARY_DIR}/compress/zstd_double_fast.h ${LIBRARY_DIR}/compress/zstd_lazy.h ${LIBRARY_DIR}/compress/zstd_opt.h + ${LIBRARY_DIR}/compress/zstd_ldm.h ${LIBRARY_DIR}/compress/zstdmt_compress.h ${LIBRARY_DIR}/dictBuilder/zdict.h ${LIBRARY_DIR}/deprecated/zbuff.h) diff --git a/doc/images/ldmCspeed.png b/doc/images/ldmCspeed.png new file mode 100644 index 00000000..d3bfce4c Binary files /dev/null and b/doc/images/ldmCspeed.png differ diff --git a/doc/images/ldmDspeed.png b/doc/images/ldmDspeed.png new file mode 100644 index 00000000..d5445f01 Binary files /dev/null and b/doc/images/ldmDspeed.png differ diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index 1e20619a..cd0dbcc2 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -249,6 +249,26 @@ typedef struct { const BYTE* cachedLiterals; } optState_t; +typedef struct { + U32 offset; + U32 checksum; +} ldmEntry_t; + +typedef struct { + ldmEntry_t* hashTable; + BYTE* bucketOffsets; /* Next position in bucket to insert entry */ + U64 hashPower; /* Used to compute the rolling hash. 
+ * Depends on ldmParams.minMatchLength */ +} ldmState_t; + +typedef struct { + U32 enableLdm; /* 1 if enable long distance matching */ + U32 hashLog; /* Log size of hashTable */ + U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ + U32 minMatchLength; /* Minimum match length */ + U32 hashEveryLog; /* Log number of entries to skip */ +} ldmParams_t; + typedef struct { U32 hufCTable[HUF_CTABLE_SIZE_U32(255)]; FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; @@ -274,6 +294,9 @@ struct ZSTD_CCtx_params_s { unsigned jobSize; unsigned overlapSizeLog; + /* Long distance matching parameters */ + ldmParams_t ldmParams; + /* For use with createCCtxParams() and freeCCtxParams() only */ ZSTD_customMem customMem; diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 884a4e00..9f59ea68 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -31,7 +31,7 @@ #include "zstd_double_fast.h" #include "zstd_lazy.h" #include "zstd_opt.h" - +#include "zstd_ldm.h" /*-************************************* @@ -65,7 +65,6 @@ struct ZSTD_CDict_s { ZSTD_CCtx* refContext; }; /* typedef'd to ZSTD_CDict within "zstd.h" */ - ZSTD_CCtx* ZSTD_createCCtx(void) { return ZSTD_createCCtx_advanced(ZSTD_defaultCMem); @@ -301,6 +300,24 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v DEBUGLOG(5, " setting overlap with nbThreads == %u", cctx->requestedParams.nbThreads); return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + case ZSTD_p_enableLongDistanceMatching: + if (cctx->cdict) return ERROR(stage_wrong); + if (value != 0) { + ZSTD_cLevelToCParams(cctx); + } + return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + + case ZSTD_p_ldmHashLog: + case ZSTD_p_ldmMinMatch: + if (value == 0) return 0; /* special value : 0 means "don't change anything" */ + if (cctx->cdict) return ERROR(stage_wrong); + return 
ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + + case ZSTD_p_ldmBucketSizeLog: + case ZSTD_p_ldmHashEveryLog: + if (cctx->cdict) return ERROR(stage_wrong); + return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + default: return ERROR(parameter_unsupported); } } @@ -410,6 +427,39 @@ size_t ZSTD_CCtxParam_setParameter( return ZSTDMT_CCtxParam_setMTCtxParameter(params, ZSTDMT_p_overlapSectionLog, value); #endif + case ZSTD_p_enableLongDistanceMatching : + if (value != 0) { + ZSTD_cLevelToCCtxParams(params); + params->cParams.windowLog = ZSTD_LDM_WINDOW_LOG; + } + return ZSTD_ldm_initializeParameters(&params->ldmParams, value); + + case ZSTD_p_ldmHashLog : + if (value == 0) return 0; + CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); + params->ldmParams.hashLog = value; + return 0; + + case ZSTD_p_ldmMinMatch : + if (value == 0) return 0; + CLAMPCHECK(value, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX); + params->ldmParams.minMatchLength = value; + return 0; + + case ZSTD_p_ldmBucketSizeLog : + if (value > ZSTD_LDM_BUCKETSIZELOG_MAX) { + return ERROR(parameter_outOfBound); + } + params->ldmParams.bucketSizeLog = value; + return 0; + + case ZSTD_p_ldmHashEveryLog : + if (value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) { + return ERROR(parameter_outOfBound); + } + params->ldmParams.hashEveryLog = value; + return 0; + default: return ERROR(parameter_unsupported); } } @@ -445,6 +495,9 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams( cctx, ZSTD_p_overlapSizeLog, params->overlapSizeLog) ); } + /* Copy long distance matching parameters */ + cctx->requestedParams.ldmParams = params->ldmParams; + /* customMem is used only for create/free params and can be ignored */ return 0; } @@ -635,7 +688,13 @@ size_t ZSTD_estimateCCtxSize_advanced_usingCCtxParams(const ZSTD_CCtx_params* pa ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<ldmParams.enableLdm ? 
+ ZSTD_ldm_getTableSize(params->ldmParams.hashLog, + params->ldmParams.bucketSizeLog) : 0; + + size_t const neededSpace = entropySpace + tableSpace + tokenSpace + + optSpace + ldmSpace; DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)sizeof(ZSTD_CCtx)); DEBUGLOG(5, "estimate workSpace : %u", (U32)neededSpace); @@ -691,11 +750,25 @@ static U32 ZSTD_equivalentCParams(ZSTD_compressionParameters cParams1, & ((cParams1.searchLength==3) == (cParams2.searchLength==3)); /* hashlog3 space */ } +/** The parameters are equivalent if ldm is not enabled in both sets or + * all the parameters are equivalent. */ +static U32 ZSTD_equivalentLdmParams(ldmParams_t ldmParams1, + ldmParams_t ldmParams2) +{ + return (!ldmParams1.enableLdm && !ldmParams2.enableLdm) || + (ldmParams1.enableLdm == ldmParams2.enableLdm && + ldmParams1.hashLog == ldmParams2.hashLog && + ldmParams1.bucketSizeLog == ldmParams2.bucketSizeLog && + ldmParams1.minMatchLength == ldmParams2.minMatchLength && + ldmParams1.hashEveryLog == ldmParams2.hashEveryLog); +} + /** Equivalence for resetCCtx purposes */ static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1, ZSTD_CCtx_params params2) { - return ZSTD_equivalentCParams(params1.cParams, params2.cParams); + return ZSTD_equivalentCParams(params1.cParams, params2.cParams) && + ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams); } /*! 
ZSTD_continueCCtx() : @@ -738,6 +811,8 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, if (crp == ZSTDcrp_continue) { if (ZSTD_equivalentParams(params, zc->appliedParams)) { DEBUGLOG(5, "ZSTD_equivalentParams()==1"); + assert(!(params.ldmParams.enableLdm && + params.ldmParams.hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET)); zc->entropy->hufCTable_repeatMode = HUF_repeat_none; zc->entropy->offcode_repeatMode = FSE_repeat_none; zc->entropy->matchlength_repeatMode = FSE_repeat_none; @@ -745,6 +820,15 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, return ZSTD_continueCCtx(zc, params, pledgedSrcSize); } } + if (params.ldmParams.enableLdm) { + /* Adjust long distance matching parameters */ + ZSTD_ldm_adjustParameters(&params.ldmParams, params.cParams.windowLog); + assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); + assert(params.ldmParams.hashEveryLog < 32); + zc->ldmState.hashPower = + ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength); + } + { size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.cParams.windowLog); U32 const divider = (params.cParams.searchLength==3) ? 3 : 4; size_t const maxNbSeq = blockSize / divider; @@ -768,10 +852,13 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, || (params.cParams.strategy == ZSTD_btultra)) ? optPotentialSpace : 0; size_t const bufferSpace = buffInSize + buffOutSize; - size_t const neededSpace = entropySpace + optSpace + tableSpace - + tokenSpace + bufferSpace; + size_t const ldmSpace = params.ldmParams.enableLdm + ? 
ZSTD_ldm_getTableSize(params.ldmParams.hashLog, params.ldmParams.bucketSizeLog) + : 0; + size_t const neededSpace = entropySpace + optSpace + ldmSpace + + tableSpace + tokenSpace + bufferSpace; - if (zc->workSpaceSize < neededSpace) { /* too small : resize /*/ + if (zc->workSpaceSize < neededSpace) { /* too small : resize */ DEBUGLOG(5, "Need to update workSpaceSize from %uK to %uK \n", (unsigned)zc->workSpaceSize>>10, (unsigned)neededSpace>>10); @@ -836,6 +923,16 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, ptr = zc->optState.priceTable + ZSTD_OPT_NUM+1; } + /* ldm hash table */ + /* initialize bucketOffsets table later for pointer alignment */ + if (params.ldmParams.enableLdm) { + size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog; + memset(ptr, 0, ldmHSize * sizeof(ldmEntry_t)); + assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ + zc->ldmState.hashTable = (ldmEntry_t*)ptr; + ptr = zc->ldmState.hashTable + ldmHSize; + } + /* table Space */ if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace); /* reset tables only */ assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ @@ -853,6 +950,16 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq; ptr = zc->seqStore.litStart + blockSize; + /* ldm bucketOffsets table */ + if (params.ldmParams.enableLdm) { + size_t const ldmBucketSize = + ((size_t)1) << (params.ldmParams.hashLog - + params.ldmParams.bucketSizeLog); + memset(ptr, 0, ldmBucketSize); + zc->ldmState.bucketOffsets = (BYTE*)ptr; + ptr = zc->ldmState.bucketOffsets + ldmBucketSize; + } + /* buffers */ zc->inBuffSize = buffInSize; zc->inBuff = (char*)ptr; @@ -952,18 +1059,36 @@ static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reduce } } +/*! 
ZSTD_ldm_reduceTable() : + * reduce table indexes by `reducerValue` */ +static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size, + U32 const reducerValue) +{ + U32 u; + for (u = 0; u < size; u++) { + if (table[u].offset < reducerValue) table[u].offset = 0; + else table[u].offset -= reducerValue; + } +} + /*! ZSTD_reduceIndex() : * rescale all indexes to avoid future overflow (indexes are U32) */ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) { - { U32 const hSize = 1 << zc->appliedParams.cParams.hashLog; + { U32 const hSize = (U32)1 << zc->appliedParams.cParams.hashLog; ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); } - { U32 const chainSize = (zc->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->appliedParams.cParams.chainLog); + { U32 const chainSize = (zc->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((U32)1 << zc->appliedParams.cParams.chainLog); ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); } - { U32 const h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0; + { U32 const h3Size = (zc->hashLog3) ? 
(U32)1 << zc->hashLog3 : 0; ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); } + + { if (zc->appliedParams.ldmParams.enableLdm) { + U32 const ldmHSize = (U32)1 << zc->appliedParams.ldmParams.hashLog; + ZSTD_ldm_reduceTable(zc->ldmState.hashTable, ldmHSize, reducerValue); + } + } } @@ -1387,11 +1512,11 @@ MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr, return cSize; } - /* ZSTD_selectBlockCompressor() : + * Not static, but internal use only (used by long distance matcher) * assumption : strat is a valid strategy */ -typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize); -static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) +typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize); +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) { static const ZSTD_blockCompressor blockCompressor[2][(unsigned)ZSTD_btultra+1] = { { ZSTD_compressBlock_fast /* default for 0 */, @@ -1410,18 +1535,37 @@ static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int return blockCompressor[extDict!=0][(U32)strat]; } +static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, + const BYTE* anchor, size_t lastLLSize) +{ + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; +} static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, zc->lowLimit < zc->dictLimit); const BYTE* const base = zc->base; const BYTE* const istart = (const BYTE*)src; const U32 current = (U32)(istart-base); + size_t lastLLSize; + const BYTE* anchor; + U32 const extDict = zc->lowLimit < zc->dictLimit; + const ZSTD_blockCompressor blockCompressor = + zc->appliedParams.ldmParams.enableLdm + ? (extDict ? 
ZSTD_compressBlock_ldm_extDict : ZSTD_compressBlock_ldm) + : ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, extDict); + if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0; /* don't even attempt compression below a certain srcSize */ ZSTD_resetSeqStore(&(zc->seqStore)); if (current > zc->nextToUpdate + 384) zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384)); /* limited update after finding a very long match */ - blockCompressor(zc, src, srcSize); + + lastLLSize = blockCompressor(zc, src, srcSize); + + /* Last literals */ + anchor = (const BYTE*)src + srcSize - lastLLSize; + ZSTD_storeLastLiterals(&zc->seqStore, anchor, lastLLSize); + return ZSTD_compressSequences(&zc->seqStore, zc->entropy, &zc->appliedParams.cParams, dst, dstCapacity, srcSize); } @@ -1645,7 +1789,6 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t case ZSTD_fast: ZSTD_fillHashTable (zc, iend, zc->appliedParams.cParams.searchLength); break; - case ZSTD_dfast: ZSTD_fillDoubleHashTable (zc, iend, zc->appliedParams.cParams.searchLength); break; @@ -2578,7 +2721,6 @@ size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbThreads, cctx->customMem); if (cctx->mtctx == NULL) return ERROR(memory_allocation); } - DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbThreads=%u", params.nbThreads); CHECK_F( ZSTDMT_initCStream_internal( cctx->mtctx, @@ -2607,7 +2749,6 @@ size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, return flushMin; } #endif - CHECK_F( ZSTD_compressStream_generic(cctx, output, input, endOp) ); DEBUGLOG(5, "completed ZSTD_compress_generic"); return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ diff --git a/lib/compress/zstd_compress.h b/lib/compress/zstd_compress.h index d8813782..94606edc 100644 --- a/lib/compress/zstd_compress.h +++ b/lib/compress/zstd_compress.h @@ -68,6 +68,7 @@ struct ZSTD_CCtx_s { seqStore_t seqStore; /* sequences 
storage ptrs */ optState_t optState; + ldmState_t ldmState; /* long distance matching state */ U32* hashTable; U32* hashTable3; U32* chainTable; diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index 62368736..876a3604 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -31,7 +31,7 @@ void ZSTD_fillDoubleHashTable(ZSTD_CCtx* cctx, const void* end, const U32 mls) FORCE_INLINE_TEMPLATE -void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx, +size_t ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx, const void* src, size_t srcSize, const U32 mls) { @@ -138,33 +138,30 @@ void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx, seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved; seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved; - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } + /* Return the last literals size */ + return iend - anchor; } -void ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +size_t ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { const U32 mls = ctx->appliedParams.cParams.searchLength; switch(mls) { default: /* includes case 3 */ case 4 : - ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return; + return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); case 5 : - ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); return; + return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); case 6 : - ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); return; + return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); case 7 : - ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); return; + return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); } } -static void 
ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx, +static size_t ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx, const void* src, size_t srcSize, const U32 mls) { @@ -287,15 +284,12 @@ static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx, /* save reps for next block */ seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2; - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } + /* Return the last literals size */ + return iend - anchor; } -void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx, +size_t ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { U32 const mls = ctx->appliedParams.cParams.searchLength; @@ -303,12 +297,12 @@ void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx, { default: /* includes case 3 */ case 4 : - ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return; + return ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); case 5 : - ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); return; + return ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); case 6 : - ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); return; + return ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); case 7 : - ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); return; + return ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); } } diff --git a/lib/compress/zstd_double_fast.h b/lib/compress/zstd_double_fast.h index 1b7db5b3..3dba6c71 100644 --- a/lib/compress/zstd_double_fast.h +++ b/lib/compress/zstd_double_fast.h @@ -18,8 +18,8 @@ extern "C" { #endif void ZSTD_fillDoubleHashTable(ZSTD_CCtx* cctx, const void* end, const U32 mls); -void ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t 
srcSize); -void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +size_t ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +size_t ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); #if defined (__cplusplus) } diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 22f2c10e..2e057017 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -28,9 +28,9 @@ void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls) FORCE_INLINE_TEMPLATE -void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx, - const void* src, size_t srcSize, - const U32 mls) +size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx, + const void* src, size_t srcSize, + const U32 mls) { U32* const hashTable = cctx->hashTable; U32 const hBits = cctx->appliedParams.cParams.hashLog; @@ -107,15 +107,12 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx, seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved; seqStorePtr->repToConfirm[1] = offset_2 ? 
offset_2 : offsetSaved; - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } + /* Return the last literals size */ + return iend - anchor; } -void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, +size_t ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { const U32 mls = ctx->appliedParams.cParams.searchLength; @@ -123,18 +120,18 @@ void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, { default: /* includes case 3 */ case 4 : - ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return; + return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); case 5 : - ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return; + return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); case 6 : - ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return; + return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); case 7 : - ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return; + return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); } } -static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, +static size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, const void* src, size_t srcSize, const U32 mls) { @@ -221,15 +218,12 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, /* save reps for next block */ seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2; - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } + /* Return the last literals size */ + return iend - anchor; } -void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, +size_t ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { U32 const mls = ctx->appliedParams.cParams.searchLength; @@ -237,12 +231,12 @@ void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, { default: /* includes case 3 
*/ case 4 : - ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return; + return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); case 5 : - ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return; + return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); case 6 : - ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return; + return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); case 7 : - ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return; + return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); } } diff --git a/lib/compress/zstd_fast.h b/lib/compress/zstd_fast.h index e15a4ce6..4205141a 100644 --- a/lib/compress/zstd_fast.h +++ b/lib/compress/zstd_fast.h @@ -18,9 +18,9 @@ extern "C" { #endif void ZSTD_fillHashTable(ZSTD_CCtx* zc, const void* end, const U32 mls); -void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, - const void* src, size_t srcSize); -void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, +size_t ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, + const void* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); #if defined (__cplusplus) diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index d12619ac..b4fec514 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -398,9 +398,9 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( * Common parser - lazy strategy *********************************/ FORCE_INLINE_TEMPLATE -void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, - const U32 searchMethod, const U32 depth) +size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, + const U32 searchMethod, const U32 depth) { seqStore_t* seqStorePtr = &(ctx->seqStore); const BYTE* const istart = (const BYTE*)src; @@ -530,37 +530,34 @@ _storeSequence: seqStorePtr->repToConfirm[0] = 
offset_1 ? offset_1 : savedOffset; seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : savedOffset; - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } + /* Return the last literals size */ + return iend - anchor; } -void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +size_t ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2); + return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2); } -void ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +size_t ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2); + return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2); } -void ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +size_t ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1); + return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1); } -void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0); + return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0); } FORCE_INLINE_TEMPLATE -void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, +size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, const void* src, size_t srcSize, const U32 searchMethod, const U32 depth) { @@ -724,30 +721,27 @@ _storeSequence: /* Save reps for next block */ seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2; - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - 
seqStorePtr->lit += lastLLSize; - } + /* Return the last literals size */ + return iend - anchor; } -void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +size_t ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0); + return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0); } -void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +size_t ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1); + return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1); } -void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +size_t ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2); + return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2); } -void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +size_t ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2); + return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2); } diff --git a/lib/compress/zstd_lazy.h b/lib/compress/zstd_lazy.h index b83c475b..a9c4daed 100644 --- a/lib/compress/zstd_lazy.h +++ b/lib/compress/zstd_lazy.h @@ -21,15 +21,15 @@ U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls); void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls); void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls); -void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize); -void 
ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize); -void ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize); -void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +size_t ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize); -void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); -void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); -void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); -void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +size_t ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); #if defined (__cplusplus) } diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c new file mode 100644 index 00000000..e7efecdb --- /dev/null +++ b/lib/compress/zstd_ldm.c @@ -0,0 +1,703 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
+ */ + +#include "zstd_ldm.h" + +#include "zstd_fast.h" /* ZSTD_fillHashTable() */ +#include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */ + +#define LDM_BUCKET_SIZE_LOG 3 +#define LDM_MIN_MATCH_LENGTH 64 +#define LDM_HASH_LOG 20 +#define LDM_HASH_CHAR_OFFSET 10 + +size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm) +{ + ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); + params->enableLdm = enableLdm>0; + params->hashLog = LDM_HASH_LOG; + params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; + params->minMatchLength = LDM_MIN_MATCH_LENGTH; + params->hashEveryLog = ZSTD_LDM_HASHEVERYLOG_NOTSET; + return 0; +} + +void ZSTD_ldm_adjustParameters(ldmParams_t* params, U32 windowLog) +{ + if (params->hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET) { + params->hashEveryLog = + windowLog < params->hashLog ? 0 : windowLog - params->hashLog; + } + params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog); +} + +size_t ZSTD_ldm_getTableSize(U32 hashLog, U32 bucketSizeLog) { + size_t const ldmHSize = ((size_t)1) << hashLog; + size_t const ldmBucketSizeLog = MIN(bucketSizeLog, hashLog); + size_t const ldmBucketSize = + ((size_t)1) << (hashLog - ldmBucketSizeLog); + return ldmBucketSize + (ldmHSize * (sizeof(ldmEntry_t))); +} + +/** ZSTD_ldm_getSmallHash() : + * numBits should be <= 32 + * If numBits==0, returns 0. + * @return : the most significant numBits of value. */ +static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits) +{ + assert(numBits <= 32); + return numBits == 0 ? 
0 : (U32)(value >> (64 - numBits)); +} + +/** ZSTD_ldm_getChecksum() : + * numBitsToDiscard should be <= 32 + * @return : the next most significant 32 bits after numBitsToDiscard */ +static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard) +{ + assert(numBitsToDiscard <= 32); + return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF; +} + +/** ZSTD_ldm_getTag() : + * Given the hash, returns the most significant numTagBits bits + * after (32 + hbits) bits. + * + * If there are not enough bits remaining, return the last + * numTagBits bits. */ +static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits) +{ + assert(numTagBits < 32 && hbits <= 32); + if (32 - hbits < numTagBits) { + return hash & (((U32)1 << numTagBits) - 1); + } else { + return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1); + } +} + +/** ZSTD_ldm_getBucket() : + * Returns a pointer to the start of the bucket associated with hash. */ +static ldmEntry_t* ZSTD_ldm_getBucket( + ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams) +{ + return ldmState->hashTable + (hash << ldmParams.bucketSizeLog); +} + +/** ZSTD_ldm_insertEntry() : + * Insert the entry with corresponding hash into the hash table */ +static void ZSTD_ldm_insertEntry(ldmState_t* ldmState, + size_t const hash, const ldmEntry_t entry, + ldmParams_t const ldmParams) +{ + BYTE* const bucketOffsets = ldmState->bucketOffsets; + *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry; + bucketOffsets[hash]++; + bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1; +} + +/** ZSTD_ldm_makeEntryAndInsertByTag() : + * + * Gets the small hash, checksum, and tag from the rollingHash. + * + * If the tag matches (1 << ldmParams.hashEveryLog)-1, then + * creates an ldmEntry from the offset, and inserts it into the hash table. + * + * hBits is the length of the small hash, which is the most significant hBits + * of rollingHash. 
The checksum is the next 32 most significant bits, followed + * by ldmParams.hashEveryLog bits that make up the tag. */ +static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState, + U64 const rollingHash, + U32 const hBits, + U32 const offset, + ldmParams_t const ldmParams) +{ + U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog); + U32 const tagMask = ((U32)1 << ldmParams.hashEveryLog) - 1; + if (tag == tagMask) { + U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits); + U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); + ldmEntry_t entry; + entry.offset = offset; + entry.checksum = checksum; + ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams); + } +} + +/** ZSTD_ldm_getRollingHash() : + * Get a 64-bit hash using the first len bytes from buf. + * + * Given bytes s = s_1, s_2, ... s_k, the hash is defined to be + * H(s) = s_1*(a^(k-1)) + s_2*(a^(k-2)) + ... + s_k*(a^0) + * + * where the constant a is defined to be prime8bytes. + * + * The implementation adds an offset to each byte, so + * H(s) = (s_1 + LDM_HASH_CHAR_OFFSET)*(a^(k-1)) + ... */ +static U64 ZSTD_ldm_getRollingHash(const BYTE* buf, U32 len) +{ + U64 ret = 0; + U32 i; + for (i = 0; i < len; i++) { + ret *= prime8bytes; + ret += buf[i] + LDM_HASH_CHAR_OFFSET; + } + return ret; +} + +/** ZSTD_ldm_ipow() : + * Return base^exp. */ +static U64 ZSTD_ldm_ipow(U64 base, U64 exp) +{ + U64 ret = 1; + while (exp) { + if (exp & 1) { ret *= base; } + exp >>= 1; + base *= base; + } + return ret; +} + +U64 ZSTD_ldm_getHashPower(U32 minMatchLength) { + assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN); + return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1); +} + +/** ZSTD_ldm_updateHash() : + * Updates hash by removing toRemove and adding toAdd. 
*/ +static U64 ZSTD_ldm_updateHash(U64 hash, BYTE toRemove, BYTE toAdd, U64 hashPower) +{ + hash -= ((toRemove + LDM_HASH_CHAR_OFFSET) * hashPower); + hash *= prime8bytes; + hash += toAdd + LDM_HASH_CHAR_OFFSET; + return hash; +} + +/** ZSTD_ldm_countBackwardsMatch() : + * Returns the number of bytes that match backwards before pIn and pMatch. + * + * We count only bytes where pMatch >= pBase and pIn >= pAnchor. */ +static size_t ZSTD_ldm_countBackwardsMatch( + const BYTE* pIn, const BYTE* pAnchor, + const BYTE* pMatch, const BYTE* pBase) +{ + size_t matchLength = 0; + while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) { + pIn--; + pMatch--; + matchLength++; + } + return matchLength; +} + +/** ZSTD_ldm_fillFastTables() : + * + * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies. + * This is similar to ZSTD_loadDictionaryContent. + * + * The tables for the other strategies are filled within their + * block compressors. */ +static size_t ZSTD_ldm_fillFastTables(ZSTD_CCtx* zc, const void* end) +{ + const BYTE* const iend = (const BYTE*)end; + const U32 mls = zc->appliedParams.cParams.searchLength; + + switch(zc->appliedParams.cParams.strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable(zc, iend, mls); + zc->nextToUpdate = (U32)(iend - zc->base); + break; + + case ZSTD_dfast: + ZSTD_fillDoubleHashTable(zc, iend, mls); + zc->nextToUpdate = (U32)(iend - zc->base); + break; + + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + break; + default: + assert(0); /* not possible : not a valid strategy id */ + } + + return 0; +} + +/** ZSTD_ldm_fillLdmHashTable() : + * + * Fills hashTable from (lastHashed + 1) to iend (non-inclusive). + * lastHash is the rolling hash that corresponds to lastHashed. + * + * Returns the rolling hash corresponding to position iend-1. 
*/ +static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state, + U64 lastHash, const BYTE* lastHashed, + const BYTE* iend, const BYTE* base, + U32 hBits, ldmParams_t const ldmParams) +{ + U64 rollingHash = lastHash; + const BYTE* cur = lastHashed + 1; + + while (cur < iend) { + rollingHash = ZSTD_ldm_updateHash(rollingHash, cur[-1], + cur[ldmParams.minMatchLength-1], + state->hashPower); + ZSTD_ldm_makeEntryAndInsertByTag(state, + rollingHash, hBits, + (U32)(cur - base), ldmParams); + ++cur; + } + return rollingHash; +} + + +/** ZSTD_ldm_limitTableUpdate() : + * + * Sets cctx->nextToUpdate to a position corresponding closer to anchor + * if it is far away + * (after a long match, only update tables a limited amount). */ +static void ZSTD_ldm_limitTableUpdate(ZSTD_CCtx* cctx, const BYTE* anchor) +{ + U32 const current = (U32)(anchor - cctx->base); + if (current > cctx->nextToUpdate + 1024) { + cctx->nextToUpdate = + current - MIN(512, current - cctx->nextToUpdate - 1024); + } +} + +typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize); +/* defined in zstd_compress.c */ +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict); + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx, + const void* src, size_t srcSize) +{ + ldmState_t* const ldmState = &(cctx->ldmState); + const ldmParams_t ldmParams = cctx->appliedParams.ldmParams; + const U64 hashPower = ldmState->hashPower; + const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog; + const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog); + const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1; + seqStore_t* const seqStorePtr = &(cctx->seqStore); + const BYTE* const base = cctx->base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowestIndex = cctx->dictLimit; + const BYTE* const lowest = base + lowestIndex; + const BYTE* const iend = istart + 
srcSize; + const BYTE* const ilimit = iend - ldmParams.minMatchLength; + + const ZSTD_blockCompressor blockCompressor = + ZSTD_selectBlockCompressor(cctx->appliedParams.cParams.strategy, 0); + U32* const repToConfirm = seqStorePtr->repToConfirm; + U32 savedRep[ZSTD_REP_NUM]; + U64 rollingHash = 0; + const BYTE* lastHashed = NULL; + size_t i, lastLiterals; + + /* Save seqStorePtr->rep and copy repToConfirm */ + for (i = 0; i < ZSTD_REP_NUM; i++) + savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i]; + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + U32 const current = (U32)(ip - base); + size_t forwardMatchLength = 0, backwardMatchLength = 0; + ldmEntry_t* bestEntry = NULL; + if (ip != istart) { + rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0], + lastHashed[ldmParams.minMatchLength], + hashPower); + } else { + rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength); + } + lastHashed = ip; + + /* Do not insert and do not look for a match */ + if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) != + ldmTagMask) { + ip++; + continue; + } + + /* Get the best entry and compute the match lengths */ + { + ldmEntry_t* const bucket = + ZSTD_ldm_getBucket(ldmState, + ZSTD_ldm_getSmallHash(rollingHash, hBits), + ldmParams); + ldmEntry_t* cur; + size_t bestMatchLength = 0; + U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); + + for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) { + const BYTE* const pMatch = cur->offset + base; + size_t curForwardMatchLength, curBackwardMatchLength, + curTotalMatchLength; + if (cur->checksum != checksum || cur->offset <= lowestIndex) { + continue; + } + + curForwardMatchLength = ZSTD_count(ip, pMatch, iend); + if (curForwardMatchLength < ldmParams.minMatchLength) { + continue; + } + curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch( + ip, anchor, pMatch, lowest); + curTotalMatchLength = curForwardMatchLength + 
+ curBackwardMatchLength; + + if (curTotalMatchLength > bestMatchLength) { + bestMatchLength = curTotalMatchLength; + forwardMatchLength = curForwardMatchLength; + backwardMatchLength = curBackwardMatchLength; + bestEntry = cur; + } + } + } + + /* No match found -- continue searching */ + if (bestEntry == NULL) { + ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, + hBits, current, + ldmParams); + ip++; + continue; + } + + /* Match found */ + mLength = forwardMatchLength + backwardMatchLength; + ip -= backwardMatchLength; + + /* Call the block compressor on the remaining literals */ + { + U32 const matchIndex = bestEntry->offset; + const BYTE* const match = base + matchIndex - backwardMatchLength; + U32 const offset = (U32)(ip - match); + + /* Overwrite rep codes */ + for (i = 0; i < ZSTD_REP_NUM; i++) + seqStorePtr->rep[i] = repToConfirm[i]; + + /* Fill tables for block compressor */ + ZSTD_ldm_limitTableUpdate(cctx, anchor); + ZSTD_ldm_fillFastTables(cctx, anchor); + + /* Call block compressor and get remaining literals */ + lastLiterals = blockCompressor(cctx, anchor, ip - anchor); + cctx->nextToUpdate = (U32)(ip - base); + + /* Update repToConfirm with the new offset */ + for (i = ZSTD_REP_NUM - 1; i > 0; i--) + repToConfirm[i] = repToConfirm[i-1]; + repToConfirm[0] = offset; + + /* Store the sequence with the leftover literals */ + ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals, + offset + ZSTD_REP_MOVE, mLength - MINMATCH); + } + + /* Insert the current entry into the hash table */ + ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits, + (U32)(lastHashed - base), + ldmParams); + + assert(ip + backwardMatchLength == lastHashed); + + /* Fill the hash table from lastHashed+1 to ip+mLength*/ + /* Heuristic: don't need to fill the entire table at end of block */ + if (ip + mLength < ilimit) { + rollingHash = ZSTD_ldm_fillLdmHashTable( + ldmState, rollingHash, lastHashed, + ip + mLength, base, hBits, ldmParams); + lastHashed = ip + 
mLength - 1; + } + ip += mLength; + anchor = ip; + /* Check immediate repcode */ + while ( (ip < ilimit) + && ( (repToConfirm[1] > 0) && (repToConfirm[1] <= (U32)(ip-lowest)) + && (MEM_read32(ip) == MEM_read32(ip - repToConfirm[1])) )) { + + size_t const rLength = ZSTD_count(ip+4, ip+4-repToConfirm[1], + iend) + 4; + /* Swap repToConfirm[1] <=> repToConfirm[0] */ + { + U32 const tmpOff = repToConfirm[1]; + repToConfirm[1] = repToConfirm[0]; + repToConfirm[0] = tmpOff; + } + + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH); + + /* Fill the hash table from lastHashed+1 to ip+rLength*/ + if (ip + rLength < ilimit) { + rollingHash = ZSTD_ldm_fillLdmHashTable( + ldmState, rollingHash, lastHashed, + ip + rLength, base, hBits, ldmParams); + lastHashed = ip + rLength - 1; + } + ip += rLength; + anchor = ip; + } + } + + /* Overwrite rep */ + for (i = 0; i < ZSTD_REP_NUM; i++) + seqStorePtr->rep[i] = repToConfirm[i]; + + ZSTD_ldm_limitTableUpdate(cctx, anchor); + ZSTD_ldm_fillFastTables(cctx, anchor); + + lastLiterals = blockCompressor(cctx, anchor, iend - anchor); + cctx->nextToUpdate = (U32)(iend - base); + + /* Restore seqStorePtr->rep */ + for (i = 0; i < ZSTD_REP_NUM; i++) + seqStorePtr->rep[i] = savedRep[i]; + + /* Return the last literals size */ + return lastLiterals; +} + +size_t ZSTD_compressBlock_ldm(ZSTD_CCtx* ctx, + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_ldm_generic(ctx, src, srcSize); +} + +static size_t ZSTD_compressBlock_ldm_extDict_generic( + ZSTD_CCtx* ctx, + const void* src, size_t srcSize) +{ + ldmState_t* const ldmState = &(ctx->ldmState); + const ldmParams_t ldmParams = ctx->appliedParams.ldmParams; + const U64 hashPower = ldmState->hashPower; + const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog; + const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog); + const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1; + seqStore_t* const seqStorePtr = &(ctx->seqStore); + const BYTE* const base = 
ctx->base; + const BYTE* const dictBase = ctx->dictBase; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowestIndex = ctx->lowLimit; + const BYTE* const dictStart = dictBase + lowestIndex; + const U32 dictLimit = ctx->dictLimit; + const BYTE* const lowPrefixPtr = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - ldmParams.minMatchLength; + + const ZSTD_blockCompressor blockCompressor = + ZSTD_selectBlockCompressor(ctx->appliedParams.cParams.strategy, 1); + U32* const repToConfirm = seqStorePtr->repToConfirm; + U32 savedRep[ZSTD_REP_NUM]; + U64 rollingHash = 0; + const BYTE* lastHashed = NULL; + size_t i, lastLiterals; + + /* Save seqStorePtr->rep and copy repToConfirm */ + for (i = 0; i < ZSTD_REP_NUM; i++) { + savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i]; + } + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + size_t mLength; + const U32 current = (U32)(ip-base); + size_t forwardMatchLength = 0, backwardMatchLength = 0; + ldmEntry_t* bestEntry = NULL; + if (ip != istart) { + rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0], + lastHashed[ldmParams.minMatchLength], + hashPower); + } else { + rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength); + } + lastHashed = ip; + + if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) != + ldmTagMask) { + /* Don't insert and don't look for a match */ + ip++; + continue; + } + + /* Get the best entry and compute the match lengths */ + { + ldmEntry_t* const bucket = + ZSTD_ldm_getBucket(ldmState, + ZSTD_ldm_getSmallHash(rollingHash, hBits), + ldmParams); + ldmEntry_t* cur; + size_t bestMatchLength = 0; + U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); + + for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) { + const BYTE* const curMatchBase = + cur->offset < 
dictLimit ? dictBase : base; + const BYTE* const pMatch = curMatchBase + cur->offset; + const BYTE* const matchEnd = + cur->offset < dictLimit ? dictEnd : iend; + const BYTE* const lowMatchPtr = + cur->offset < dictLimit ? dictStart : lowPrefixPtr; + size_t curForwardMatchLength, curBackwardMatchLength, + curTotalMatchLength; + + if (cur->checksum != checksum || cur->offset <= lowestIndex) { + continue; + } + + curForwardMatchLength = ZSTD_count_2segments( + ip, pMatch, iend, + matchEnd, lowPrefixPtr); + if (curForwardMatchLength < ldmParams.minMatchLength) { + continue; + } + curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch( + ip, anchor, pMatch, lowMatchPtr); + curTotalMatchLength = curForwardMatchLength + + curBackwardMatchLength; + + if (curTotalMatchLength > bestMatchLength) { + bestMatchLength = curTotalMatchLength; + forwardMatchLength = curForwardMatchLength; + backwardMatchLength = curBackwardMatchLength; + bestEntry = cur; + } + } + } + + /* No match found -- continue searching */ + if (bestEntry == NULL) { + ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits, + (U32)(lastHashed - base), + ldmParams); + ip++; + continue; + } + + /* Match found */ + mLength = forwardMatchLength + backwardMatchLength; + ip -= backwardMatchLength; + + /* Call the block compressor on the remaining literals */ + { + /* ip = current - backwardMatchLength + * The match is at (bestEntry->offset - backwardMatchLength) */ + U32 const matchIndex = bestEntry->offset; + U32 const offset = current - matchIndex; + + /* Overwrite rep codes */ + for (i = 0; i < ZSTD_REP_NUM; i++) + seqStorePtr->rep[i] = repToConfirm[i]; + + /* Fill the hash table for the block compressor */ + ZSTD_ldm_limitTableUpdate(ctx, anchor); + ZSTD_ldm_fillFastTables(ctx, anchor); + + /* Call block compressor and get remaining literals */ + lastLiterals = blockCompressor(ctx, anchor, ip - anchor); + ctx->nextToUpdate = (U32)(ip - base); + + /* Update repToConfirm with the new offset */ + for (i = 
ZSTD_REP_NUM - 1; i > 0; i--) + repToConfirm[i] = repToConfirm[i-1]; + repToConfirm[0] = offset; + + /* Store the sequence with the leftover literals */ + ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals, + offset + ZSTD_REP_MOVE, mLength - MINMATCH); + } + + /* Insert the current entry into the hash table */ + ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits, + (U32)(lastHashed - base), + ldmParams); + + /* Fill the hash table from lastHashed+1 to ip+mLength */ + assert(ip + backwardMatchLength == lastHashed); + if (ip + mLength < ilimit) { + rollingHash = ZSTD_ldm_fillLdmHashTable( + ldmState, rollingHash, lastHashed, + ip + mLength, base, hBits, + ldmParams); + lastHashed = ip + mLength - 1; + } + ip += mLength; + anchor = ip; + + /* check immediate repcode */ + while (ip < ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - repToConfirm[1]; + const BYTE* repMatch2 = repIndex2 < dictLimit ? + dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & + (repIndex2 > lowestIndex)) /* intentional overflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < dictLimit ? 
+ dictEnd : iend; + size_t const repLength2 = + ZSTD_count_2segments(ip+4, repMatch2+4, iend, + repEnd2, lowPrefixPtr) + 4; + + U32 tmpOffset = repToConfirm[1]; + repToConfirm[1] = repToConfirm[0]; + repToConfirm[0] = tmpOffset; + + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH); + + /* Fill the hash table from lastHashed+1 to ip+repLength2*/ + if (ip + repLength2 < ilimit) { + rollingHash = ZSTD_ldm_fillLdmHashTable( + ldmState, rollingHash, lastHashed, + ip + repLength2, base, hBits, + ldmParams); + lastHashed = ip + repLength2 - 1; + } + ip += repLength2; + anchor = ip; + continue; + } + break; + } + } + + /* Overwrite rep */ + for (i = 0; i < ZSTD_REP_NUM; i++) + seqStorePtr->rep[i] = repToConfirm[i]; + + ZSTD_ldm_limitTableUpdate(ctx, anchor); + ZSTD_ldm_fillFastTables(ctx, anchor); + + /* Call the block compressor one last time on the last literals */ + lastLiterals = blockCompressor(ctx, anchor, iend - anchor); + ctx->nextToUpdate = (U32)(iend - base); + + /* Restore seqStorePtr->rep */ + for (i = 0; i < ZSTD_REP_NUM; i++) + seqStorePtr->rep[i] = savedRep[i]; + + /* Return the last literals size */ + return lastLiterals; +} + +size_t ZSTD_compressBlock_ldm_extDict(ZSTD_CCtx* ctx, + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_ldm_extDict_generic(ctx, src, srcSize); +} diff --git a/lib/compress/zstd_ldm.h b/lib/compress/zstd_ldm.h new file mode 100644 index 00000000..7a624839 --- /dev/null +++ b/lib/compress/zstd_ldm.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
+ */ + +#ifndef ZSTD_LDM_H +#define ZSTD_LDM_H + +#include "zstd_compress.h" + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Long distance matching +***************************************/ + +#define ZSTD_LDM_WINDOW_LOG 27 +#define ZSTD_LDM_HASHEVERYLOG_NOTSET 9999 + +/** ZSTD_compressBlock_ldm_generic() : + * + * This is a block compressor intended for long distance matching. + * + * The function searches for matches of length at least + * ldmParams.minMatchLength using a hash table in cctx->ldmState. + * Matches can be at a distance of up to cParams.windowLog. + * + * Upon finding a match, the unmatched literals are compressed using a + * ZSTD_blockCompressor (depending on the strategy in the compression + * parameters), which stores the matched sequences. The "long distance" + * match is then stored with the remaining literals from the + * ZSTD_blockCompressor. */ +size_t ZSTD_compressBlock_ldm(ZSTD_CCtx* cctx, const void* src, size_t srcSize); +size_t ZSTD_compressBlock_ldm_extDict(ZSTD_CCtx* ctx, + const void* src, size_t srcSize); + +/** ZSTD_ldm_initializeParameters() : + * Initialize the long distance matching parameters to their default values. */ +size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm); + +/** ZSTD_ldm_getTableSize() : + * Estimate the space needed for long distance matching tables. */ +size_t ZSTD_ldm_getTableSize(U32 hashLog, U32 bucketSizeLog); + +/** ZSTD_ldm_getHashPower() : + * Return prime8bytes^(minMatchLength-1) */ +U64 ZSTD_ldm_getHashPower(U32 minMatchLength); + +/** ZSTD_ldm_adjustParameters() : + * If the params->hashEveryLog is not set, set it to its default value based on + * windowLog and params->hashLog. + * + * Ensures that params->bucketSizeLog is <= params->hashLog (setting it to + * params->hashLog if it is not). 
*/ +void ZSTD_ldm_adjustParameters(ldmParams_t* params, U32 windowLog); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_LDM_H */ diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index cf5dbd86..fd102da2 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -410,8 +410,8 @@ static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( * Optimal parser *********************************/ FORCE_INLINE_TEMPLATE -void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, const int ultra) +size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, const int ultra) { seqStore_t* seqStorePtr = &(ctx->seqStore); optState_t* optStatePtr = &(ctx->optState); @@ -651,27 +651,24 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ /* Save reps for next block */ { int i; for (i=0; irepToConfirm[i] = rep[i]; } - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } + /* Return the last literals size */ + return iend - anchor; } -void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0); + return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0); } -void ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1); + return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1); } FORCE_INLINE_TEMPLATE -void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, +size_t ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, const void* src, size_t srcSize, const int ultra) { seqStore_t* seqStorePtr = &(ctx->seqStore); @@ -936,20 +933,17 @@ _storeSequence: /* cur, 
last_pos, best_mlen, best_off have to be set */ /* Save reps for next block */ { int i; for (i=0; irepToConfirm[i] = rep[i]; } - /* Last Literals */ - { size_t lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } + /* Return the last literals size */ + return iend - anchor; } -void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +size_t ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0); + return ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0); } -void ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +size_t ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1); + return ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1); } diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h index a304bd66..816a1fab 100644 --- a/lib/compress/zstd_opt.h +++ b/lib/compress/zstd_opt.h @@ -17,11 +17,11 @@ extern "C" { #endif -void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize); -void ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize); -void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); -void ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +size_t ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); #if defined (__cplusplus) } diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index ae8c63cc..ecb799ab 
100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -197,6 +197,8 @@ static ZSTD_CCtx_params ZSTDMT_makeJobCCtxParams(ZSTD_CCtx_params const params) jobParams.cParams = params.cParams; jobParams.fParams = params.fParams; jobParams.compressionLevel = params.compressionLevel; + + jobParams.ldmParams = params.ldmParams; return jobParams; } diff --git a/lib/zstd.h b/lib/zstd.h index 7695776f..ddb28429 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -391,6 +391,9 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output #define ZSTD_SEARCHLENGTH_MIN 3 /* only for ZSTD_btopt, other strategies are limited to 4 */ #define ZSTD_TARGETLENGTH_MIN 4 #define ZSTD_TARGETLENGTH_MAX 999 +#define ZSTD_LDM_MINMATCH_MIN 4 +#define ZSTD_LDM_MINMATCH_MAX 4096 +#define ZSTD_LDM_BUCKETSIZELOG_MAX 8 #define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */ #define ZSTD_FRAMEHEADERSIZE_MIN 6 @@ -992,6 +995,35 @@ typedef enum { /* advanced parameters - may not remain available after API update */ ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize, * even when referencing into Dictionary content (default:0) */ + ZSTD_p_enableLongDistanceMatching=1200, /* Enable long distance matching. + * This parameter is designed to improve the compression + * ratio for large inputs with long distance matches. + * This increases the memory usage as well as window size. + * Note: setting this parameter sets all the LDM parameters + * as well as ZSTD_p_windowLog. It should be set after + * ZSTD_p_compressionLevel and before ZSTD_p_windowLog and + * other LDM parameters. Setting the compression level + * after this parameter overrides the window log, though LDM + * will remain enabled until explicitly disabled. */ + ZSTD_p_ldmHashLog, /* Size of the table for long distance matching, as a power of 2. + * Larger values increase memory usage and compression ratio, but decrease + * compression speed. 
+ * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX + * (default: 20). */ + ZSTD_p_ldmMinMatch, /* Minimum size of searched matches for long distance matcher. + * Larger/too small values usually decrease compression ratio. + * Must be clamped between ZSTD_LDM_MINMATCH_MIN + * and ZSTD_LDM_MINMATCH_MAX (default: 64). */ + ZSTD_p_ldmBucketSizeLog, /* Log size of each bucket in the LDM hash table for collision resolution. + * Larger values usually improve collision resolution but may decrease + * compression speed. + * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX (default: 3). */ + ZSTD_p_ldmHashEveryLog, /* Frequency of inserting/looking up entries in the LDM hash table. + * The default is MAX(0, (windowLog - ldmHashLog)) to + * optimize hash table usage. + * Larger values improve compression speed. Deviating far from the + * default value will likely result in a decrease in compression ratio. + * Must be clamped between 0 and ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN. */ } ZSTD_cParameter; diff --git a/programs/README.md b/programs/README.md index 8b65dfdb..2aae5258 100644 --- a/programs/README.md +++ b/programs/README.md @@ -113,6 +113,7 @@ Advanced arguments : -c : force write to standard output, even if it is the console -l : print information about zstd compressed files --ultra : enable levels beyond 19, up to 22 (requires more memory) +--long : enable long distance matching (requires more memory) --no-dictID : don't write dictID into header (dictionary compression) --[no-]check : integrity check (default:enabled) -r : operate recursively on directories @@ -139,3 +140,60 @@ Benchmark arguments : -B# : cut file into independent blocks of size # (default: no block) --priority=rt : set process priority to real-time ``` + + +#### Long distance matching mode +The long distance matching mode, enabled with `--long`, is designed to improve +the compression ratio for files with long matches at a large distance (up to the +maximum window size, `128 MiB`) 
while still maintaining compression speed. + +Enabling this mode sets the window size to `128 MiB` and thus increases the memory +usage for both the compressor and decompressor. Performance in terms of speed is +dependent on long matches being found. Compression speed may degrade if few long +matches are found. Decompression speed usually improves when there are many long +distance matches. + +Below are graphs comparing the compression speed, compression ratio, and +decompression speed with and without long distance matching on an ideal use +case: a tar of four versions of clang (versions `3.4.1`, `3.4.2`, `3.5.0`, +`3.5.1`) with a total size of `244889600 B`. This is an ideal use case as there +are many long distance matches within the maximum window size of `128 MiB` (each +version is less than `128 MiB`). + +Compression Speed vs Ratio | Decompression Speed +---------------------------|--------------------- +![Compression Speed vs Ratio](../doc/images/ldmCspeed.png "Compression Speed vs Ratio") | ![Decompression Speed](../doc/images/ldmDspeed.png "Decompression Speed") + +| Method | Compression ratio | Compression speed | Decompression speed | +|:-------|------------------:|-------------------------:|---------------------------:| +| `zstd -1` | `5.065` | `284.8 MB/s` | `759.3 MB/s` | +| `zstd -5` | `5.826` | `124.9 MB/s` | `674.0 MB/s` | +| `zstd -10` | `6.504` | `29.5 MB/s` | `771.3 MB/s` | +| `zstd -1 --long` | `17.426` | `220.6 MB/s` | `1638.4 MB/s` | +| `zstd -5 --long` | `19.661` | `165.5 MB/s` | `1530.6 MB/s`| +| `zstd -10 --long`| `21.949` | `75.6 MB/s` | `1632.6 MB/s`| + +On this file, the compression ratio improves significantly with minimal impact +on compression speed, and the decompression speed doubles. + +On the other extreme, compressing a file with few long distance matches (such as +the [Silesia compression corpus]) will likely lead to a deterioration in +compression speed (for lower levels) with minimal change in compression ratio. 
+ +The below table illustrates this on the [Silesia compression corpus]. + +[Silesia compression corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia + +| Method | Compression ratio | Compression speed | Decompression speed | +|:-------|------------------:|-------------------------:|---------------------------:| +| `zstd -1` | `2.878` | `231.7 MB/s` | `594.4 MB/s` | +| `zstd -1 --long` | `2.929` | `106.5 MB/s` | `517.9 MB/s` | +| `zstd -5` | `3.274` | `77.1 MB/s` | `464.2 MB/s` | +| `zstd -5 --long` | `3.319` | `51.7 MB/s` | `371.9 MB/s` | +| `zstd -10` | `3.523` | `16.4 MB/s` | `489.2 MB/s` | +| `zstd -10 --long`| `3.566` | `16.2 MB/s` | `415.7 MB/s` | + + + + + diff --git a/programs/bench.c b/programs/bench.c index 11f778d0..ec99c61c 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -130,6 +130,31 @@ void BMK_setNbThreads(unsigned nbThreads) { #endif g_nbThreads = nbThreads; } +static U32 g_ldmFlag = 0; +void BMK_setLdmFlag(unsigned ldmFlag) { + g_ldmFlag = ldmFlag; +} + +static U32 g_ldmMinMatch = 0; +void BMK_setLdmMinMatch(unsigned ldmMinMatch) { + g_ldmMinMatch = ldmMinMatch; +} + +static U32 g_ldmHashLog = 0; +void BMK_setLdmHashLog(unsigned ldmHashLog) { + g_ldmHashLog = ldmHashLog; +} + +#define BMK_LDM_PARAM_NOTSET 9999 +static U32 g_ldmBucketSizeLog = BMK_LDM_PARAM_NOTSET; +void BMK_setLdmBucketSizeLog(unsigned ldmBucketSizeLog) { + g_ldmBucketSizeLog = ldmBucketSizeLog; +} + +static U32 g_ldmHashEveryLog = BMK_LDM_PARAM_NOTSET; +void BMK_setLdmHashEveryLog(unsigned ldmHashEveryLog) { + g_ldmHashEveryLog = ldmHashEveryLog; +} /* ******************************************************** @@ -265,6 +290,15 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, #ifdef ZSTD_NEWAPI ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbThreads, g_nbThreads); ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionLevel, cLevel); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_enableLongDistanceMatching, g_ldmFlag); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmMinMatch, 
g_ldmMinMatch); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashLog, g_ldmHashLog); + if (g_ldmBucketSizeLog != BMK_LDM_PARAM_NOTSET) { + ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmBucketSizeLog, g_ldmBucketSizeLog); + } + if (g_ldmHashEveryLog != BMK_LDM_PARAM_NOTSET) { + ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashEveryLog, g_ldmHashEveryLog); + } ZSTD_CCtx_setParameter(ctx, ZSTD_p_windowLog, comprParams->windowLog); ZSTD_CCtx_setParameter(ctx, ZSTD_p_chainLog, comprParams->chainLog); ZSTD_CCtx_setParameter(ctx, ZSTD_p_searchLog, comprParams->searchLog); diff --git a/programs/bench.h b/programs/bench.h index 860d1ab2..82bb3456 100644 --- a/programs/bench.h +++ b/programs/bench.h @@ -26,5 +26,10 @@ void BMK_setNbThreads(unsigned nbThreads); void BMK_setNotificationLevel(unsigned level); void BMK_setAdditionalParam(int additionalParam); void BMK_setDecodeOnlyMode(unsigned decodeFlag); +void BMK_setLdmFlag(unsigned ldmFlag); +void BMK_setLdmMinMatch(unsigned ldmMinMatch); +void BMK_setLdmHashLog(unsigned ldmHashLog); +void BMK_setLdmBucketSizeLog(unsigned ldmBucketSizeLog); +void BMK_setLdmHashEveryLog(unsigned ldmHashEveryLog); #endif /* BENCH_H_121279284357 */ diff --git a/programs/fileio.c b/programs/fileio.c index e8aba4b5..cef3f2c9 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -214,6 +214,30 @@ void FIO_setOverlapLog(unsigned overlapLog){ DISPLAYLEVEL(2, "Setting overlapLog is useless in single-thread mode \n"); g_overlapLog = overlapLog; } +static U32 g_ldmFlag = 0; +void FIO_setLdmFlag(unsigned ldmFlag) { + g_ldmFlag = (ldmFlag>0); +} +static U32 g_ldmHashLog = 0; +void FIO_setLdmHashLog(unsigned ldmHashLog) { + g_ldmHashLog = ldmHashLog; +} +static U32 g_ldmMinMatch = 0; +void FIO_setLdmMinMatch(unsigned ldmMinMatch) { + g_ldmMinMatch = ldmMinMatch; +} + +#define FIO_LDM_PARAM_NOTSET 9999 +static U32 g_ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET; +void FIO_setLdmBucketSizeLog(unsigned ldmBucketSizeLog) { + g_ldmBucketSizeLog = ldmBucketSizeLog; +} + +static U32 
g_ldmHashEveryLog = FIO_LDM_PARAM_NOTSET; +void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog) { + g_ldmHashEveryLog = ldmHashEveryLog; +} + /*-************************************* @@ -399,8 +423,20 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel, CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_dictIDFlag, g_dictIDFlag) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_checksumFlag, g_checksumFlag) ); CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) ); - /* compression parameters */ + /* compression level */ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, cLevel) ); + /* long distance matching */ + CHECK( ZSTD_CCtx_setParameter( + ress.cctx, ZSTD_p_enableLongDistanceMatching, g_ldmFlag) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmHashLog, g_ldmHashLog) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmMinMatch, g_ldmMinMatch) ); + if (g_ldmBucketSizeLog != FIO_LDM_PARAM_NOTSET) { + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmBucketSizeLog, g_ldmBucketSizeLog) ); + } + if (g_ldmHashEveryLog != FIO_LDM_PARAM_NOTSET) { + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmHashEveryLog, g_ldmHashEveryLog) ); + } + /* compression parameters */ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_windowLog, comprParams->windowLog) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_chainLog, comprParams->chainLog) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_hashLog, comprParams->hashLog) ); diff --git a/programs/fileio.h b/programs/fileio.h index 74810368..aa4484fd 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -57,6 +57,11 @@ void FIO_setMemLimit(unsigned memLimit); void FIO_setNbThreads(unsigned nbThreads); void FIO_setBlockSize(unsigned blockSize); void FIO_setOverlapLog(unsigned overlapLog); +void FIO_setLdmFlag(unsigned ldmFlag); +void FIO_setLdmHashLog(unsigned ldmHashLog); +void FIO_setLdmMinMatch(unsigned ldmMinMatch); +void FIO_setLdmBucketSizeLog(unsigned 
ldmBucketSizeLog); +void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog); /*-************************************* diff --git a/programs/zstd.1 b/programs/zstd.1 index 5a91eea2..0fad1d27 100644 --- a/programs/zstd.1 +++ b/programs/zstd.1 @@ -1,5 +1,5 @@ . -.TH "ZSTD" "1" "August 2017" "zstd 1.3.1" "User Commands" +.TH "ZSTD" "1" "September 2017" "zstd 1.3.1" "User Commands" . .SH "NAME" \fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files @@ -104,6 +104,10 @@ Display information related to a zstd compressed file, such as size, ratio, and unlocks high compression levels 20+ (maximum 22), using a lot more memory\. Note that decompression will also require more memory when using these levels\. . .TP +\fB\-\-long\fR +enables long distance matching\. This increases the window size (\fBwindowLog\fR) and memory usage for both the compressor and decompressor\. This setting is designed to improve the compression ratio for files with long matches at a large distance (up to the maximum window size, 128 MiB)\. +. +.TP \fB\-T#\fR, \fB\-\-threads=#\fR Compress using \fB#\fR threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to ZSTDMT_NBTHREADS_MAX==256\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\. . @@ -322,6 +326,58 @@ Determine \fBoverlapSize\fR, amount of data reloaded from previous job\. This pa .IP The minimum \fIovlog\fR is 0, and the maximum is 9\. 0 means "no overlap", hence completely independent jobs\. 9 means "full overlap", meaning up to \fBwindowSize\fR is reloaded from previous job\. Reducing \fIovlog\fR by 1 reduces the amount of reload by a factor 2\. Default \fIovlog\fR is 6, which means "reload \fBwindowSize / 8\fR"\. Exception : the maximum compression level (22) has a default \fIovlog\fR of 9\. . 
+.TP +\fBldmHashLog\fR=\fIldmhlog\fR, \fBldmhlog\fR=\fIldmhlog\fR +Specify the maximum size for a hash table used for long distance matching\. +. +.IP +This option is ignored unless long distance matching is enabled\. +. +.IP +Bigger hash tables usually improve compression ratio at the expense of more memory during compression and a decrease in compression speed\. +. +.IP +The minimum \fIldmhlog\fR is 6 and the maximum is 26 (default: 20)\. +. +.TP +\fBldmSearchLength\fR=\fIldmslen\fR, \fBldmslen\fR=\fIldmslen\fR +Specify the minimum searched length of a match for long distance matching\. +. +.IP +This option is ignored unless long distance matching is enabled\. +. +.IP +Larger/very small values usually decrease compression ratio\. +. +.IP +The minimum \fIldmslen\fR is 4 and the maximum is 4096 (default: 64)\. +. +.TP +\fBldmBucketSizeLog\fR=\fIldmblog\fR, \fBldmblog\fR=\fIldmblog\fR +Specify the size of each bucket for the hash table used for long distance matching\. +. +.IP +This option is ignored unless long distance matching is enabled\. +. +.IP +Larger bucket sizes improve collision resolution but decrease compression speed\. +. +.IP +The minimum \fIldmblog\fR is 0 and the maximum is 8 (default: 3)\. +. +.TP +\fBldmHashEveryLog\fR=\fIldmhevery\fR, \fBldmhevery\fR=\fIldmhevery\fR +Specify the frequency of inserting entries into the long distance matching hash table\. +. +.IP +This option is ignored unless long distance matching is enabled\. +. +.IP +Larger values will improve compression speed\. Deviating far from the default value will likely result in a decrease in compression ratio\. +. +.IP +The default value is \fBwlog \- ldmhlog\fR\. +. .SS "\-B#:" Select the size of each compression job\. This parameter is available only when multi\-threading is enabled\. Default value is \fB4 * windowSize\fR, which means it varies depending on compression level\. \fB\-B#\fR makes it possible to select a custom value\. 
Note that job size must respect a minimum value which is enforced transparently\. This minimum is either 1 MB, or \fBoverlapSize\fR, whichever is largest\. . diff --git a/programs/zstd.1.md b/programs/zstd.1.md index 4310afa1..2fcedde3 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -105,6 +105,12 @@ the last one takes effect. * `--ultra`: unlocks high compression levels 20+ (maximum 22), using a lot more memory. Note that decompression will also require more memory when using these levels. +* `--long`: + enables long distance matching. + This increases the window size (`windowLog`) and memory usage for both the + compressor and decompressor. This setting is designed to improve the + compression ratio for files with long matches at a large distance + (up to the maximum window size, 128 MiB). * `-T#`, `--threads=#`: Compress using `#` threads (default: 1). If `#` is 0, attempt to detect and use the number of physical CPU cores. @@ -327,6 +333,47 @@ The list of available _options_: Default _ovlog_ is 6, which means "reload `windowSize / 8`". Exception : the maximum compression level (22) has a default _ovlog_ of 9. +- `ldmHashLog`=_ldmhlog_, `ldmhlog`=_ldmhlog_: + Specify the maximum size for a hash table used for long distance matching. + + This option is ignored unless long distance matching is enabled. + + Bigger hash tables usually improve compression ratio at the expense of more + memory during compression and a decrease in compression speed. + + The minimum _ldmhlog_ is 6 and the maximum is 26 (default: 20). + +- `ldmSearchLength`=_ldmslen_, `ldmslen`=_ldmslen_: + Specify the minimum searched length of a match for long distance matching. + + This option is ignored unless long distance matching is enabled. + + Larger/very small values usually decrease compression ratio. + + The minimum _ldmslen_ is 4 and the maximum is 4096 (default: 64). 
+ +- `ldmBucketSizeLog`=_ldmblog_, `ldmblog`=_ldmblog_: + Specify the size of each bucket for the hash table used for long distance + matching. + + This option is ignored unless long distance matching is enabled. + + Larger bucket sizes improve collision resolution but decrease compression + speed. + + The minimum _ldmblog_ is 0 and the maximum is 8 (default: 3). + +- `ldmHashEveryLog`=_ldmhevery_, `ldmhevery`=_ldmhevery_: + Specify the frequency of inserting entries into the long distance matching + hash table. + + This option is ignored unless long distance matching is enabled. + + Larger values will improve compression speed. Deviating far from the + default value will likely result in a decrease in compression ratio. + + The default value is `wlog - ldmhlog`. + ### -B#: Select the size of each compression job. This parameter is available only when multi-threading is enabled. diff --git a/programs/zstdcli.c b/programs/zstdcli.c index c5f7f578..607287c9 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -73,7 +73,12 @@ static const unsigned g_defaultMaxDictSize = 110 KB; static const int g_defaultDictCLevel = 3; static const unsigned g_defaultSelectivityLevel = 9; #define OVERLAP_LOG_DEFAULT 9999 +#define LDM_PARAM_DEFAULT 9999 /* Default for parameters where 0 is valid */ static U32 g_overlapLog = OVERLAP_LOG_DEFAULT; +static U32 g_ldmHashLog = 0; +static U32 g_ldmMinMatch = 0; +static U32 g_ldmHashEveryLog = LDM_PARAM_DEFAULT; +static U32 g_ldmBucketSizeLog = LDM_PARAM_DEFAULT; /*-************************************ @@ -124,6 +129,7 @@ static int usage_advanced(const char* programName) DISPLAY( " -l : print information about zstd compressed files \n"); #ifndef ZSTD_NOCOMPRESS DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel()); + DISPLAY( "--long : enable long distance matching (requires more memory)\n"); #ifdef ZSTD_MULTITHREAD DISPLAY( " -T# : use # threads for compression 
(default:1) \n"); DISPLAY( " -B# : select size of each job (default:0==automatic) \n"); @@ -305,6 +311,10 @@ static unsigned parseCompressionParameters(const char* stringPtr, ZSTD_compressi if (longCommandWArg(&stringPtr, "targetLength=") || longCommandWArg(&stringPtr, "tlen=")) { params->targetLength = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } if (longCommandWArg(&stringPtr, "strategy=") || longCommandWArg(&stringPtr, "strat=")) { params->strategy = (ZSTD_strategy)(readU32FromChar(&stringPtr)); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } if (longCommandWArg(&stringPtr, "overlapLog=") || longCommandWArg(&stringPtr, "ovlog=")) { g_overlapLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "ldmHashLog=") || longCommandWArg(&stringPtr, "ldmhlog=")) { g_ldmHashLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "ldmSearchLength=") || longCommandWArg(&stringPtr, "ldmslen=")) { g_ldmMinMatch = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "ldmBucketSizeLog=") || longCommandWArg(&stringPtr, "ldmblog=")) { g_ldmBucketSizeLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "ldmHashEveryLog=") || longCommandWArg(&stringPtr, "ldmhevery=")) { g_ldmHashEveryLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } return 0; } @@ -363,7 +373,8 @@ int main(int argCount, const char* argv[]) ultra=0, lastCommand = 0, nbThreads = 1, - setRealTimePrio = 0; + setRealTimePrio = 0, + ldmFlag = 0; unsigned bench_nbSeconds = 3; /* would be better if this value was synchronized from bench */ size_t blockSize = 0; zstd_operation_mode operation = zom_compress; @@ 
-395,7 +406,7 @@ int main(int argCount, const char* argv[]) /* init */ (void)recursive; (void)cLevelLast; /* not used when ZSTD_NOBENCH set */ (void)dictCLevel; (void)dictSelect; (void)dictID; (void)maxDictSize; /* not used when ZSTD_NODICT set */ - (void)ultra; (void)cLevel; /* not used when ZSTD_NOCOMPRESS set */ + (void)ultra; (void)cLevel; (void)ldmFlag; /* not used when ZSTD_NOCOMPRESS set */ (void)memLimit; /* not used when ZSTD_NODECOMPRESS set */ if (filenameTable==NULL) { DISPLAY("zstd: %s \n", strerror(errno)); exit(1); } filenameTable[0] = stdinmark; @@ -448,6 +459,7 @@ int main(int argCount, const char* argv[]) if (!strcmp(argument, "--quiet")) { g_displayLevel--; continue; } if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; g_displayLevel-=(g_displayLevel==2); continue; } if (!strcmp(argument, "--ultra")) { ultra=1; continue; } + if (!strcmp(argument, "--long")) { ldmFlag = 1; continue; } if (!strcmp(argument, "--check")) { FIO_setChecksumFlag(2); continue; } if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(0); continue; } if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(2); continue; } @@ -721,6 +733,15 @@ int main(int argCount, const char* argv[]) BMK_setBlockSize(blockSize); BMK_setNbThreads(nbThreads); BMK_setNbSeconds(bench_nbSeconds); + BMK_setLdmFlag(ldmFlag); + BMK_setLdmMinMatch(g_ldmMinMatch); + BMK_setLdmHashLog(g_ldmHashLog); + if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) { + BMK_setLdmBucketSizeLog(g_ldmBucketSizeLog); + } + if (g_ldmHashEveryLog != LDM_PARAM_DEFAULT) { + BMK_setLdmHashEveryLog(g_ldmHashEveryLog); + } BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast, &compressionParams, setRealTimePrio); #endif (void)bench_nbSeconds; (void)blockSize; (void)setRealTimePrio; @@ -788,6 +809,16 @@ int main(int argCount, const char* argv[]) #ifndef ZSTD_NOCOMPRESS FIO_setNbThreads(nbThreads); FIO_setBlockSize((U32)blockSize); + FIO_setLdmFlag(ldmFlag); + 
FIO_setLdmHashLog(g_ldmHashLog); + FIO_setLdmMinMatch(g_ldmMinMatch); + if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) { + FIO_setLdmBucketSizeLog(g_ldmBucketSizeLog); + } + if (g_ldmHashEveryLog != LDM_PARAM_DEFAULT) { + FIO_setLdmHashEveryLog(g_ldmHashEveryLog); + } + if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(g_overlapLog); if ((filenameIdx==1) && outFileName) operationResult = FIO_compressFilename(outFileName, filenameTable[0], dictFileName, cLevel, &compressionParams); diff --git a/tests/playTests.sh b/tests/playTests.sh index 706cef2d..ed12802c 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -553,6 +553,15 @@ roundTripTest -g516K 19 # btopt fileRoundTripTest -g500K +$ECHO "\n**** zstd long distance matching round-trip tests **** " +roundTripTest -g0 "2 --long" +roundTripTest -g1000K "1 --long" +roundTripTest -g517K "6 --long" +roundTripTest -g516K "16 --long" +roundTripTest -g518K "19 --long" +fileRoundTripTest -g5M "3 --long" + + if [ -n "$hasMT" ] then $ECHO "\n**** zstdmt round-trip tests **** " @@ -560,6 +569,9 @@ then roundTripTest -g8M "3 -T2" roundTripTest -g8000K "2 --threads=2" fileRoundTripTest -g4M "19 -T2 -B1M" + + $ECHO "\n**** zstdmt long distance matching round-trip tests **** " + roundTripTest -g8M "3 --long -T2" else $ECHO "\n**** no multithreading, skipping zstdmt tests **** " fi @@ -648,6 +660,15 @@ roundTripTest -g6000000000 -P99 1 fileRoundTripTest -g4193M -P99 1 +$ECHO "\n**** zstd long, long distance matching round-trip tests **** " +roundTripTest -g0 "2 --long" +roundTripTest -g270000000 "1 --long" +roundTripTest -g140000000 -P60 "5 --long" +roundTripTest -g70000000 -P70 "8 --long" +roundTripTest -g18000001 -P80 "18 --long" +fileRoundTripTest -g4100M -P99 "1 --long" + + if [ -n "$hasMT" ] then $ECHO "\n**** zstdmt long round-trip tests **** " @@ -655,6 +676,7 @@ then roundTripTest -g6000000000 -P99 "1 -T2" roundTripTest -g1500000000 -P97 "1 -T999" fileRoundTripTest -g4195M -P98 " -T0" + roundTripTest 
-g1500000000 -P97 "1 --long -T999" else $ECHO "\n**** no multithreading, skipping zstdmt tests **** " fi diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 8c8adc62..e52335da 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -710,6 +710,13 @@ static size_t FUZ_randomLength(U32* seed, U32 maxLog) #define MIN(a,b) ( (a) < (b) ? (a) : (b) ) +/* Return value in range minVal <= v <= maxVal */ +static U32 FUZ_randomClampedLength(U32* seed, U32 minVal, U32 maxVal) +{ + U32 const mod = maxVal < minVal ? 1 : (maxVal + 1) - minVal; + return (U32)((FUZ_rand(seed) % mod) + minVal); +} + #define CHECK(cond, ...) { \ if (cond) { \ DISPLAY("Error => "); \ @@ -1387,6 +1394,13 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_minMatch, cParams.searchLength, useOpaqueAPI) ); if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_targetLength, cParams.targetLength, useOpaqueAPI) ); + /* mess with long distance matching parameters */ + if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_enableLongDistanceMatching, FUZ_rand(&lseed) & 63, useOpaqueAPI) ); + if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_ldmHashLog, FUZ_randomClampedLength(&lseed, ZSTD_HASHLOG_MIN, 23), useOpaqueAPI) ); + if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_ldmMinMatch, FUZ_randomClampedLength(&lseed, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX), useOpaqueAPI) ); + if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_ldmBucketSizeLog, FUZ_randomClampedLength(&lseed, 0, ZSTD_LDM_BUCKETSIZELOG_MAX), useOpaqueAPI) ); + if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_ldmHashEveryLog, FUZ_randomClampedLength(&lseed, 0, ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN), useOpaqueAPI) ); + /* unconditionally set, to be sync with decoder */ /* mess with 
frame parameters */ if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_checksumFlag, FUZ_rand(&lseed) & 1, useOpaqueAPI) );