From a1f04d518d34decb2ec5c6471d406fb8dad249ae Mon Sep 17 00:00:00 2001 From: Stella Lau Date: Fri, 1 Sep 2017 14:52:51 -0700 Subject: [PATCH] Move hashEveryLog to cctxParams and update cli --- lib/common/zstd_internal.h | 2 +- lib/compress/zstd_compress.c | 47 ++++++++++++++++++++++++------------ lib/zstd.h | 13 +++++++--- programs/bench.c | 22 +++++++++++++++++ programs/bench.h | 3 +++ programs/fileio.c | 19 +++++++++++++++ programs/fileio.h | 3 +++ programs/zstdcli.c | 16 ++++++++++++ tests/fuzzer.c | 1 - tests/zstreamtest.c | 2 -- 10 files changed, 105 insertions(+), 23 deletions(-) diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index b3d9a6c6..cd414646 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -256,7 +256,6 @@ typedef struct { typedef struct { ldmEntry_t* hashTable; BYTE* bucketOffsets; /* Next position in bucket to insert entry */ - U32 hashEveryLog; /* Log number of entries to skip */ U64 hashPower; /* Used to compute the rolling hash. * Depends on ldmParams.minMatchLength */ } ldmState_t; @@ -266,6 +265,7 @@ typedef struct { U32 hashLog; /* Log size of hashTable */ U32 bucketLog; /* Log number of buckets, at most 4 */ U32 minMatchLength; /* Minimum match length */ + U32 hashEveryLog; /* Log number of entries to skip */ } ldmParams_t; typedef struct { diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index e74787f3..fc8e9b0f 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -42,6 +42,7 @@ typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZS #define LDM_WINDOW_LOG 27 #define LDM_HASH_LOG 20 #define LDM_HASH_CHAR_OFFSET 10 +#define LDM_HASHEVERYLOG_NOTSET 9999 /*-************************************* @@ -320,6 +321,7 @@ static size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm) params->hashLog = LDM_HASH_LOG; params->bucketLog = LDM_BUCKET_SIZE_LOG; params->minMatchLength = LDM_MIN_MATCH_LENGTH; + params->hashEveryLog = LDM_HASHEVERYLOG_NOTSET; return 0; } @@ -385,6 +387,10 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v if (cctx->cdict) return ERROR(stage_wrong); return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + case ZSTD_p_ldmHashEveryLog: + if (cctx->cdict) return ERROR(stage_wrong); + return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + default: return ERROR(parameter_unsupported); } } @@ -503,6 +509,13 @@ size_t ZSTD_CCtxParam_setParameter( params->ldmParams.minMatchLength = value; return 0; + case ZSTD_p_ldmHashEveryLog : + if (value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) { + return ERROR(parameter_outOfBound); + } + params->ldmParams.hashEveryLog = value; + return 0; + default: return ERROR(parameter_unsupported); } } @@ -538,7 +551,7 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams( cctx, ZSTD_p_overlapSizeLog, params->overlapSizeLog) ); } - /* Copy long distance matching parameter */ + /* Copy long distance matching parameters */ cctx->requestedParams.ldmParams = params->ldmParams; /* customMem is used only for create/free params and can be ignored */ @@ -742,7 +755,6 @@ size_t ZSTD_estimateCCtxSize_advanced_usingCCtxParams(const ZSTD_CCtx_params* pa + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t)); size_t const optSpace = ((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btultra)) ? optBudget : 0; - /* Ldm parameters can not currently be changed */ size_t const ldmSpace = params->ldmParams.enableLdm ? ZSTD_ldm_getTableSize(params->ldmParams.hashLog, params->ldmParams.bucketLog) : 0; @@ -813,7 +825,8 @@ static U32 ZSTD_equivalentLdmParams(ldmParams_t ldmParams1, (ldmParams1.enableLdm == ldmParams2.enableLdm && ldmParams1.hashLog == ldmParams2.hashLog && ldmParams1.bucketLog == ldmParams2.bucketLog && - ldmParams1.minMatchLength == ldmParams2.minMatchLength); + ldmParams1.minMatchLength == ldmParams2.minMatchLength && + ldmParams1.hashEveryLog == ldmParams2.hashEveryLog); } /** Equivalence for resetCCtx purposes */ @@ -866,6 +879,8 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, if (crp == ZSTDcrp_continue) { if (ZSTD_equivalentParams(params, zc->appliedParams)) { DEBUGLOG(5, "ZSTD_equivalentParams()==1"); + assert(!(params.ldmParams.enableLdm && + params.ldmParams.hashEveryLog == LDM_HASHEVERYLOG_NOTSET)); zc->entropy->hufCTable_repeatMode = HUF_repeat_none; zc->entropy->offcode_repeatMode = FSE_repeat_none; zc->entropy->matchlength_repeatMode = FSE_repeat_none; @@ -874,9 +889,11 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, } } if (params.ldmParams.enableLdm) { - zc->ldmState.hashEveryLog = - params.cParams.windowLog < params.ldmParams.hashLog ? - 0 : params.cParams.windowLog - params.ldmParams.hashLog; + if (params.ldmParams.hashEveryLog == LDM_HASHEVERYLOG_NOTSET) { + params.ldmParams.hashEveryLog = + params.cParams.windowLog < params.ldmParams.hashLog ? + 0 : params.cParams.windowLog - params.ldmParams.hashLog; + } zc->ldmState.hashPower = ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength); } @@ -3159,19 +3176,19 @@ static void ZSTD_ldm_insertEntry(ldmState_t* ldmState, * * Gets the small hash, checksum, and tag from the rollingHash. * - * If the tag matches (1 << ldmState->hashEveryLog)-1, then + * If the tag matches (1 << ldmParams.hashEveryLog)-1, then * creates an ldmEntry from the offset, and inserts it into the hash table. * * hBits is the length of the small hash, which is the most significant hBits * of rollingHash. The checksum is the next 32 most significant bits, followed - * by ldmState->hashEveryLog bits that make up the tag. */ + * by ldmParams.hashEveryLog bits that make up the tag. */ static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState, U64 rollingHash, U32 hBits, U32 const offset, ldmParams_t const ldmParams) { - U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmState->hashEveryLog); - U32 const tagMask = (1 << ldmState->hashEveryLog) - 1; + U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog); + U32 const tagMask = (1 << ldmParams.hashEveryLog) - 1; if (tag == tagMask) { U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits); U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); @@ -3349,7 +3366,7 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx, const U64 hashPower = ldmState->hashPower; const U32 hBits = ldmParams.hashLog - ldmParams.bucketLog; const U32 ldmBucketSize = (1 << ldmParams.bucketLog); - const U32 ldmTagMask = (1 << ldmState->hashEveryLog) - 1; + const U32 ldmTagMask = (1 << ldmParams.hashEveryLog) - 1; seqStore_t* const seqStorePtr = &(cctx->seqStore); const BYTE* const base = cctx->base; const BYTE* const istart = (const BYTE*)src; @@ -3388,7 +3405,7 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx, lastHashed = ip; /* Do not insert and do not look for a match */ - if (ZSTD_ldm_getTag(rollingHash, hBits, ldmState->hashEveryLog) != + if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) != ldmTagMask) { ip++; continue; @@ -3546,12 +3563,12 @@ static size_t ZSTD_compressBlock_ldm_extDict_generic( ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - ldmState_t* ldmState = &(ctx->ldmState); + ldmState_t* const ldmState = &(ctx->ldmState); const ldmParams_t ldmParams = ctx->appliedParams.ldmParams; const U64 hashPower = ldmState->hashPower; const U32 hBits = ldmParams.hashLog - ldmParams.bucketLog; const U32 ldmBucketSize = (1 << ldmParams.bucketLog); - const U32 ldmTagMask = (1 << ctx->ldmState.hashEveryLog) - 1; + const U32 ldmTagMask = (1 << ldmParams.hashEveryLog) - 1; seqStore_t* const seqStorePtr = &(ctx->seqStore); const BYTE* const base = ctx->base; const BYTE* const dictBase = ctx->dictBase; @@ -3594,7 +3611,7 @@ static size_t ZSTD_compressBlock_ldm_extDict_generic( } lastHashed = ip; - if (ZSTD_ldm_getTag(rollingHash, hBits, ldmState->hashEveryLog) != + if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) != ldmTagMask) { /* Don't insert and don't look for a match */ ip++; diff --git a/lib/zstd.h b/lib/zstd.h index bed583c9..a7ad9771 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -984,11 +984,16 @@ typedef enum { * ZSTD_p_compressionLevel and before * ZSTD_p_windowLog and other LDM parameters. */ ZSTD_p_ldmHashLog, /* Size of the table for long distance matching. - * Must be clamped between ZSTD_HASHLOG_MIN and - * ZSTD_HASHLOG_MAX */ + * Must be clamped between ZSTD_HASHLOG_MIN and + * ZSTD_HASHLOG_MAX */ ZSTD_p_ldmMinMatch, /* Minimum size of searched matches for long distance matcher. - * Must be clamped between ZSTD_LDM_SEARCHLENGTH_MIN - * and ZSTD_LDM_SEARCHLENGTH_MAX. */ + * Must be clamped between ZSTD_LDM_SEARCHLENGTH_MIN + * and ZSTD_LDM_SEARCHLENGTH_MAX. */ + ZSTD_p_ldmHashEveryLog, /* Frequency of inserting/looking up entries in the + * LDM hash table. The default is + * (windowLog - ldmHashLog) to optimize hash table + * usage. Must be clamped between 0 and + * ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN. */ } ZSTD_cParameter; diff --git a/programs/bench.c b/programs/bench.c index d77f9a20..2a2510a2 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -134,6 +134,23 @@ void BMK_setLdmFlag(unsigned ldmFlag) { g_ldmFlag = ldmFlag; } +static U32 g_ldmMinMatch = 0; +void BMK_setLdmMinMatch(unsigned ldmMinMatch) { + g_ldmMinMatch = ldmMinMatch; +} + +static U32 g_ldmHashLog = 0; +void BMK_setLdmHashLog(unsigned ldmHashLog) { + g_ldmHashLog = ldmHashLog; +} + +#define BMK_LDM_HASHEVERYLOG_NOTSET 9999 +static U32 g_ldmHashEveryLog = BMK_LDM_HASHEVERYLOG_NOTSET; +void BMK_setLdmHashEveryLog(unsigned ldmHashEveryLog) { + g_ldmHashEveryLog = ldmHashEveryLog; +} + + /* ******************************************************** * Bench functions **********************************************************/ @@ -270,6 +287,11 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbThreads, g_nbThreads); ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionLevel, cLevel); ZSTD_CCtx_setParameter(ctx, ZSTD_p_longDistanceMatching, g_ldmFlag); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmMinMatch, g_ldmMinMatch); + ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashLog, g_ldmHashLog); + if (g_ldmHashEveryLog != BMK_LDM_HASHEVERYLOG_NOTSET) { + ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashEveryLog, g_ldmHashEveryLog); + } ZSTD_CCtx_setParameter(ctx, ZSTD_p_windowLog, comprParams->windowLog); ZSTD_CCtx_setParameter(ctx, ZSTD_p_chainLog, comprParams->chainLog); ZSTD_CCtx_setParameter(ctx, ZSTD_p_searchLog, comprParams->searchLog); diff --git a/programs/bench.h b/programs/bench.h index 7fb73c4d..04d220a9 100644 --- a/programs/bench.h +++ b/programs/bench.h @@ -26,5 +26,8 @@ void BMK_setNotificationLevel(unsigned level); void BMK_setAdditionalParam(int additionalParam); void BMK_setDecodeOnlyMode(unsigned decodeFlag); void BMK_setLdmFlag(unsigned ldmFlag); +void BMK_setLdmMinMatch(unsigned ldmMinMatch); +void BMK_setLdmHashLog(unsigned ldmHashLog); +void BMK_setLdmHashEveryLog(unsigned ldmHashEveryLog); #endif /* BENCH_H_121279284357 */ diff --git a/programs/fileio.c b/programs/fileio.c index c86b2533..fc390afe 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -217,6 +217,20 @@ static U32 g_ldmFlag = 0; void FIO_setLdmFlag(unsigned ldmFlag) { g_ldmFlag = (ldmFlag>0); } +static U32 g_ldmHashLog = 0; +void FIO_setLdmHashLog(unsigned ldmHashLog) { + g_ldmHashLog = ldmHashLog; +} +static U32 g_ldmMinMatch = 0; +void FIO_setLdmMinMatch(unsigned ldmMinMatch) { + g_ldmMinMatch = ldmMinMatch; +} +#define FIO_LDM_HASHEVERYLOG_NOTSET 9999 +static U32 g_ldmHashEveryLog = FIO_LDM_HASHEVERYLOG_NOTSET; +void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog) { + g_ldmHashEveryLog = ldmHashEveryLog; +} + /*-************************************* @@ -406,6 +420,11 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel, CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, cLevel) ); /* long distance matching */ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_longDistanceMatching, g_ldmFlag) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmHashLog, g_ldmHashLog) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmMinMatch, g_ldmMinMatch) ); + if (g_ldmHashEveryLog != FIO_LDM_HASHEVERYLOG_NOTSET) { + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmHashEveryLog, g_ldmHashEveryLog) ); + } /* compression parameters */ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_windowLog, comprParams->windowLog) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_chainLog, comprParams->chainLog) ); diff --git a/programs/fileio.h b/programs/fileio.h index 7e200b06..fabb46db 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -57,6 +57,9 @@ void FIO_setNbThreads(unsigned nbThreads); void FIO_setBlockSize(unsigned blockSize); void FIO_setOverlapLog(unsigned overlapLog); void FIO_setLdmFlag(unsigned ldmFlag); +void FIO_setLdmHashLog(unsigned ldmHashLog); +void FIO_setLdmMinMatch(unsigned ldmMinMatch); +void FIO_setLdmHashEveryLog(unsigned ldmHashEveryLog); /*-************************************* diff --git a/programs/zstdcli.c b/programs/zstdcli.c index b5fd1be5..78d6b339 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -72,7 +72,11 @@ static const unsigned g_defaultMaxDictSize = 110 KB; static const int g_defaultDictCLevel = 3; static const unsigned g_defaultSelectivityLevel = 9; #define OVERLAP_LOG_DEFAULT 9999 +#define LDM_HASHEVERYLOG_DEFAULT 9999 static U32 g_overlapLog = OVERLAP_LOG_DEFAULT; +static U32 g_ldmHashLog = 0; +static U32 g_ldmMinMatch = 0; +static U32 g_ldmHashEveryLog = LDM_HASHEVERYLOG_DEFAULT; /*-************************************ @@ -305,6 +309,9 @@ static unsigned parseCompressionParameters(const char* stringPtr, ZSTD_compressi if (longCommandWArg(&stringPtr, "targetLength=") || longCommandWArg(&stringPtr, "tlen=")) { params->targetLength = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } if (longCommandWArg(&stringPtr, "strategy=") || longCommandWArg(&stringPtr, "strat=")) { params->strategy = (ZSTD_strategy)(readU32FromChar(&stringPtr)); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } if (longCommandWArg(&stringPtr, "overlapLog=") || longCommandWArg(&stringPtr, "ovlog=")) { g_overlapLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "ldmHashLog=") || longCommandWArg(&stringPtr, "ldmHlog=")) { g_ldmHashLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "ldmSearchLength=") || longCommandWArg(&stringPtr, "ldmSlen=")) { g_ldmMinMatch = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "ldmHashEveryLog=")) { g_ldmHashEveryLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } return 0; } @@ -724,6 +731,9 @@ int main(int argCount, const char* argv[]) BMK_setNbThreads(nbThreads); BMK_setNbSeconds(bench_nbSeconds); BMK_setLdmFlag(ldmFlag); + BMK_setLdmMinMatch(g_ldmMinMatch); + BMK_setLdmHashLog(g_ldmHashLog); + BMK_setLdmHashEveryLog(g_ldmHashEveryLog); BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast, &compressionParams, setRealTimePrio); #endif (void)bench_nbSeconds; (void)blockSize; (void)setRealTimePrio; @@ -792,6 +802,12 @@ int main(int argCount, const char* argv[]) FIO_setNbThreads(nbThreads); FIO_setBlockSize((U32)blockSize); FIO_setLdmFlag(ldmFlag); + FIO_setLdmHashLog(g_ldmHashLog); + FIO_setLdmMinMatch(g_ldmMinMatch); + if (g_ldmHashEveryLog != LDM_HASHEVERYLOG_DEFAULT) { + FIO_setLdmHashEveryLog(g_ldmHashEveryLog); + } + if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(g_overlapLog); if ((filenameIdx==1) && outFileName) operationResult = FIO_compressFilename(outFileName, filenameTable[0], dictFileName, cLevel, &compressionParams); diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 3b3e23b3..b2349870 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -1342,7 +1342,6 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD dictSize = FUZ_rLogLength(&lseed, dictLog); /* needed also for decompression */ dict = srcBuffer + (FUZ_rand(&lseed) % (srcBufferSize - dictSize)); - CHECK_Z ( ZSTD_CCtx_setParameter(refCtx, ZSTD_p_longDistanceMatching, FUZ_rand(&lseed)&255) ); if (FUZ_rand(&lseed) & 0xF) { CHECK_Z ( ZSTD_compressBegin_usingDict(refCtx, dict, dictSize, cLevel) ); } else { diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index dedb7eb3..f248d260 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -1381,8 +1381,6 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_targetLength, cParams.targetLength, useOpaqueAPI) ); if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_longDistanceMatching, FUZ_rand(&lseed) & 63, useOpaqueAPI) ); - if (FUZ_rand(&lseed) & 7) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_ldmMinMatch, FUZ_rand(&lseed) % 128 + 4, useOpaqueAPI ) ); - if (FUZ_rand(&lseed) & 7) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_ldmHashLog, FUZ_rand(&lseed) % 18 + 10, useOpaqueAPI ) ); /* unconditionally set, to be sync with decoder */ /* mess with frame parameters */