Merge pull request #3177 from embg/dms_prefetch2

Add prefetchCDictTables CCtxParam (+10-20% cold dict compression speed)
dev
Elliot Gorokhovsky 2022-06-24 08:24:43 -07:00 committed by GitHub
commit e9d6fc867a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 67 additions and 1 deletions

View File

@ -968,6 +968,7 @@ int main (int argc, const char** argv)
unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */
ZSTD_dictContentType_e dictContentType = ZSTD_dct_auto;
ZSTD_dictAttachPref_e dictAttachPref = ZSTD_dictDefaultAttach;
ZSTD_paramSwitch_e prefetchCDictTables = ZSTD_ps_auto;
for (int argNb = 1; argNb < argc ; argNb++) {
const char* argument = argv[argNb];
@ -986,6 +987,7 @@ int main (int argc, const char** argv)
if (longCommandWArg(&argument, "--dedicated-dict-search")) { dedicatedDictSearch = 1; continue; }
if (longCommandWArg(&argument, "--dict-content-type=")) { dictContentType = (int)readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--dict-attach-pref=")) { dictAttachPref = (int)readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--prefetch-cdict-tables=")) { prefetchCDictTables = (int)readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "-")) { cLevel = (int)readU32FromChar(&argument); continue; }
/* anything that's not a command is a filename */
nameTable[nameIdx++] = argument;
@ -1008,6 +1010,7 @@ int main (int argc, const char** argv)
ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_enableDedicatedDictSearch, dedicatedDictSearch);
ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_nbWorkers, 0);
ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_forceAttachDict, dictAttachPref);
ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_prefetchCDictTables, prefetchCDictTables);
int result = bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds, benchCompression, dictContentType, cctxParams, exeName);

View File

@ -576,6 +576,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
bounds.upperBound = 1;
return bounds;
case ZSTD_c_prefetchCDictTables:
bounds.lowerBound = (int)ZSTD_ps_auto;
bounds.upperBound = (int)ZSTD_ps_disable;
return bounds;
default:
bounds.error = ERROR(parameter_unsupported);
return bounds;
@ -640,6 +645,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
case ZSTD_c_useBlockSplitter:
case ZSTD_c_useRowMatchFinder:
case ZSTD_c_deterministicRefPrefix:
case ZSTD_c_prefetchCDictTables:
default:
return 0;
}
@ -695,6 +701,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
case ZSTD_c_useBlockSplitter:
case ZSTD_c_useRowMatchFinder:
case ZSTD_c_deterministicRefPrefix:
case ZSTD_c_prefetchCDictTables:
break;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
@ -921,6 +928,11 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
CCtxParams->deterministicRefPrefix = !!value;
return CCtxParams->deterministicRefPrefix;
case ZSTD_c_prefetchCDictTables:
BOUNDCHECK(ZSTD_c_prefetchCDictTables, value);
CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value;
return CCtxParams->prefetchCDictTables;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
}
@ -1053,6 +1065,9 @@ size_t ZSTD_CCtxParams_getParameter(
case ZSTD_c_deterministicRefPrefix:
*value = (int)CCtxParams->deterministicRefPrefix;
break;
case ZSTD_c_prefetchCDictTables:
*value = (int)CCtxParams->prefetchCDictTables;
break;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
return 0;
@ -1928,6 +1943,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
/* init params */
zc->blockState.matchState.cParams = params->cParams;
zc->blockState.matchState.prefetchCDictTables = params->prefetchCDictTables == ZSTD_ps_enable;
zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
zc->consumedSrcSize = 0;
zc->producedCSize = 0;

View File

@ -235,6 +235,11 @@ struct ZSTD_matchState_t {
const ZSTD_matchState_t* dictMatchState;
ZSTD_compressionParameters cParams;
const rawSeqStore_t* ldmSeqStore;
/* Controls prefetching in some dictMatchState matchfinders.
* This behavior is controlled from the cctx ms.
* This parameter has no effect in the cdict ms. */
int prefetchCDictTables;
};
typedef struct {
@ -331,6 +336,9 @@ struct ZSTD_CCtx_params_s {
/* Internal use, for createCCtxParams() and freeCCtxParams() only */
ZSTD_customMem customMem;
/* Controls prefetching in some dictMatchState matchfinders */
ZSTD_paramSwitch_e prefetchCDictTables;
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))

View File

@ -345,6 +345,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
/* if a dictionary is attached, it must be within window range */
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
if (ms->prefetchCDictTables) {
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
PREFETCH_AREA(dictHashLong, hashTableBytes)
PREFETCH_AREA(dictHashSmall, chainTableBytes)
}
/* init */
ip += (dictAndPrefixLength == 0);

View File

@ -500,6 +500,11 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
* when translating a dict index into a local index */
assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
if (ms->prefetchCDictTables) {
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
PREFETCH_AREA(dictHashTable, hashTableBytes)
}
/* init */
DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
ip0 += (dictAndPrefixLength == 0);

View File

@ -421,6 +421,7 @@ typedef enum {
* ZSTD_c_validateSequences
* ZSTD_c_useBlockSplitter
* ZSTD_c_useRowMatchFinder
* ZSTD_c_prefetchCDictTables
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change.
@ -439,7 +440,8 @@ typedef enum {
ZSTD_c_experimentalParam12=1009,
ZSTD_c_experimentalParam13=1010,
ZSTD_c_experimentalParam14=1011,
ZSTD_c_experimentalParam15=1012
ZSTD_c_experimentalParam15=1012,
ZSTD_c_experimentalParam16=1013
} ZSTD_cParameter;
typedef struct {
@ -1954,6 +1956,29 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
*/
#define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15
/* ZSTD_c_prefetchCDictTables
* Controlled with ZSTD_paramSwitch_e enum. Default is ZSTD_ps_auto.
*
* In some situations, zstd uses CDict tables in-place rather than copying them
* into the working context. (See docs on ZSTD_dictAttachPref_e above for details).
* In such situations, compression speed is seriously impacted when CDict tables are
* "cold" (outside CPU cache). This parameter instructs zstd to prefetch CDict tables
* when they are used in-place.
*
* For sufficiently small inputs, the cost of the prefetch will outweigh the benefit.
* For sufficiently large inputs, zstd will by default memcpy() CDict tables
* into the working context, so there is no need to prefetch. This parameter is
* targeted at a middle range of input sizes, where a prefetch is cheap enough to be
* useful but memcpy() is too expensive. The exact range of input sizes where this
* makes sense is best determined by careful experimentation.
*
* Note: for this parameter, ZSTD_ps_auto is currently equivalent to ZSTD_ps_disable,
* but in the future zstd may conditionally enable this feature via an auto-detection
* heuristic for cold CDicts.
* Use ZSTD_ps_disable to opt out of prefetching under any circumstances.
*/
#define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16
/*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
* and store it into int* value.

View File

@ -98,6 +98,7 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer
setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer);
setRand(cctx, ZSTD_c_useBlockSplitter, 0, 2, producer);
setRand(cctx, ZSTD_c_deterministicRefPrefix, 0, 1, producer);
setRand(cctx, ZSTD_c_prefetchCDictTables, 0, 2, producer);
if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer);
}

View File

@ -2041,6 +2041,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_compressionLevel, l) );
CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_enableDedicatedDictSearch, 0) );
CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach) );
CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_prefetchCDictTables, seed % 3) );
wdict_cSize = ZSTD_compress2(ctxOrig, compressedBuffer, compressedBufferSize, contentStart, contentSize);
if (wdict_cSize > target_wdict_cSize[l]) {
DISPLAYLEVEL(1, "error : compression with dictionary and compress2 at level %i worse than expected (%u > %u) \n",