Merge pull request #3177 from embg/dms_prefetch2

Add prefetchCDictTables CCtxParam (+10-20% cold dict compression speed)
dev
Elliot Gorokhovsky 2022-06-24 08:24:43 -07:00 committed by GitHub
commit e9d6fc867a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 67 additions and 1 deletions

View File

@ -968,6 +968,7 @@ int main (int argc, const char** argv)
unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */ unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */
ZSTD_dictContentType_e dictContentType = ZSTD_dct_auto; ZSTD_dictContentType_e dictContentType = ZSTD_dct_auto;
ZSTD_dictAttachPref_e dictAttachPref = ZSTD_dictDefaultAttach; ZSTD_dictAttachPref_e dictAttachPref = ZSTD_dictDefaultAttach;
ZSTD_paramSwitch_e prefetchCDictTables = ZSTD_ps_auto;
for (int argNb = 1; argNb < argc ; argNb++) { for (int argNb = 1; argNb < argc ; argNb++) {
const char* argument = argv[argNb]; const char* argument = argv[argNb];
@ -986,6 +987,7 @@ int main (int argc, const char** argv)
if (longCommandWArg(&argument, "--dedicated-dict-search")) { dedicatedDictSearch = 1; continue; } if (longCommandWArg(&argument, "--dedicated-dict-search")) { dedicatedDictSearch = 1; continue; }
if (longCommandWArg(&argument, "--dict-content-type=")) { dictContentType = (int)readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--dict-content-type=")) { dictContentType = (int)readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--dict-attach-pref=")) { dictAttachPref = (int)readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--dict-attach-pref=")) { dictAttachPref = (int)readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--prefetch-cdict-tables=")) { prefetchCDictTables = (int)readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "-")) { cLevel = (int)readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "-")) { cLevel = (int)readU32FromChar(&argument); continue; }
/* anything that's not a command is a filename */ /* anything that's not a command is a filename */
nameTable[nameIdx++] = argument; nameTable[nameIdx++] = argument;
@ -1008,6 +1010,7 @@ int main (int argc, const char** argv)
ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_enableDedicatedDictSearch, dedicatedDictSearch); ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_enableDedicatedDictSearch, dedicatedDictSearch);
ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_nbWorkers, 0); ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_nbWorkers, 0);
ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_forceAttachDict, dictAttachPref); ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_forceAttachDict, dictAttachPref);
ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_prefetchCDictTables, prefetchCDictTables);
int result = bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds, benchCompression, dictContentType, cctxParams, exeName); int result = bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds, benchCompression, dictContentType, cctxParams, exeName);

View File

@ -576,6 +576,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
bounds.upperBound = 1; bounds.upperBound = 1;
return bounds; return bounds;
case ZSTD_c_prefetchCDictTables:
bounds.lowerBound = (int)ZSTD_ps_auto;
bounds.upperBound = (int)ZSTD_ps_disable;
return bounds;
default: default:
bounds.error = ERROR(parameter_unsupported); bounds.error = ERROR(parameter_unsupported);
return bounds; return bounds;
@ -640,6 +645,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
case ZSTD_c_useBlockSplitter: case ZSTD_c_useBlockSplitter:
case ZSTD_c_useRowMatchFinder: case ZSTD_c_useRowMatchFinder:
case ZSTD_c_deterministicRefPrefix: case ZSTD_c_deterministicRefPrefix:
case ZSTD_c_prefetchCDictTables:
default: default:
return 0; return 0;
} }
@ -695,6 +701,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
case ZSTD_c_useBlockSplitter: case ZSTD_c_useBlockSplitter:
case ZSTD_c_useRowMatchFinder: case ZSTD_c_useRowMatchFinder:
case ZSTD_c_deterministicRefPrefix: case ZSTD_c_deterministicRefPrefix:
case ZSTD_c_prefetchCDictTables:
break; break;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
@ -921,6 +928,11 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
CCtxParams->deterministicRefPrefix = !!value; CCtxParams->deterministicRefPrefix = !!value;
return CCtxParams->deterministicRefPrefix; return CCtxParams->deterministicRefPrefix;
case ZSTD_c_prefetchCDictTables:
BOUNDCHECK(ZSTD_c_prefetchCDictTables, value);
CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value;
return CCtxParams->prefetchCDictTables;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
} }
} }
@ -1053,6 +1065,9 @@ size_t ZSTD_CCtxParams_getParameter(
case ZSTD_c_deterministicRefPrefix: case ZSTD_c_deterministicRefPrefix:
*value = (int)CCtxParams->deterministicRefPrefix; *value = (int)CCtxParams->deterministicRefPrefix;
break; break;
case ZSTD_c_prefetchCDictTables:
*value = (int)CCtxParams->prefetchCDictTables;
break;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
} }
return 0; return 0;
@ -1928,6 +1943,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
/* init params */ /* init params */
zc->blockState.matchState.cParams = params->cParams; zc->blockState.matchState.cParams = params->cParams;
zc->blockState.matchState.prefetchCDictTables = params->prefetchCDictTables == ZSTD_ps_enable;
zc->pledgedSrcSizePlusOne = pledgedSrcSize+1; zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
zc->consumedSrcSize = 0; zc->consumedSrcSize = 0;
zc->producedCSize = 0; zc->producedCSize = 0;

View File

@ -235,6 +235,11 @@ struct ZSTD_matchState_t {
const ZSTD_matchState_t* dictMatchState; const ZSTD_matchState_t* dictMatchState;
ZSTD_compressionParameters cParams; ZSTD_compressionParameters cParams;
const rawSeqStore_t* ldmSeqStore; const rawSeqStore_t* ldmSeqStore;
/* Controls prefetching in some dictMatchState matchfinders.
* This behavior is controlled from the cctx ms.
* This parameter has no effect in the cdict ms. */
int prefetchCDictTables;
}; };
typedef struct { typedef struct {
@ -331,6 +336,9 @@ struct ZSTD_CCtx_params_s {
/* Internal use, for createCCtxParams() and freeCCtxParams() only */ /* Internal use, for createCCtxParams() and freeCCtxParams() only */
ZSTD_customMem customMem; ZSTD_customMem customMem;
/* Controls prefetching in some dictMatchState matchfinders */
ZSTD_paramSwitch_e prefetchCDictTables;
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2)) #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))

View File

@ -345,6 +345,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
/* if a dictionary is attached, it must be within window range */ /* if a dictionary is attached, it must be within window range */
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
if (ms->prefetchCDictTables) {
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
PREFETCH_AREA(dictHashLong, hashTableBytes)
PREFETCH_AREA(dictHashSmall, chainTableBytes)
}
/* init */ /* init */
ip += (dictAndPrefixLength == 0); ip += (dictAndPrefixLength == 0);

View File

@ -500,6 +500,11 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
* when translating a dict index into a local index */ * when translating a dict index into a local index */
assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
if (ms->prefetchCDictTables) {
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
PREFETCH_AREA(dictHashTable, hashTableBytes)
}
/* init */ /* init */
DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
ip0 += (dictAndPrefixLength == 0); ip0 += (dictAndPrefixLength == 0);

View File

@ -421,6 +421,7 @@ typedef enum {
* ZSTD_c_validateSequences * ZSTD_c_validateSequences
* ZSTD_c_useBlockSplitter * ZSTD_c_useBlockSplitter
* ZSTD_c_useRowMatchFinder * ZSTD_c_useRowMatchFinder
* ZSTD_c_prefetchCDictTables
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly; * note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change. * also, the enums values themselves are unstable and can still change.
@ -439,7 +440,8 @@ typedef enum {
ZSTD_c_experimentalParam12=1009, ZSTD_c_experimentalParam12=1009,
ZSTD_c_experimentalParam13=1010, ZSTD_c_experimentalParam13=1010,
ZSTD_c_experimentalParam14=1011, ZSTD_c_experimentalParam14=1011,
ZSTD_c_experimentalParam15=1012 ZSTD_c_experimentalParam15=1012,
ZSTD_c_experimentalParam16=1013
} ZSTD_cParameter; } ZSTD_cParameter;
typedef struct { typedef struct {
@ -1954,6 +1956,29 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
*/ */
#define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15 #define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15
/* ZSTD_c_prefetchCDictTables
* Controlled with ZSTD_paramSwitch_e enum. Default is ZSTD_ps_auto.
*
* In some situations, zstd uses CDict tables in-place rather than copying them
* into the working context. (See docs on ZSTD_dictAttachPref_e above for details).
* In such situations, compression speed is seriously impacted when CDict tables are
* "cold" (outside CPU cache). This parameter instructs zstd to prefetch CDict tables
* when they are used in-place.
*
* For sufficiently small inputs, the cost of the prefetch will outweigh the benefit.
* For sufficiently large inputs, zstd will by default memcpy() CDict tables
* into the working context, so there is no need to prefetch. This parameter is
* targeted at a middle range of input sizes, where a prefetch is cheap enough to be
* useful but memcpy() is too expensive. The exact range of input sizes where this
* makes sense is best determined by careful experimentation.
*
* Note: for this parameter, ZSTD_ps_auto is currently equivalent to ZSTD_ps_disable,
* but in the future zstd may conditionally enable this feature via an auto-detection
* heuristic for cold CDicts.
* Use ZSTD_ps_disable to opt out of prefetching under any circumstances.
*/
#define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16
/*! ZSTD_CCtx_getParameter() : /*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter, * Get the requested compression parameter value, selected by enum ZSTD_cParameter,
* and store it into int* value. * and store it into int* value.

View File

@ -98,6 +98,7 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer
setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer); setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer);
setRand(cctx, ZSTD_c_useBlockSplitter, 0, 2, producer); setRand(cctx, ZSTD_c_useBlockSplitter, 0, 2, producer);
setRand(cctx, ZSTD_c_deterministicRefPrefix, 0, 1, producer); setRand(cctx, ZSTD_c_deterministicRefPrefix, 0, 1, producer);
setRand(cctx, ZSTD_c_prefetchCDictTables, 0, 2, producer);
if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) { if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer); setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer);
} }

View File

@ -2041,6 +2041,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_compressionLevel, l) ); CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_compressionLevel, l) );
CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_enableDedicatedDictSearch, 0) ); CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_enableDedicatedDictSearch, 0) );
CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach) ); CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach) );
CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_prefetchCDictTables, seed % 3) );
wdict_cSize = ZSTD_compress2(ctxOrig, compressedBuffer, compressedBufferSize, contentStart, contentSize); wdict_cSize = ZSTD_compress2(ctxOrig, compressedBuffer, compressedBufferSize, contentStart, contentSize);
if (wdict_cSize > target_wdict_cSize[l]) { if (wdict_cSize > target_wdict_cSize[l]) {
DISPLAYLEVEL(1, "error : compression with dictionary and compress2 at level %i worse than expected (%u > %u) \n", DISPLAYLEVEL(1, "error : compression with dictionary and compress2 at level %i worse than expected (%u > %u) \n",