Merge pull request #3177 from embg/dms_prefetch2
Add prefetchCDictTables CCtxParam (+10-20% cold dict compression speed) dev
commit
e9d6fc867a
|
@ -968,6 +968,7 @@ int main (int argc, const char** argv)
|
||||||
unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */
|
unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */
|
||||||
ZSTD_dictContentType_e dictContentType = ZSTD_dct_auto;
|
ZSTD_dictContentType_e dictContentType = ZSTD_dct_auto;
|
||||||
ZSTD_dictAttachPref_e dictAttachPref = ZSTD_dictDefaultAttach;
|
ZSTD_dictAttachPref_e dictAttachPref = ZSTD_dictDefaultAttach;
|
||||||
|
ZSTD_paramSwitch_e prefetchCDictTables = ZSTD_ps_auto;
|
||||||
|
|
||||||
for (int argNb = 1; argNb < argc ; argNb++) {
|
for (int argNb = 1; argNb < argc ; argNb++) {
|
||||||
const char* argument = argv[argNb];
|
const char* argument = argv[argNb];
|
||||||
|
@ -986,6 +987,7 @@ int main (int argc, const char** argv)
|
||||||
if (longCommandWArg(&argument, "--dedicated-dict-search")) { dedicatedDictSearch = 1; continue; }
|
if (longCommandWArg(&argument, "--dedicated-dict-search")) { dedicatedDictSearch = 1; continue; }
|
||||||
if (longCommandWArg(&argument, "--dict-content-type=")) { dictContentType = (int)readU32FromChar(&argument); continue; }
|
if (longCommandWArg(&argument, "--dict-content-type=")) { dictContentType = (int)readU32FromChar(&argument); continue; }
|
||||||
if (longCommandWArg(&argument, "--dict-attach-pref=")) { dictAttachPref = (int)readU32FromChar(&argument); continue; }
|
if (longCommandWArg(&argument, "--dict-attach-pref=")) { dictAttachPref = (int)readU32FromChar(&argument); continue; }
|
||||||
|
if (longCommandWArg(&argument, "--prefetch-cdict-tables=")) { prefetchCDictTables = (int)readU32FromChar(&argument); continue; }
|
||||||
if (longCommandWArg(&argument, "-")) { cLevel = (int)readU32FromChar(&argument); continue; }
|
if (longCommandWArg(&argument, "-")) { cLevel = (int)readU32FromChar(&argument); continue; }
|
||||||
/* anything that's not a command is a filename */
|
/* anything that's not a command is a filename */
|
||||||
nameTable[nameIdx++] = argument;
|
nameTable[nameIdx++] = argument;
|
||||||
|
@ -1008,6 +1010,7 @@ int main (int argc, const char** argv)
|
||||||
ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_enableDedicatedDictSearch, dedicatedDictSearch);
|
ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_enableDedicatedDictSearch, dedicatedDictSearch);
|
||||||
ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_nbWorkers, 0);
|
ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_nbWorkers, 0);
|
||||||
ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_forceAttachDict, dictAttachPref);
|
ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_forceAttachDict, dictAttachPref);
|
||||||
|
ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_prefetchCDictTables, prefetchCDictTables);
|
||||||
|
|
||||||
int result = bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds, benchCompression, dictContentType, cctxParams, exeName);
|
int result = bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds, benchCompression, dictContentType, cctxParams, exeName);
|
||||||
|
|
||||||
|
|
|
@ -576,6 +576,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
|
||||||
bounds.upperBound = 1;
|
bounds.upperBound = 1;
|
||||||
return bounds;
|
return bounds;
|
||||||
|
|
||||||
|
case ZSTD_c_prefetchCDictTables:
|
||||||
|
bounds.lowerBound = (int)ZSTD_ps_auto;
|
||||||
|
bounds.upperBound = (int)ZSTD_ps_disable;
|
||||||
|
return bounds;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
bounds.error = ERROR(parameter_unsupported);
|
bounds.error = ERROR(parameter_unsupported);
|
||||||
return bounds;
|
return bounds;
|
||||||
|
@ -640,6 +645,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
|
||||||
case ZSTD_c_useBlockSplitter:
|
case ZSTD_c_useBlockSplitter:
|
||||||
case ZSTD_c_useRowMatchFinder:
|
case ZSTD_c_useRowMatchFinder:
|
||||||
case ZSTD_c_deterministicRefPrefix:
|
case ZSTD_c_deterministicRefPrefix:
|
||||||
|
case ZSTD_c_prefetchCDictTables:
|
||||||
default:
|
default:
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -695,6 +701,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
|
||||||
case ZSTD_c_useBlockSplitter:
|
case ZSTD_c_useBlockSplitter:
|
||||||
case ZSTD_c_useRowMatchFinder:
|
case ZSTD_c_useRowMatchFinder:
|
||||||
case ZSTD_c_deterministicRefPrefix:
|
case ZSTD_c_deterministicRefPrefix:
|
||||||
|
case ZSTD_c_prefetchCDictTables:
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
||||||
|
@ -921,6 +928,11 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
||||||
CCtxParams->deterministicRefPrefix = !!value;
|
CCtxParams->deterministicRefPrefix = !!value;
|
||||||
return CCtxParams->deterministicRefPrefix;
|
return CCtxParams->deterministicRefPrefix;
|
||||||
|
|
||||||
|
case ZSTD_c_prefetchCDictTables:
|
||||||
|
BOUNDCHECK(ZSTD_c_prefetchCDictTables, value);
|
||||||
|
CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value;
|
||||||
|
return CCtxParams->prefetchCDictTables;
|
||||||
|
|
||||||
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1053,6 +1065,9 @@ size_t ZSTD_CCtxParams_getParameter(
|
||||||
case ZSTD_c_deterministicRefPrefix:
|
case ZSTD_c_deterministicRefPrefix:
|
||||||
*value = (int)CCtxParams->deterministicRefPrefix;
|
*value = (int)CCtxParams->deterministicRefPrefix;
|
||||||
break;
|
break;
|
||||||
|
case ZSTD_c_prefetchCDictTables:
|
||||||
|
*value = (int)CCtxParams->prefetchCDictTables;
|
||||||
|
break;
|
||||||
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -1928,6 +1943,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
||||||
|
|
||||||
/* init params */
|
/* init params */
|
||||||
zc->blockState.matchState.cParams = params->cParams;
|
zc->blockState.matchState.cParams = params->cParams;
|
||||||
|
zc->blockState.matchState.prefetchCDictTables = params->prefetchCDictTables == ZSTD_ps_enable;
|
||||||
zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
|
zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
|
||||||
zc->consumedSrcSize = 0;
|
zc->consumedSrcSize = 0;
|
||||||
zc->producedCSize = 0;
|
zc->producedCSize = 0;
|
||||||
|
|
|
@ -235,6 +235,11 @@ struct ZSTD_matchState_t {
|
||||||
const ZSTD_matchState_t* dictMatchState;
|
const ZSTD_matchState_t* dictMatchState;
|
||||||
ZSTD_compressionParameters cParams;
|
ZSTD_compressionParameters cParams;
|
||||||
const rawSeqStore_t* ldmSeqStore;
|
const rawSeqStore_t* ldmSeqStore;
|
||||||
|
|
||||||
|
/* Controls prefetching in some dictMatchState matchfinders.
|
||||||
|
* This behavior is controlled from the cctx match state (ms).
|
||||||
|
* This parameter has no effect in the cdict ms. */
|
||||||
|
int prefetchCDictTables;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
@ -331,6 +336,9 @@ struct ZSTD_CCtx_params_s {
|
||||||
|
|
||||||
/* Internal use, for createCCtxParams() and freeCCtxParams() only */
|
/* Internal use, for createCCtxParams() and freeCCtxParams() only */
|
||||||
ZSTD_customMem customMem;
|
ZSTD_customMem customMem;
|
||||||
|
|
||||||
|
/* Controls prefetching in some dictMatchState matchfinders */
|
||||||
|
ZSTD_paramSwitch_e prefetchCDictTables;
|
||||||
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
|
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
|
||||||
|
|
||||||
#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
|
#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
|
||||||
|
|
|
@ -345,6 +345,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
|
||||||
/* if a dictionary is attached, it must be within window range */
|
/* if a dictionary is attached, it must be within window range */
|
||||||
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
|
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
|
||||||
|
|
||||||
|
if (ms->prefetchCDictTables) {
|
||||||
|
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
|
||||||
|
size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
|
||||||
|
PREFETCH_AREA(dictHashLong, hashTableBytes)
|
||||||
|
PREFETCH_AREA(dictHashSmall, chainTableBytes)
|
||||||
|
}
|
||||||
|
|
||||||
/* init */
|
/* init */
|
||||||
ip += (dictAndPrefixLength == 0);
|
ip += (dictAndPrefixLength == 0);
|
||||||
|
|
||||||
|
|
|
@ -500,6 +500,11 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
||||||
* when translating a dict index into a local index */
|
* when translating a dict index into a local index */
|
||||||
assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
|
assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
|
||||||
|
|
||||||
|
if (ms->prefetchCDictTables) {
|
||||||
|
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
|
||||||
|
PREFETCH_AREA(dictHashTable, hashTableBytes)
|
||||||
|
}
|
||||||
|
|
||||||
/* init */
|
/* init */
|
||||||
DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
|
DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
|
||||||
ip0 += (dictAndPrefixLength == 0);
|
ip0 += (dictAndPrefixLength == 0);
|
||||||
|
|
27
lib/zstd.h
27
lib/zstd.h
|
@ -421,6 +421,7 @@ typedef enum {
|
||||||
* ZSTD_c_validateSequences
|
* ZSTD_c_validateSequences
|
||||||
* ZSTD_c_useBlockSplitter
|
* ZSTD_c_useBlockSplitter
|
||||||
* ZSTD_c_useRowMatchFinder
|
* ZSTD_c_useRowMatchFinder
|
||||||
|
* ZSTD_c_prefetchCDictTables
|
||||||
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
|
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
|
||||||
* note : never ever use experimentalParam? names directly;
|
* note : never ever use experimentalParam? names directly;
|
||||||
* also, the enum values themselves are unstable and can still change.
|
* also, the enum values themselves are unstable and can still change.
|
||||||
|
@ -439,7 +440,8 @@ typedef enum {
|
||||||
ZSTD_c_experimentalParam12=1009,
|
ZSTD_c_experimentalParam12=1009,
|
||||||
ZSTD_c_experimentalParam13=1010,
|
ZSTD_c_experimentalParam13=1010,
|
||||||
ZSTD_c_experimentalParam14=1011,
|
ZSTD_c_experimentalParam14=1011,
|
||||||
ZSTD_c_experimentalParam15=1012
|
ZSTD_c_experimentalParam15=1012,
|
||||||
|
ZSTD_c_experimentalParam16=1013
|
||||||
} ZSTD_cParameter;
|
} ZSTD_cParameter;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
@ -1954,6 +1956,29 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
|
||||||
*/
|
*/
|
||||||
#define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15
|
#define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15
|
||||||
|
|
||||||
|
/* ZSTD_c_prefetchCDictTables
|
||||||
|
* Controlled with ZSTD_paramSwitch_e enum. Default is ZSTD_ps_auto.
|
||||||
|
*
|
||||||
|
* In some situations, zstd uses CDict tables in-place rather than copying them
|
||||||
|
* into the working context. (See docs on ZSTD_dictAttachPref_e above for details).
|
||||||
|
* In such situations, compression speed is seriously impacted when CDict tables are
|
||||||
|
* "cold" (outside CPU cache). This parameter instructs zstd to prefetch CDict tables
|
||||||
|
* when they are used in-place.
|
||||||
|
*
|
||||||
|
* For sufficiently small inputs, the cost of the prefetch will outweigh the benefit.
|
||||||
|
* For sufficiently large inputs, zstd will by default memcpy() CDict tables
|
||||||
|
* into the working context, so there is no need to prefetch. This parameter is
|
||||||
|
* targeted at a middle range of input sizes, where a prefetch is cheap enough to be
|
||||||
|
* useful but memcpy() is too expensive. The exact range of input sizes where this
|
||||||
|
* makes sense is best determined by careful experimentation.
|
||||||
|
*
|
||||||
|
* Note: for this parameter, ZSTD_ps_auto is currently equivalent to ZSTD_ps_disable,
|
||||||
|
* but in the future zstd may conditionally enable this feature via an auto-detection
|
||||||
|
* heuristic for cold CDicts.
|
||||||
|
* Use ZSTD_ps_disable to opt out of prefetching under any circumstances.
|
||||||
|
*/
|
||||||
|
#define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16
|
||||||
|
|
||||||
/*! ZSTD_CCtx_getParameter() :
|
/*! ZSTD_CCtx_getParameter() :
|
||||||
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
|
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
|
||||||
* and store it into int* value.
|
* and store it into int* value.
|
||||||
|
|
|
@ -98,6 +98,7 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer
|
||||||
setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer);
|
setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer);
|
||||||
setRand(cctx, ZSTD_c_useBlockSplitter, 0, 2, producer);
|
setRand(cctx, ZSTD_c_useBlockSplitter, 0, 2, producer);
|
||||||
setRand(cctx, ZSTD_c_deterministicRefPrefix, 0, 1, producer);
|
setRand(cctx, ZSTD_c_deterministicRefPrefix, 0, 1, producer);
|
||||||
|
setRand(cctx, ZSTD_c_prefetchCDictTables, 0, 2, producer);
|
||||||
if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
|
if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
|
||||||
setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer);
|
setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer);
|
||||||
}
|
}
|
||||||
|
|
|
@ -2041,6 +2041,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
||||||
CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_compressionLevel, l) );
|
CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_compressionLevel, l) );
|
||||||
CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_enableDedicatedDictSearch, 0) );
|
CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_enableDedicatedDictSearch, 0) );
|
||||||
CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach) );
|
CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach) );
|
||||||
|
CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_prefetchCDictTables, seed % 3) );
|
||||||
wdict_cSize = ZSTD_compress2(ctxOrig, compressedBuffer, compressedBufferSize, contentStart, contentSize);
|
wdict_cSize = ZSTD_compress2(ctxOrig, compressedBuffer, compressedBufferSize, contentStart, contentSize);
|
||||||
if (wdict_cSize > target_wdict_cSize[l]) {
|
if (wdict_cSize > target_wdict_cSize[l]) {
|
||||||
DISPLAYLEVEL(1, "error : compression with dictionary and compress2 at level %i worse than expected (%u > %u) \n",
|
DISPLAYLEVEL(1, "error : compression with dictionary and compress2 at level %i worse than expected (%u > %u) \n",
|
||||||
|
|
Loading…
Reference in New Issue