Add long distance matching as a CCtxParam
parent
6a546efb8c
commit
8081becadc
|
@ -281,11 +281,10 @@ typedef struct {
|
|||
|
||||
typedef struct {
|
||||
ldmEntry_t* hashTable;
|
||||
BYTE* bucketOffsets;
|
||||
U32 ldmEnable; /* 1 if enable long distance matching */
|
||||
BYTE* bucketOffsets; /* next position in bucket to insert entry */
|
||||
U32 hashLog; /* log size of hashTable */
|
||||
U32 bucketLog; /* log number of buckets, at most 4 */
|
||||
U32 hashEveryLog;
|
||||
U32 hashEveryLog; /* log number of entries to skip */
|
||||
} ldmState_t;
|
||||
|
||||
typedef struct {
|
||||
|
@ -313,6 +312,8 @@ struct ZSTD_CCtx_params_s {
|
|||
unsigned jobSize;
|
||||
unsigned overlapSizeLog;
|
||||
|
||||
U32 enableLdm; /* 1 if enable long distance matching */
|
||||
|
||||
/* For use with createCCtxParams() and freeCCtxParams() only */
|
||||
ZSTD_customMem customMem;
|
||||
|
||||
|
|
|
@ -53,6 +53,7 @@ size_t ZSTD_compressBound(size_t srcSize) {
|
|||
return srcSize + (srcSize >> 8) + margin;
|
||||
}
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Sequence storage
|
||||
***************************************/
|
||||
|
@ -362,14 +363,11 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v
|
|||
return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
|
||||
|
||||
case ZSTD_p_longDistanceMatching:
|
||||
/* TODO */
|
||||
if (cctx->cdict) return ERROR(stage_wrong);
|
||||
cctx->ldmState.ldmEnable = value>0;
|
||||
if (value != 0) {
|
||||
ZSTD_cLevelToCParams(cctx);
|
||||
cctx->requestedParams.cParams.windowLog = LDM_WINDOW_LOG;
|
||||
}
|
||||
return 0;
|
||||
return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
|
||||
|
||||
default: return ERROR(parameter_unsupported);
|
||||
}
|
||||
|
@ -471,8 +469,12 @@ size_t ZSTD_CCtxParam_setParameter(
|
|||
return ZSTDMT_CCtxParam_setMTCtxParameter(params, ZSTDMT_p_overlapSectionLog, value);
|
||||
|
||||
case ZSTD_p_longDistanceMatching :
|
||||
/* TODO */
|
||||
return ERROR(parameter_unsupported);
|
||||
params->enableLdm = value>0;
|
||||
if (value != 0) {
|
||||
ZSTD_cLevelToCCtxParams(params);
|
||||
params->cParams.windowLog = LDM_WINDOW_LOG;
|
||||
}
|
||||
return 0;
|
||||
|
||||
default: return ERROR(parameter_unsupported);
|
||||
}
|
||||
|
@ -509,6 +511,9 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams(
|
|||
cctx, ZSTD_p_overlapSizeLog, params->overlapSizeLog) );
|
||||
}
|
||||
|
||||
/* Copy long distance matching parameter */
|
||||
cctx->requestedParams.enableLdm = params->enableLdm;
|
||||
|
||||
/* customMem is used only for create/free params and can be ignored */
|
||||
return 0;
|
||||
}
|
||||
|
@ -675,6 +680,16 @@ ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, u
|
|||
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize);
|
||||
}
|
||||
|
||||
/* ZSTD_ldm_getTableSize() :
 * Estimate the space needed for long distance matching tables.
 * ldmHashLog : log2 of the number of entries in the ldm hash table.
 * bucketLog  : log2 of the bucket size; clamped to LDM_BUCKET_SIZE_LOG_MAX.
 * @return : (1 << (ldmHashLog - clamped bucketLog))
 *           + (1 << ldmHashLog) * sizeof(ldmEntry_t).
 * NOTE(review): the first term is added as a raw count, which presumably
 * relies on the bucket offsets being one BYTE each - confirm against ldmState_t.
 * NOTE(review): nothing here checks ldmHashLog >= the clamped bucketLog;
 * if it were smaller the shift count below would wrap - confirm callers
 * guarantee this. */
|
||||
static size_t ZSTD_ldm_getTableSize(U32 ldmHashLog, U32 bucketLog) {
|
||||
size_t const ldmHSize = ((size_t)1) << ldmHashLog;   /* number of hash table entries */
|
||||
size_t const ldmBucketLog =
|
||||
MIN(bucketLog, LDM_BUCKET_SIZE_LOG_MAX);   /* clamp the requested bucket log */
|
||||
size_t const ldmBucketSize =
|
||||
((size_t)1) << (ldmHashLog - ldmBucketLog);   /* despite the name, this is the number of buckets */
|
||||
return ldmBucketSize + (ldmHSize * (sizeof(ldmEntry_t)));
|
||||
}
|
||||
|
||||
size_t ZSTD_estimateCCtxSize_advanced_usingCCtxParams(const ZSTD_CCtx_params* params)
|
||||
{
|
||||
/* Estimate CCtx size is supported for single-threaded compression only. */
|
||||
|
@ -699,8 +714,10 @@ size_t ZSTD_estimateCCtxSize_advanced_usingCCtxParams(const ZSTD_CCtx_params* pa
|
|||
+ (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
|
||||
size_t const optSpace = ((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btultra)) ? optBudget : 0;
|
||||
|
||||
/* TODO: Long distance matching is not supported */
|
||||
size_t const ldmSpace = 0;
|
||||
/* Ldm parameters can not currently be changed */
|
||||
size_t const ldmSpace = params->enableLdm ?
|
||||
ZSTD_ldm_getTableSize(LDM_HASH_LOG, LDM_BUCKET_SIZE_LOG) : 0;
|
||||
|
||||
size_t const neededSpace = entropySpace + tableSpace + tokenSpace +
|
||||
optSpace + ldmSpace;
|
||||
|
||||
|
@ -762,7 +779,8 @@ static U32 ZSTD_equivalentCParams(ZSTD_compressionParameters cParams1,
|
|||
static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1,
|
||||
ZSTD_CCtx_params params2)
|
||||
{
|
||||
return ZSTD_equivalentCParams(params1.cParams, params2.cParams);
|
||||
return ZSTD_equivalentCParams(params1.cParams, params2.cParams) &&
|
||||
params1.enableLdm == params2.enableLdm;
|
||||
}
|
||||
|
||||
/*! ZSTD_continueCCtx() :
|
||||
|
@ -803,9 +821,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|||
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
|
||||
|
||||
if (crp == ZSTDcrp_continue) {
|
||||
/* TODO: For now, reset if long distance matching is enabled */
|
||||
if (ZSTD_equivalentParams(params, zc->appliedParams) &&
|
||||
!zc->ldmState.ldmEnable) {
|
||||
if (ZSTD_equivalentParams(params, zc->appliedParams)) {
|
||||
DEBUGLOG(5, "ZSTD_equivalentParams()==1");
|
||||
zc->entropy->hufCTable_repeatMode = HUF_repeat_none;
|
||||
zc->entropy->offcode_repeatMode = FSE_repeat_none;
|
||||
|
@ -838,13 +854,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|||
size_t const buffInSize = (zbuff==ZSTDb_buffered) ? ((size_t)1 << params.cParams.windowLog) + blockSize : 0;
|
||||
void* ptr;
|
||||
|
||||
size_t const ldmHSize = ((size_t)1) << zc->ldmState.hashLog;
|
||||
size_t const ldmBucketSize =
|
||||
((size_t)1) << (zc->ldmState.hashLog - zc->ldmState.bucketLog);
|
||||
size_t const ldmPotentialSpace =
|
||||
ldmBucketSize + (ldmHSize * (sizeof(ldmEntry_t)));
|
||||
size_t const ldmSpace = zc->ldmState.ldmEnable ?
|
||||
ldmPotentialSpace : 0;
|
||||
size_t const ldmSpace = params.enableLdm ? ZSTD_ldm_getTableSize(zc->ldmState.hashLog, zc->ldmState.bucketLog) : 0;
|
||||
|
||||
/* Check if workSpace is large enough, alloc a new one if needed */
|
||||
{ size_t const entropySpace = sizeof(ZSTD_entropyCTables_t);
|
||||
|
@ -923,8 +933,11 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|||
}
|
||||
|
||||
/* ldm space */
|
||||
if (zc->ldmState.ldmEnable) {
|
||||
if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, ldmSpace);
|
||||
if (params.enableLdm) {
|
||||
size_t const ldmHSize = ((size_t)1) << zc->ldmState.hashLog;
|
||||
size_t const ldmBucketSize =
|
||||
((size_t)1) << (zc->ldmState.hashLog - zc->ldmState.bucketLog);
|
||||
memset(ptr, 0, ldmSpace);
|
||||
assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
|
||||
zc->ldmState.hashTable = (ldmEntry_t*)ptr;
|
||||
ptr = zc->ldmState.hashTable + ldmHSize;
|
||||
|
@ -1047,7 +1060,7 @@ static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reduce
|
|||
/*! ZSTD_ldm_reduceTable() :
|
||||
* reduce table indexes by `reducerValue` */
|
||||
static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
|
||||
U32 const reducerValue)
|
||||
U32 const reducerValue)
|
||||
{
|
||||
U32 u;
|
||||
for (u = 0; u < size; u++) {
|
||||
|
@ -1069,8 +1082,8 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
|
|||
{ U32 const h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0;
|
||||
ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); }
|
||||
|
||||
{ if (zc->ldmState.ldmEnable) {
|
||||
U32 const ldmHSize = 1 << LDM_HASH_LOG;
|
||||
{ if (zc->appliedParams.enableLdm) {
|
||||
U32 const ldmHSize = 1 << zc->ldmState.hashLog;
|
||||
ZSTD_ldm_reduceTable(zc->ldmState.hashTable, ldmHSize, reducerValue);
|
||||
}
|
||||
}
|
||||
|
@ -1683,6 +1696,7 @@ static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Fast Scan
|
||||
***************************************/
|
||||
|
@ -1751,6 +1765,7 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
|
|||
while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
||||
offset_2 = offset_1;
|
||||
offset_1 = offset;
|
||||
|
||||
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
||||
}
|
||||
|
||||
|
@ -1983,7 +1998,6 @@ size_t ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
|
|||
ip++;
|
||||
ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
|
||||
} else {
|
||||
|
||||
U32 offset;
|
||||
if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) {
|
||||
mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
|
||||
|
@ -3405,7 +3419,7 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
|
|||
|
||||
/* Check immediate repcode */
|
||||
while ( (ip < ilimit)
|
||||
&& ( (repToConfirm[1] > 0)
|
||||
&& ( (repToConfirm[1] > 0) && (repToConfirm[1] <= (U32)(ip-lowest))
|
||||
&& (MEM_read32(ip) == MEM_read32(ip - repToConfirm[1])) )) {
|
||||
|
||||
size_t const rLength = ZSTD_count(ip+4, ip+4-repToConfirm[1],
|
||||
|
@ -3413,7 +3427,7 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
|
|||
/* Swap repToConfirm[1] <=> repToConfirm[0] */
|
||||
{
|
||||
U32 const tmpOff = repToConfirm[1];
|
||||
repToConfirm[1] = repToConfirm[0];
|
||||
repToConfirm[1] = repToConfirm[0];
|
||||
repToConfirm[0] = tmpOff;
|
||||
}
|
||||
|
||||
|
@ -3571,6 +3585,8 @@ static size_t ZSTD_compressBlock_ldm_extDict_generic(
|
|||
|
||||
/* Call the block compressor on the remaining literals */
|
||||
{
|
||||
/* ip = current - backwardMatchLength
|
||||
* The match is at (bestEntry->offset - backwardMatchLength) */
|
||||
U32 const matchIndex = bestEntry->offset;
|
||||
U32 const offset = current - matchIndex;
|
||||
|
||||
|
@ -3687,7 +3703,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCa
|
|||
size_t lastLLSize;
|
||||
const BYTE* anchor;
|
||||
const ZSTD_blockCompressor blockCompressor =
|
||||
zc->ldmState.ldmEnable ?
|
||||
zc->appliedParams.enableLdm?
|
||||
(zc->lowLimit < zc->dictLimit ? ZSTD_compressBlock_ldm_extDict :
|
||||
ZSTD_compressBlock_ldm) :
|
||||
ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
|
||||
|
@ -4870,7 +4886,6 @@ size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
|
|||
cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbThreads, cctx->customMem);
|
||||
if (cctx->mtctx == NULL) return ERROR(memory_allocation);
|
||||
}
|
||||
|
||||
DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbThreads=%u", params.nbThreads);
|
||||
CHECK_F( ZSTDMT_initCStream_internal(
|
||||
cctx->mtctx,
|
||||
|
|
|
@ -196,6 +196,8 @@ static ZSTD_CCtx_params ZSTDMT_makeJobCCtxParams(ZSTD_CCtx_params const params)
|
|||
jobParams.cParams = params.cParams;
|
||||
jobParams.fParams = params.fParams;
|
||||
jobParams.compressionLevel = params.compressionLevel;
|
||||
|
||||
jobParams.enableLdm = params.enableLdm;
|
||||
return jobParams;
|
||||
}
|
||||
|
||||
|
|
|
@ -978,9 +978,11 @@ typedef enum {
|
|||
/* advanced parameters - may not remain available after API update */
|
||||
ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize,
|
||||
* even when referencing into Dictionary content (default:0) */
|
||||
ZSTD_p_longDistanceMatching, /* Enable long distance matching.
|
||||
* This increases the memory usage as well as the
|
||||
* window size. */
|
||||
ZSTD_p_longDistanceMatching, /* Enable long distance matching. This
|
||||
* increases the memory usage as well as the
|
||||
* window size. Note: this should be set after
|
||||
* ZSTD_p_compressionLevel and before
|
||||
* ZSTD_p_windowLog. */
|
||||
} ZSTD_cParameter;
|
||||
|
||||
|
||||
|
|
|
@ -269,12 +269,12 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
|
|||
#ifdef ZSTD_NEWAPI
|
||||
ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbThreads, g_nbThreads);
|
||||
ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionLevel, cLevel);
|
||||
ZSTD_CCtx_setParameter(ctx, ZSTD_p_longDistanceMatching, g_ldmFlag);
|
||||
ZSTD_CCtx_setParameter(ctx, ZSTD_p_windowLog, comprParams->windowLog);
|
||||
ZSTD_CCtx_setParameter(ctx, ZSTD_p_chainLog, comprParams->chainLog);
|
||||
ZSTD_CCtx_setParameter(ctx, ZSTD_p_searchLog, comprParams->searchLog);
|
||||
ZSTD_CCtx_setParameter(ctx, ZSTD_p_minMatch, comprParams->searchLength);
|
||||
ZSTD_CCtx_setParameter(ctx, ZSTD_p_targetLength, comprParams->targetLength);
|
||||
ZSTD_CCtx_setParameter(ctx, ZSTD_p_longDistanceMatching, g_ldmFlag);
|
||||
ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionStrategy, comprParams->strategy);
|
||||
ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize);
|
||||
#else
|
||||
|
|
|
@ -402,8 +402,11 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
|
|||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_dictIDFlag, g_dictIDFlag) );
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_checksumFlag, g_checksumFlag) );
|
||||
CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) );
|
||||
/* compression parameters */
|
||||
/* compression level */
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, cLevel) );
|
||||
/* long distance matching */
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_longDistanceMatching, g_ldmFlag) );
|
||||
/* compression parameters */
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_windowLog, comprParams->windowLog) );
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_chainLog, comprParams->chainLog) );
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_hashLog, comprParams->hashLog) );
|
||||
|
@ -411,8 +414,6 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
|
|||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_minMatch, comprParams->searchLength) );
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_targetLength, comprParams->targetLength) );
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionStrategy, (U32)comprParams->strategy) );
|
||||
/* long distance matching */
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_longDistanceMatching, g_ldmFlag) );
|
||||
/* multi-threading */
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_nbThreads, g_nbThreads) );
|
||||
/* dictionary */
|
||||
|
|
|
@ -440,6 +440,8 @@ static int basicUnitTests(U32 seed, double compressibility)
|
|||
free(staticDCtxBuffer);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* ZSTDMT simple MT compression test */
|
||||
DISPLAYLEVEL(4, "test%3i : create ZSTDMT CCtx : ", testNb++);
|
||||
{ ZSTDMT_CCtx* mtctx = ZSTDMT_createCCtx(2);
|
||||
|
@ -1340,7 +1342,7 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
|
|||
dictSize = FUZ_rLogLength(&lseed, dictLog); /* needed also for decompression */
|
||||
dict = srcBuffer + (FUZ_rand(&lseed) % (srcBufferSize - dictSize));
|
||||
|
||||
|
||||
CHECK_Z ( ZSTD_CCtx_setParameter(refCtx, ZSTD_p_longDistanceMatching, FUZ_rand(&lseed)&255) );
|
||||
if (FUZ_rand(&lseed) & 0xF) {
|
||||
CHECK_Z ( ZSTD_compressBegin_usingDict(refCtx, dict, dictSize, cLevel) );
|
||||
} else {
|
||||
|
@ -1349,7 +1351,6 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
|
|||
!(FUZ_rand(&lseed)&3) /* contentChecksumFlag*/,
|
||||
0 /*NodictID*/ }; /* note : since dictionary is fake, dictIDflag has no impact */
|
||||
ZSTD_parameters const p = FUZ_makeParams(cPar, fPar);
|
||||
|
||||
CHECK_Z ( ZSTD_compressBegin_advanced(refCtx, dict, dictSize, p, 0) );
|
||||
}
|
||||
CHECK_Z( ZSTD_copyCCtx(ctx, refCtx, 0) );
|
||||
|
|
|
@ -1380,7 +1380,7 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double
|
|||
if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_minMatch, cParams.searchLength, useOpaqueAPI) );
|
||||
if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_targetLength, cParams.targetLength, useOpaqueAPI) );
|
||||
|
||||
if (FUZ_rand(&lseed) & 1) CHECK_Z( ZSTD_CCtx_setParameter(zc, ZSTD_p_longDistanceMatching, FUZ_rand(&lseed) & 63) );
|
||||
if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_p_longDistanceMatching, FUZ_rand(&lseed) & 63, useOpaqueAPI) );
|
||||
|
||||
/* unconditionally set, to be sync with decoder */
|
||||
/* mess with frame parameters */
|
||||
|
|
Loading…
Reference in New Issue