introduced strategy btultra2

note : not yet applied on any compression level
dev
Yann Collet 2018-12-06 13:38:09 -08:00
parent 0c404a48f0
commit e9448cdf4c
3 changed files with 109 additions and 61 deletions

View File

@ -269,7 +269,7 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
case ZSTD_c_compressionStrategy: case ZSTD_c_compressionStrategy:
bounds.lowerBound = (int)ZSTD_fast; bounds.lowerBound = (int)ZSTD_fast;
bounds.upperBound = (int)ZSTD_btultra; /* note : how to ensure at compile time that this is the highest value strategy ? */ bounds.upperBound = (int)ZSTD_btultra2; /* note : how to ensure at compile time that this is the highest value strategy ? */
return bounds; return bounds;
case ZSTD_c_contentSizeFlag: case ZSTD_c_contentSizeFlag:
@ -364,9 +364,21 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
} }
} }
#define CLAMPCHECK(val,min,max) { \ /* ZSTD_cParam_withinBounds:
if (((val)<(min)) | ((val)>(max))) { \ * @return 1 if value is within cParam bounds,
return ERROR(parameter_outOfBound); \ * 0 otherwise */
static int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
{
    /* fetch the authorized [lowerBound, upperBound] interval for this parameter */
    ZSTD_bounds const range = ZSTD_cParam_getBounds(cParam);

    /* when the bounds lookup itself reports an error, no value can be accepted */
    if (ZSTD_isError(range.error)) {
        return 0;
    }

    /* 1 when lowerBound <= value <= upperBound (both ends inclusive), 0 otherwise */
    return (value >= range.lowerBound) && (value <= range.upperBound);
}
#define CLAMPCHECK(cParam, val) { \
if (!ZSTD_cParam_withinBounds(cParam,val)) { \
return ERROR(parameter_outOfBound); \
} } } }
@ -493,31 +505,31 @@ size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* CCtxParams,
case ZSTD_c_windowLog : case ZSTD_c_windowLog :
if (value!=0) /* 0 => use default */ if (value!=0) /* 0 => use default */
CLAMPCHECK(value, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); CLAMPCHECK(ZSTD_c_windowLog, value);
CCtxParams->cParams.windowLog = value; CCtxParams->cParams.windowLog = value;
return CCtxParams->cParams.windowLog; return CCtxParams->cParams.windowLog;
case ZSTD_c_hashLog : case ZSTD_c_hashLog :
if (value!=0) /* 0 => use default */ if (value!=0) /* 0 => use default */
CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); CLAMPCHECK(ZSTD_c_hashLog, value);
CCtxParams->cParams.hashLog = value; CCtxParams->cParams.hashLog = value;
return CCtxParams->cParams.hashLog; return CCtxParams->cParams.hashLog;
case ZSTD_c_chainLog : case ZSTD_c_chainLog :
if (value!=0) /* 0 => use default */ if (value!=0) /* 0 => use default */
CLAMPCHECK(value, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); CLAMPCHECK(ZSTD_c_chainLog, value);
CCtxParams->cParams.chainLog = value; CCtxParams->cParams.chainLog = value;
return CCtxParams->cParams.chainLog; return CCtxParams->cParams.chainLog;
case ZSTD_c_searchLog : case ZSTD_c_searchLog :
if (value!=0) /* 0 => use default */ if (value!=0) /* 0 => use default */
CLAMPCHECK(value, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); CLAMPCHECK(ZSTD_c_searchLog, value);
CCtxParams->cParams.searchLog = value; CCtxParams->cParams.searchLog = value;
return value; return value;
case ZSTD_c_minMatch : case ZSTD_c_minMatch :
if (value!=0) /* 0 => use default */ if (value!=0) /* 0 => use default */
CLAMPCHECK(value, ZSTD_MINMATCH_MIN, ZSTD_MINMATCH_MAX); CLAMPCHECK(ZSTD_c_minMatch, value);
CCtxParams->cParams.minMatch = value; CCtxParams->cParams.minMatch = value;
return CCtxParams->cParams.minMatch; return CCtxParams->cParams.minMatch;
@ -528,7 +540,7 @@ size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* CCtxParams,
case ZSTD_c_compressionStrategy : case ZSTD_c_compressionStrategy :
if (value!=0) /* 0 => use default */ if (value!=0) /* 0 => use default */
CLAMPCHECK(value, (int)ZSTD_fast, (int)ZSTD_btultra); CLAMPCHECK(ZSTD_c_compressionStrategy, value);
CCtxParams->cParams.strategy = (ZSTD_strategy)value; CCtxParams->cParams.strategy = (ZSTD_strategy)value;
return (size_t)CCtxParams->cParams.strategy; return (size_t)CCtxParams->cParams.strategy;
@ -554,7 +566,7 @@ size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* CCtxParams,
case ZSTD_c_forceAttachDict : { case ZSTD_c_forceAttachDict : {
const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value; const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
CLAMPCHECK(pref, ZSTD_dictDefaultAttach, ZSTD_dictForceCopy); CLAMPCHECK(ZSTD_c_forceAttachDict, pref);
CCtxParams->attachDictPref = pref; CCtxParams->attachDictPref = pref;
return CCtxParams->attachDictPref; return CCtxParams->attachDictPref;
} }
@ -594,19 +606,19 @@ size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* CCtxParams,
case ZSTD_c_ldmHashLog : case ZSTD_c_ldmHashLog :
if (value!=0) /* 0 ==> auto */ if (value!=0) /* 0 ==> auto */
CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); CLAMPCHECK(ZSTD_c_ldmHashLog, value);
CCtxParams->ldmParams.hashLog = value; CCtxParams->ldmParams.hashLog = value;
return CCtxParams->ldmParams.hashLog; return CCtxParams->ldmParams.hashLog;
case ZSTD_c_ldmMinMatch : case ZSTD_c_ldmMinMatch :
if (value!=0) /* 0 ==> default */ if (value!=0) /* 0 ==> default */
CLAMPCHECK(value, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX); CLAMPCHECK(ZSTD_c_ldmMinMatch, value);
CCtxParams->ldmParams.minMatchLength = value; CCtxParams->ldmParams.minMatchLength = value;
return CCtxParams->ldmParams.minMatchLength; return CCtxParams->ldmParams.minMatchLength;
case ZSTD_c_ldmBucketSizeLog : case ZSTD_c_ldmBucketSizeLog :
if (value!=0) /* 0 ==> default */ if (value!=0) /* 0 ==> default */
CLAMPCHECK(value, ZSTD_LDM_BUCKETSIZELOG_MIN, ZSTD_LDM_BUCKETSIZELOG_MAX); CLAMPCHECK(ZSTD_c_ldmBucketSizeLog, value);
CCtxParams->ldmParams.bucketSizeLog = value; CCtxParams->ldmParams.bucketSizeLog = value;
return CCtxParams->ldmParams.bucketSizeLog; return CCtxParams->ldmParams.bucketSizeLog;
@ -832,16 +844,15 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
@return : 0, or an error code if one value is beyond authorized range */ @return : 0, or an error code if one value is beyond authorized range */
size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
{ {
CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); CLAMPCHECK(ZSTD_c_windowLog, cParams.windowLog);
CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); CLAMPCHECK(ZSTD_c_chainLog, cParams.chainLog);
CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); CLAMPCHECK(ZSTD_c_hashLog, cParams.hashLog);
CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); CLAMPCHECK(ZSTD_c_searchLog, cParams.searchLog);
CLAMPCHECK(cParams.minMatch, ZSTD_MINMATCH_MIN, ZSTD_MINMATCH_MAX); CLAMPCHECK(ZSTD_c_minMatch, cParams.minMatch);
ZSTD_STATIC_ASSERT(ZSTD_TARGETLENGTH_MIN == 0); ZSTD_STATIC_ASSERT(ZSTD_TARGETLENGTH_MIN == 0);
if (cParams.targetLength > ZSTD_TARGETLENGTH_MAX) if (cParams.targetLength > ZSTD_TARGETLENGTH_MAX)
return ERROR(parameter_outOfBound); return ERROR(parameter_outOfBound);
if ((U32)(cParams.strategy) > (U32)ZSTD_btultra) CLAMPCHECK(ZSTD_c_compressionStrategy, cParams.strategy);
return ERROR(parameter_unsupported);
return 0; return 0;
} }
@ -851,19 +862,20 @@ size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
static ZSTD_compressionParameters static ZSTD_compressionParameters
ZSTD_clampCParams(ZSTD_compressionParameters cParams) ZSTD_clampCParams(ZSTD_compressionParameters cParams)
{ {
# define CLAMP(val,min,max) { \ # define CLAMP(cParam, val) { \
if (val<min) val=min; \ ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \
else if (val>max) val=max; \ if (val<bounds.lowerBound) val=bounds.lowerBound; \
else if (val>bounds.upperBound) val=bounds.upperBound; \
} }
CLAMP(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); CLAMP(ZSTD_c_windowLog, cParams.windowLog);
CLAMP(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); CLAMP(ZSTD_c_chainLog, cParams.chainLog);
CLAMP(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); CLAMP(ZSTD_c_hashLog, cParams.hashLog);
CLAMP(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); CLAMP(ZSTD_c_searchLog, cParams.searchLog);
CLAMP(cParams.minMatch, ZSTD_MINMATCH_MIN, ZSTD_MINMATCH_MAX); CLAMP(ZSTD_c_minMatch, cParams.minMatch);
ZSTD_STATIC_ASSERT(ZSTD_TARGETLENGTH_MIN == 0); ZSTD_STATIC_ASSERT(ZSTD_TARGETLENGTH_MIN == 0);
if (cParams.targetLength > ZSTD_TARGETLENGTH_MAX) if (cParams.targetLength > ZSTD_TARGETLENGTH_MAX)
cParams.targetLength = ZSTD_TARGETLENGTH_MAX; cParams.targetLength = ZSTD_TARGETLENGTH_MAX;
CLAMP(cParams.strategy, ZSTD_fast, ZSTD_btultra); CLAMP(ZSTD_c_compressionStrategy, cParams.strategy);
return cParams; return cParams;
} }
@ -951,8 +963,7 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32) size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32)
+ (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t)); + (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t));
size_t const optSpace = (forCCtx && ((cParams->strategy == ZSTD_btopt) || size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
(cParams->strategy == ZSTD_btultra)))
? optPotentialSpace ? optPotentialSpace
: 0; : 0;
DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u", DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
@ -1253,7 +1264,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
ZSTD_invalidateMatchState(ms); ZSTD_invalidateMatchState(ms);
/* opt parser space */ /* opt parser space */
if (forCCtx && ((cParams->strategy == ZSTD_btopt) | (cParams->strategy == ZSTD_btultra))) { if (forCCtx && (cParams->strategy >= ZSTD_btopt)) {
DEBUGLOG(4, "reserving optimal parser space"); DEBUGLOG(4, "reserving optimal parser space");
ms->opt.litFreq = (U32*)ptr; ms->opt.litFreq = (U32*)ptr;
ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits); ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits);
@ -1465,16 +1476,17 @@ void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
* dictionary tables into the working context is faster than using them * dictionary tables into the working context is faster than using them
* in-place. * in-place.
*/ */
static const size_t attachDictSizeCutoffs[(unsigned)ZSTD_btultra+1] = { static const size_t attachDictSizeCutoffs[(unsigned)ZSTD_btultra2+1] = {
8 KB, /* unused */ 8 KB, /* unused */
8 KB, /* ZSTD_fast */ 8 KB, /* ZSTD_fast */
16 KB, /* ZSTD_dfast */ 16 KB, /* ZSTD_dfast */
32 KB, /* ZSTD_greedy */ 32 KB, /* ZSTD_greedy */
32 KB, /* ZSTD_lazy */ 32 KB, /* ZSTD_lazy */
32 KB, /* ZSTD_lazy2 */ 32 KB, /* ZSTD_lazy2 */
32 KB, /* ZSTD_btlazy2 */ 32 KB, /* ZSTD_btlazy2 */
32 KB, /* ZSTD_btopt */ 32 KB, /* ZSTD_btopt */
8 KB /* ZSTD_btultra */ 8 KB, /* ZSTD_btultra */
8 KB /* ZSTD_btultra2 */
}; };
static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
@ -1829,7 +1841,9 @@ static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, cons
* note : use same formula for both situations */ * note : use same formula for both situations */
static size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) static size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
{ {
U32 const minlog = (strat==ZSTD_btultra) ? 7 : 6; U32 const minlog = (U32)strat - 1;
ZSTD_STATIC_ASSERT(ZSTD_btopt == 7);
assert(strat >= ZSTD_btopt);
return (srcSize >> minlog) + 2; return (srcSize >> minlog) + 2;
} }
@ -2536,7 +2550,7 @@ ZSTD_compressSequences(seqStore_t* seqStorePtr,
* assumption : strat is a valid strategy */ * assumption : strat is a valid strategy */
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode) ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
{ {
static const ZSTD_blockCompressor blockCompressor[3][(unsigned)ZSTD_btultra+1] = { static const ZSTD_blockCompressor blockCompressor[3][(unsigned)ZSTD_btultra2+1] = {
{ ZSTD_compressBlock_fast /* default for 0 */, { ZSTD_compressBlock_fast /* default for 0 */,
ZSTD_compressBlock_fast, ZSTD_compressBlock_fast,
ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_doubleFast,
@ -2545,6 +2559,7 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMo
ZSTD_compressBlock_lazy2, ZSTD_compressBlock_lazy2,
ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btlazy2,
ZSTD_compressBlock_btopt, ZSTD_compressBlock_btopt,
ZSTD_compressBlock_btultra,
ZSTD_compressBlock_btultra }, ZSTD_compressBlock_btultra },
{ ZSTD_compressBlock_fast_extDict /* default for 0 */, { ZSTD_compressBlock_fast_extDict /* default for 0 */,
ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_fast_extDict,
@ -2554,6 +2569,7 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMo
ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_lazy2_extDict,
ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btlazy2_extDict,
ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btopt_extDict,
ZSTD_compressBlock_btultra_extDict,
ZSTD_compressBlock_btultra_extDict }, ZSTD_compressBlock_btultra_extDict },
{ ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */,
ZSTD_compressBlock_fast_dictMatchState, ZSTD_compressBlock_fast_dictMatchState,
@ -2563,14 +2579,14 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMo
ZSTD_compressBlock_lazy2_dictMatchState, ZSTD_compressBlock_lazy2_dictMatchState,
ZSTD_compressBlock_btlazy2_dictMatchState, ZSTD_compressBlock_btlazy2_dictMatchState,
ZSTD_compressBlock_btopt_dictMatchState, ZSTD_compressBlock_btopt_dictMatchState,
ZSTD_compressBlock_btultra_dictMatchState,
ZSTD_compressBlock_btultra_dictMatchState } ZSTD_compressBlock_btultra_dictMatchState }
}; };
ZSTD_blockCompressor selectedCompressor; ZSTD_blockCompressor selectedCompressor;
ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
assert((U32)strat >= (U32)ZSTD_fast); assert(ZSTD_cParam_withinBounds(ZSTD_c_compressionStrategy, strat));
assert((U32)strat <= (U32)ZSTD_btultra); selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
selectedCompressor = blockCompressor[(int)dictMode][(U32)strat];
assert(selectedCompressor != NULL); assert(selectedCompressor != NULL);
return selectedCompressor; return selectedCompressor;
} }
@ -2967,6 +2983,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ case ZSTD_btlazy2: /* we want the dictionary table fully sorted */
case ZSTD_btopt: case ZSTD_btopt:
case ZSTD_btultra: case ZSTD_btultra:
case ZSTD_btultra2:
if (srcSize >= HASH_READ_SIZE) if (srcSize >= HASH_READ_SIZE)
ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend); ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
break; break;

View File

@ -1071,14 +1071,54 @@ MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 1); optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 1);
optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 1); optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 1);
} }
/* ZSTD_initStats_ultra():
 * Make a first compression pass over the block, just to seed the optimal
 * parser statistics (ms->opt) with more accurate starting values.
 * The sequences produced by this pass are discarded; only the statistics are kept.
 *
 * @ms       : match state; its opt statistics and window indexes are modified
 * @seqStore : sequence storage; used by the first pass, then reset
 * @rep      : current repeat codes; only read here (copied into a local scratch array)
 * @src      : start of the block to scan
 * @srcSize  : size of the block, in bytes
 *
 * Usage conditions (checked by assert() only) :
 * first block, no dictionary, no ldm.
 * This function must not fail, hence its usage conditions must be respected.
 */
static void ZSTD_initStats_ultra(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
DEBUGLOG(5, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
/* preconditions : the caller is expected to have verified these before calling */
assert(ms->opt.litLengthSum == 0); /* first block */
assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */
assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
/* nothing (or at most one position) past dictLimit may have been indexed yet */
assert(ms->nextToUpdate >= ms->window.dictLimit
&& ms->nextToUpdate <= ms->window.dictLimit + 1);
/* run the pass on a copy of the rep codes, so that the caller's rep[] is left untouched */
memcpy(tmpRep, rep, sizeof(tmpRep));
ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/
/* invalidate first scan from history */
/* presumably drops the sequences emitted by the first pass -- confirm against ZSTD_resetSeqStore() */
ZSTD_resetSeqStore(seqStore);
/* base moves back by srcSize while dictLimit moves forward by srcSize;
 * lowLimit and both nextToUpdate cursors are then aligned on the new dictLimit.
 * NOTE(review): the intent appears to be that indexes written by the first pass
 * fall below the valid range for the second pass -- confirm with window semantics */
ms->window.base -= srcSize;
ms->window.dictLimit += (U32)srcSize;
ms->window.lowLimit = ms->window.dictLimit;
ms->nextToUpdate = ms->window.dictLimit;
ms->nextToUpdate3 = ms->window.dictLimit;
/* reinforce weight of collected statistics */
ZSTD_upscaleStats(&ms->opt);
}
size_t ZSTD_compressBlock_btultra( size_t ZSTD_compressBlock_btultra(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize) const void* src, size_t srcSize)
{ {
DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
#if 0 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
/* 2-pass strategy (disabled) }
size_t ZSTD_compressBlock_btultra2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
/* 2-pass strategy
* this strategy makes a first pass over first block to collect statistics * this strategy makes a first pass over first block to collect statistics
* and seed next round's statistics with it. * and seed next round's statistics with it.
* The compression ratio gain is generally small (~0.5% on first block), * The compression ratio gain is generally small (~0.5% on first block),
@ -1087,23 +1127,9 @@ size_t ZSTD_compressBlock_btultra(
if ( (ms->opt.litLengthSum==0) /* first block */ if ( (ms->opt.litLengthSum==0) /* first block */
&& (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
&& (ms->window.dictLimit == ms->window.lowLimit) ) { /* no dictionary */ && (ms->window.dictLimit == ms->window.lowLimit) ) { /* no dictionary */
U32 tmpRep[ZSTD_REP_NUM]; ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
DEBUGLOG(5, "ZSTD_compressBlock_btultra: first block: collecting statistics");
assert(ms->nextToUpdate >= ms->window.dictLimit
&& ms->nextToUpdate <= ms->window.dictLimit + 1);
memcpy(tmpRep, rep, sizeof(tmpRep));
ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/
ZSTD_resetSeqStore(seqStore);
/* invalidate first scan from history */
ms->window.base -= srcSize;
ms->window.dictLimit += (U32)srcSize;
ms->window.lowLimit = ms->window.dictLimit;
ms->nextToUpdate = ms->window.dictLimit;
ms->nextToUpdate3 = ms->window.dictLimit;
/* re-inforce weight of collected statistics */
ZSTD_upscaleStats(&ms->opt);
} }
#endif
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
} }
@ -1134,3 +1160,7 @@ size_t ZSTD_compressBlock_btultra_extDict(
{ {
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict); return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
} }
/* note : no btultra2 variant for extDict nor dictMatchState,
* because btultra2 is not meant to work with dictionaries
* and is only specific for the first block (no prefix) */

View File

@ -494,7 +494,8 @@ typedef enum { ZSTD_fast=1,
ZSTD_lazy2=5, ZSTD_lazy2=5,
ZSTD_btlazy2=6, ZSTD_btlazy2=6,
ZSTD_btopt=7, ZSTD_btopt=7,
ZSTD_btultra=8 ZSTD_btultra=8,
ZSTD_btultra2=9
/* note : new strategies might be added in the future. /* note : new strategies might be added in the future.
Only the order (from fast to strong) is guaranteed, not the exact position. Only the order (from fast to strong) is guaranteed, not the exact position.
new strategy names might be introduced, pushing the maximum number upward */ new strategy names might be introduced, pushing the maximum number upward */