first implementation of small window size for btopt
Noticeably improves compression ratio when window size is small (< 18).

enwik7, level 19:

| windowLog | `dev` | `smallwlog` | improvement |
| ---: | ---: | ---: | ---: |
| 23 | 3.577 | 3.577 | 0.02% |
| 22 | 3.536 | 3.538 | 0.06% |
| 21 | 3.462 | 3.467 | 0.14% |
| 20 | 3.364 | 3.377 | 0.39% |
| 19 | 3.244 | 3.272 | 0.86% |
| 18 | 3.110 | 3.166 | 1.80% |
| 17 | 2.843 | 3.057 | 7.53% |
| 16 | 2.724 | 2.943 | 8.04% |
| 15 | 2.594 | 2.822 | 8.79% |
| 14 | 2.456 | 2.686 | 9.36% |
| 13 | 2.312 | 2.523 | 9.13% |
| 12 | 2.162 | 2.361 | 9.20% |
| 11 | 2.003 | 2.182 | 8.94% |
This commit is contained in:
parent
b13a9207f9
commit
bc601bdc6d
@ -1387,7 +1387,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
|
|||||||
note : `params` are assumed fully validated at this stage */
|
note : `params` are assumed fully validated at this stage */
|
||||||
static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
||||||
ZSTD_CCtx_params params,
|
ZSTD_CCtx_params params,
|
||||||
U64 pledgedSrcSize,
|
U64 const pledgedSrcSize,
|
||||||
ZSTD_compResetPolicy_e const crp,
|
ZSTD_compResetPolicy_e const crp,
|
||||||
ZSTD_buffered_policy_e const zbuff)
|
ZSTD_buffered_policy_e const zbuff)
|
||||||
{
|
{
|
||||||
@ -2868,7 +2868,8 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
|
|||||||
ms->dictMatchState = NULL;
|
ms->dictMatchState = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
|
//ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
|
||||||
|
ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
|
||||||
/* Ensure hash/chain table insertion resumes no sooner than lowlimit */
|
/* Ensure hash/chain table insertion resumes no sooner than lowlimit */
|
||||||
if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
|
if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
|
||||||
|
|
||||||
@ -3296,12 +3297,11 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
|
|||||||
|
|
||||||
FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
|
FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
|
||||||
ZSTDcrp_continue, zbuff) );
|
ZSTDcrp_continue, zbuff) );
|
||||||
{
|
{ size_t const dictID = ZSTD_compress_insertDictionary(
|
||||||
size_t const dictID = ZSTD_compress_insertDictionary(
|
|
||||||
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
|
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
|
||||||
&params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace);
|
&params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace);
|
||||||
FORWARD_IF_ERROR(dictID);
|
FORWARD_IF_ERROR(dictID);
|
||||||
assert(dictID <= (size_t)(U32)-1);
|
assert(dictID <= UINT32_MAX);
|
||||||
cctx->dictID = (U32)dictID;
|
cctx->dictID = (U32)dictID;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -141,7 +141,7 @@ struct ZSTD_matchState_t {
|
|||||||
U32* hashTable3;
|
U32* hashTable3;
|
||||||
U32* chainTable;
|
U32* chainTable;
|
||||||
optState_t opt; /* optimal parser state */
|
optState_t opt; /* optimal parser state */
|
||||||
const ZSTD_matchState_t * dictMatchState;
|
const ZSTD_matchState_t* dictMatchState;
|
||||||
ZSTD_compressionParameters cParams;
|
ZSTD_compressionParameters cParams;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -731,6 +731,38 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MEM_STATIC void
|
||||||
|
ZSTD_checkDictValidity(ZSTD_window_t* window,
|
||||||
|
const void* blockEnd,
|
||||||
|
U32 maxDist,
|
||||||
|
U32* loadedDictEndPtr,
|
||||||
|
const ZSTD_matchState_t** dictMatchStatePtr)
|
||||||
|
{
|
||||||
|
U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
|
||||||
|
U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
|
||||||
|
DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
|
||||||
|
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
|
||||||
|
|
||||||
|
/* - When there is no dictionary : loadedDictEnd == 0.
|
||||||
|
In which case, the test (blockEndIdx > maxDist) is merely to avoid
|
||||||
|
overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
|
||||||
|
- When there is a standard dictionary :
|
||||||
|
Index referential is copied from the dictionary,
|
||||||
|
which means it starts from 0.
|
||||||
|
In which case, loadedDictEnd == dictSize,
|
||||||
|
and it makes sense to compare `blockEndIdx > maxDist + dictSize`
|
||||||
|
since `blockEndIdx` also starts from zero.
|
||||||
|
- When there is an attached dictionary :
|
||||||
|
loadedDictEnd is expressed within the referential of the context,
|
||||||
|
so it can be directly compared against blockEndIdx.
|
||||||
|
*/
|
||||||
|
if (loadedDictEnd && (blockEndIdx > maxDist + loadedDictEnd)) {
|
||||||
|
/* On reaching window size, dictionaries are invalidated */
|
||||||
|
if (loadedDictEndPtr) *loadedDictEndPtr = 0;
|
||||||
|
if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ZSTD_window_update():
|
* ZSTD_window_update():
|
||||||
* Updates the window by appending [src, src + srcSize) to the window.
|
* Updates the window by appending [src, src + srcSize) to the window.
|
||||||
|
@ -545,6 +545,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|||||||
{
|
{
|
||||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||||
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
|
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
|
||||||
|
U32 const maxDistance = 1U << cParams->windowLog;
|
||||||
const BYTE* const base = ms->window.base;
|
const BYTE* const base = ms->window.base;
|
||||||
U32 const current = (U32)(ip-base);
|
U32 const current = (U32)(ip-base);
|
||||||
U32 const hashLog = cParams->hashLog;
|
U32 const hashLog = cParams->hashLog;
|
||||||
@ -560,8 +561,9 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|||||||
U32 const dictLimit = ms->window.dictLimit;
|
U32 const dictLimit = ms->window.dictLimit;
|
||||||
const BYTE* const dictEnd = dictBase + dictLimit;
|
const BYTE* const dictEnd = dictBase + dictLimit;
|
||||||
const BYTE* const prefixStart = base + dictLimit;
|
const BYTE* const prefixStart = base + dictLimit;
|
||||||
U32 const btLow = btMask >= current ? 0 : current - btMask;
|
U32 const btLow = (btMask >= current) ? 0 : current - btMask;
|
||||||
U32 const windowLow = ms->window.lowLimit;
|
U32 const windowValid = ms->window.lowLimit;
|
||||||
|
U32 const windowLow = ((current - windowValid) > maxDistance) ? current - maxDistance : windowValid;
|
||||||
U32 const matchLow = windowLow ? windowLow : 1;
|
U32 const matchLow = windowLow ? windowLow : 1;
|
||||||
U32* smallerPtr = bt + 2*(current&btMask);
|
U32* smallerPtr = bt + 2*(current&btMask);
|
||||||
U32* largerPtr = bt + 2*(current&btMask) + 1;
|
U32* largerPtr = bt + 2*(current&btMask) + 1;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user