improved btlazy2: list of unsorted candidates can reach extDict
The list of unsorted candidates used to stop on reaching extDict, for simplification. As a consequence, there was a small loss of compression ratio each time the round buffer restarted from the beginning. It's not a large difference though, just several hundred bytes on silesia. This patch fixes it.
This commit is contained in:
parent
a68b76afef
commit
f597f55675
@ -62,6 +62,7 @@ void ZSTD_updateDUBT(ZSTD_CCtx* zc,
|
|||||||
U32* const nextCandidatePtr = bt + 2*(idx&btMask);
|
U32* const nextCandidatePtr = bt + 2*(idx&btMask);
|
||||||
U32* const sortMarkPtr = nextCandidatePtr + 1;
|
U32* const sortMarkPtr = nextCandidatePtr + 1;
|
||||||
|
|
||||||
|
DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
|
||||||
hashTable[h] = idx; /* Update Hash Table */
|
hashTable[h] = idx; /* Update Hash Table */
|
||||||
*nextCandidatePtr = matchIndex; /* update BT like a chain */
|
*nextCandidatePtr = matchIndex; /* update BT like a chain */
|
||||||
*sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
|
*sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
|
||||||
@ -75,7 +76,7 @@ void ZSTD_updateDUBT(ZSTD_CCtx* zc,
|
|||||||
* assumption : current >= btlow == (current - btmask)
|
* assumption : current >= btlow == (current - btmask)
|
||||||
* doesn't fail */
|
* doesn't fail */
|
||||||
static void ZSTD_insertDUBT1(ZSTD_CCtx* zc,
|
static void ZSTD_insertDUBT1(ZSTD_CCtx* zc,
|
||||||
U32 current, const BYTE* iend,
|
U32 current, const BYTE* inputEnd,
|
||||||
U32 nbCompares, U32 btLow, int extDict)
|
U32 nbCompares, U32 btLow, int extDict)
|
||||||
{
|
{
|
||||||
U32* const bt = zc->chainTable;
|
U32* const bt = zc->chainTable;
|
||||||
@ -83,9 +84,10 @@ static void ZSTD_insertDUBT1(ZSTD_CCtx* zc,
|
|||||||
U32 const btMask = (1 << btLog) - 1;
|
U32 const btMask = (1 << btLog) - 1;
|
||||||
size_t commonLengthSmaller=0, commonLengthLarger=0;
|
size_t commonLengthSmaller=0, commonLengthLarger=0;
|
||||||
const BYTE* const base = zc->base;
|
const BYTE* const base = zc->base;
|
||||||
const BYTE* const ip = base + current;
|
|
||||||
const BYTE* const dictBase = zc->dictBase;
|
const BYTE* const dictBase = zc->dictBase;
|
||||||
const U32 dictLimit = zc->dictLimit;
|
const U32 dictLimit = zc->dictLimit;
|
||||||
|
const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current;
|
||||||
|
const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit;
|
||||||
const BYTE* const dictEnd = dictBase + dictLimit;
|
const BYTE* const dictEnd = dictBase + dictLimit;
|
||||||
const BYTE* const prefixStart = base + dictLimit;
|
const BYTE* const prefixStart = base + dictLimit;
|
||||||
const BYTE* match;
|
const BYTE* match;
|
||||||
@ -98,22 +100,20 @@ static void ZSTD_insertDUBT1(ZSTD_CCtx* zc,
|
|||||||
DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
|
DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
|
||||||
current, dictLimit, windowLow);
|
current, dictLimit, windowLow);
|
||||||
assert(current >= btLow);
|
assert(current >= btLow);
|
||||||
|
assert(ip < iend); /* condition for ZSTD_count */
|
||||||
if (extDict && (current < dictLimit)) { /* candidates in _extDict are not sorted (simplification, for easier ZSTD_count, detrimental to compression ratio in streaming mode) */
|
|
||||||
*largerPtr = *smallerPtr = 0;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
assert(current >= dictLimit); /* ip=base+current within current memory segment */
|
|
||||||
|
|
||||||
while (nbCompares-- && (matchIndex > windowLow)) {
|
while (nbCompares-- && (matchIndex > windowLow)) {
|
||||||
U32* const nextPtr = bt + 2*(matchIndex & btMask);
|
U32* const nextPtr = bt + 2*(matchIndex & btMask);
|
||||||
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
||||||
DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u", current, matchIndex);
|
|
||||||
assert(matchIndex < current);
|
assert(matchIndex < current);
|
||||||
|
|
||||||
if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
|
if ( (!extDict)
|
||||||
assert(matchIndex+matchLength >= dictLimit); /* might be wrong if extDict is incorrectly set to 0 */
|
|| (matchIndex+matchLength >= dictLimit) /* both in current segment*/
|
||||||
match = base + matchIndex;
|
|| (current < dictLimit) /* both in extDict */) {
|
||||||
|
const BYTE* const mBase = !extDict || (matchIndex >= dictLimit) ? base : dictBase;
|
||||||
|
assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
|
||||||
|
|| (current < dictLimit) );
|
||||||
|
match = mBase + matchIndex;
|
||||||
matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
|
matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
|
||||||
} else {
|
} else {
|
||||||
match = dictBase + matchIndex;
|
match = dictBase + matchIndex;
|
||||||
@ -122,6 +122,9 @@ static void ZSTD_insertDUBT1(ZSTD_CCtx* zc,
|
|||||||
match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
|
match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
|
||||||
|
current, matchIndex, (U32)matchLength);
|
||||||
|
|
||||||
if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
|
if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
|
||||||
break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
|
break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
|
||||||
}
|
}
|
||||||
@ -131,7 +134,7 @@ static void ZSTD_insertDUBT1(ZSTD_CCtx* zc,
|
|||||||
*smallerPtr = matchIndex; /* update smaller idx */
|
*smallerPtr = matchIndex; /* update smaller idx */
|
||||||
commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
|
commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
|
||||||
if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
|
if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
|
||||||
DEBUGLOG(8, "ZSTD_insertDUBT1: selecting next candidate from %u (>btLow=%u) => %u",
|
DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
|
||||||
matchIndex, btLow, nextPtr[1]);
|
matchIndex, btLow, nextPtr[1]);
|
||||||
smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
|
smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
|
||||||
matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
|
matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
|
||||||
@ -140,7 +143,7 @@ static void ZSTD_insertDUBT1(ZSTD_CCtx* zc,
|
|||||||
*largerPtr = matchIndex;
|
*largerPtr = matchIndex;
|
||||||
commonLengthLarger = matchLength;
|
commonLengthLarger = matchLength;
|
||||||
if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
|
if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
|
||||||
DEBUGLOG(8, "ZSTD_insertDUBT1: selecting next candidate from %u (>btLow=%u) => %u",
|
DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
|
||||||
matchIndex, btLow, nextPtr[0]);
|
matchIndex, btLow, nextPtr[0]);
|
||||||
largerPtr = nextPtr;
|
largerPtr = nextPtr;
|
||||||
matchIndex = nextPtr[0];
|
matchIndex = nextPtr[0];
|
||||||
@ -196,7 +199,7 @@ static size_t ZSTD_insertBtAndFindBestMatch (
|
|||||||
|
|
||||||
if ( (matchIndex > unsortLimit)
|
if ( (matchIndex > unsortLimit)
|
||||||
&& (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
|
&& (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
|
||||||
DEBUGLOG(8, "ZSTD_insertBtAndFindBestMatch: nullify last unsorted candidate %u",
|
DEBUGLOG(7, "ZSTD_insertBtAndFindBestMatch: nullify last unsorted candidate %u",
|
||||||
matchIndex);
|
matchIndex);
|
||||||
*nextCandidate = *unsortedMark = 0; /* nullify next candidate if it's still unsorted (note : simplification, detrimental to compression ratio, beneficial for speed) */
|
*nextCandidate = *unsortedMark = 0; /* nullify next candidate if it's still unsorted (note : simplification, detrimental to compression ratio, beneficial for speed) */
|
||||||
}
|
}
|
||||||
@ -272,9 +275,11 @@ static size_t ZSTD_insertBtAndFindBestMatch (
|
|||||||
|
|
||||||
assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
|
assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
|
||||||
zc->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
|
zc->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
|
||||||
if (bestLength)
|
if (bestLength >= MINMATCH) {
|
||||||
DEBUGLOG(7, "ZSTD_insertBtAndFindBestMatch(%u) : found match of length %u",
|
U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
|
||||||
current, (U32)bestLength);
|
DEBUGLOG(8, "ZSTD_insertBtAndFindBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
|
||||||
|
current, (U32)bestLength, (U32)*offsetPtr, mIndex);
|
||||||
|
}
|
||||||
return bestLength;
|
return bestLength;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -287,6 +292,7 @@ static size_t ZSTD_BtFindBestMatch (
|
|||||||
size_t* offsetPtr,
|
size_t* offsetPtr,
|
||||||
const U32 maxNbAttempts, const U32 mls)
|
const U32 maxNbAttempts, const U32 mls)
|
||||||
{
|
{
|
||||||
|
DEBUGLOG(7, "ZSTD_BtFindBestMatch");
|
||||||
if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
|
if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
|
||||||
ZSTD_updateDUBT(zc, ip, iLimit, mls);
|
ZSTD_updateDUBT(zc, ip, iLimit, mls);
|
||||||
return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0);
|
return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0);
|
||||||
@ -317,6 +323,7 @@ static size_t ZSTD_BtFindBestMatch_extDict (
|
|||||||
size_t* offsetPtr,
|
size_t* offsetPtr,
|
||||||
const U32 maxNbAttempts, const U32 mls)
|
const U32 maxNbAttempts, const U32 mls)
|
||||||
{
|
{
|
||||||
|
DEBUGLOG(7, "ZSTD_BtFindBestMatch_extDict");
|
||||||
if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
|
if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
|
||||||
ZSTD_updateDUBT(zc, ip, iLimit, mls);
|
ZSTD_updateDUBT(zc, ip, iLimit, mls);
|
||||||
return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1);
|
return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1);
|
||||||
|
@ -204,6 +204,7 @@ ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
|
|||||||
|
|
||||||
ZSTD_DCtx* ZSTD_createDCtx(void)
|
ZSTD_DCtx* ZSTD_createDCtx(void)
|
||||||
{
|
{
|
||||||
|
DEBUGLOG(3, "ZSTD_createDCtx");
|
||||||
return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
|
return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2247,6 +2248,7 @@ size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
|
|||||||
|
|
||||||
ZSTD_DStream* ZSTD_createDStream(void)
|
ZSTD_DStream* ZSTD_createDStream(void)
|
||||||
{
|
{
|
||||||
|
DEBUGLOG(3, "ZSTD_createDStream");
|
||||||
return ZSTD_createDStream_advanced(ZSTD_defaultCMem);
|
return ZSTD_createDStream_advanced(ZSTD_defaultCMem);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2273,6 +2275,7 @@ size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
|
|||||||
|
|
||||||
size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
|
size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
|
||||||
{
|
{
|
||||||
|
DEBUGLOG(4, "ZSTD_initDStream_usingDict");
|
||||||
zds->streamStage = zdss_loadHeader;
|
zds->streamStage = zdss_loadHeader;
|
||||||
zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
|
zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
|
||||||
ZSTD_freeDDict(zds->ddictLocal);
|
ZSTD_freeDDict(zds->ddictLocal);
|
||||||
@ -2289,6 +2292,7 @@ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t di
|
|||||||
/* note : this variant can't fail */
|
/* note : this variant can't fail */
|
||||||
size_t ZSTD_initDStream(ZSTD_DStream* zds)
|
size_t ZSTD_initDStream(ZSTD_DStream* zds)
|
||||||
{
|
{
|
||||||
|
DEBUGLOG(4, "ZSTD_initDStream");
|
||||||
return ZSTD_initDStream_usingDict(zds, NULL, 0);
|
return ZSTD_initDStream_usingDict(zds, NULL, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2304,6 +2308,7 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict)
|
|||||||
|
|
||||||
size_t ZSTD_resetDStream(ZSTD_DStream* zds)
|
size_t ZSTD_resetDStream(ZSTD_DStream* zds)
|
||||||
{
|
{
|
||||||
|
DEBUGLOG(4, "ZSTD_resetDStream");
|
||||||
zds->streamStage = zdss_loadHeader;
|
zds->streamStage = zdss_loadHeader;
|
||||||
zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
|
zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
|
||||||
zds->legacyVersion = 0;
|
zds->legacyVersion = 0;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user