improved btlazy2 : list of unsorted candidates can reach extDict

It used to stop on reaching extDict, for simplification.
As a consequence, there was a small loss of compression each time the round buffer restarted from the beginning.
It's not a large difference though, just a few hundred bytes on silesia.
This patch fixes it.
Yann Collet 2017-12-30 15:12:59 +01:00
parent a68b76afef
commit f597f55675
2 changed files with 30 additions and 18 deletions
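
The core of the change is in ZSTD_insertDUBT1: instead of assuming the inserted position always lies in the current prefix segment (ip = base + current) and dropping the candidate list as soon as it reaches extDict, the new code derives both the read pointer and the end of its readable segment from the position itself, so unsorted candidates located in extDict can still be loaded and sorted. Below is a minimal sketch of that segment selection, not the actual zstd source; the type seg_view and the helper seg_ptr are hypothetical names standing in for the base/dictBase/dictLimit fields used in the diff.

/* Sketch only : illustrates per-segment pointer selection, assuming the
 * round-buffer layout described by base / dictBase / dictLimit. */
typedef unsigned char BYTE;
typedef unsigned int  U32;

typedef struct {
    const BYTE* base;       /* virtual base of the current (prefix) segment */
    const BYTE* dictBase;   /* virtual base of the extDict segment */
    U32 dictLimit;          /* first index belonging to the prefix segment */
} seg_view;                 /* hypothetical helper type, for illustration */

/* Resolve index `idx` into a readable pointer, and report where its segment
 * ends : indexes >= dictLimit are read from base[], older ones from
 * dictBase[], whose readable region stops at dictBase + dictLimit. */
static const BYTE* seg_ptr(const seg_view* s, U32 idx,
                           const BYTE* inputEnd, const BYTE** segEnd)
{
    if (idx >= s->dictLimit) {              /* prefix segment */
        *segEnd = inputEnd;
        return s->base + idx;
    }
    *segEnd = s->dictBase + s->dictLimit;   /* extDict segment */
    return s->dictBase + idx;
}

With ip and iend selected this way, ZSTD_count can compare bytes safely up to the end of whichever segment holds the position, which is what the new assert(ip < iend) in the diff guards.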


@@ -62,6 +62,7 @@ void ZSTD_updateDUBT(ZSTD_CCtx* zc,
         U32* const nextCandidatePtr = bt + 2*(idx&btMask);
         U32* const sortMarkPtr = nextCandidatePtr + 1;
+        DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
         hashTable[h] = idx;   /* Update Hash Table */
         *nextCandidatePtr = matchIndex;   /* update BT like a chain */
         *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
@@ -75,7 +76,7 @@ void ZSTD_updateDUBT(ZSTD_CCtx* zc,
  * assumption : current >= btlow == (current - btmask)
  * doesn't fail */
 static void ZSTD_insertDUBT1(ZSTD_CCtx* zc,
-                 U32 current, const BYTE* iend,
+                 U32 current, const BYTE* inputEnd,
                  U32 nbCompares, U32 btLow, int extDict)
 {
     U32* const bt = zc->chainTable;
@@ -83,9 +84,10 @@ static void ZSTD_insertDUBT1(ZSTD_CCtx* zc,
     U32 const btMask = (1 << btLog) - 1;
     size_t commonLengthSmaller=0, commonLengthLarger=0;
     const BYTE* const base = zc->base;
-    const BYTE* const ip = base + current;
     const BYTE* const dictBase = zc->dictBase;
     const U32 dictLimit = zc->dictLimit;
+    const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current;
+    const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit;
     const BYTE* const dictEnd = dictBase + dictLimit;
     const BYTE* const prefixStart = base + dictLimit;
     const BYTE* match;
@@ -98,22 +100,20 @@ static void ZSTD_insertDUBT1(ZSTD_CCtx* zc,
     DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
                 current, dictLimit, windowLow);
     assert(current >= btLow);
+    assert(ip < iend);   /* condition for ZSTD_count */
-    if (extDict && (current < dictLimit)) { /* candidates in _extDict are not sorted (simplification, for easier ZSTD_count, detrimental to compression ratio in streaming mode) */
-        *largerPtr = *smallerPtr = 0;
-        return;
-    }
-    assert(current >= dictLimit);   /* ip=base+current within current memory segment */
     while (nbCompares-- && (matchIndex > windowLow)) {
         U32* const nextPtr = bt + 2*(matchIndex & btMask);
         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
-        DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u", current, matchIndex);
         assert(matchIndex < current);
-        if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
-            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if extDict is incorrectly set to 0 */
-            match = base + matchIndex;
+        if ( (!extDict)
+          || (matchIndex+matchLength >= dictLimit)  /* both in current segment*/
+          || (current < dictLimit) /* both in extDict */) {
+            const BYTE* const mBase = !extDict || (matchIndex >= dictLimit) ? base : dictBase;
+            assert( (matchIndex+matchLength >= dictLimit)  /* might be wrong if extDict is incorrectly set to 0 */
+                 || (current < dictLimit) );
+            match = mBase + matchIndex;
             matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
         } else {
             match = dictBase + matchIndex;
@@ -122,6 +122,9 @@ static void ZSTD_insertDUBT1(ZSTD_CCtx* zc,
             match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
         }
+        DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
+                    current, matchIndex, (U32)matchLength);
         if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
             break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
         }
@@ -131,7 +134,7 @@ static void ZSTD_insertDUBT1(ZSTD_CCtx* zc,
             *smallerPtr = matchIndex;             /* update smaller idx */
             commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
-            DEBUGLOG(8, "ZSTD_insertDUBT1: selecting next candidate from %u (>btLow=%u) => %u",
+            DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
                         matchIndex, btLow, nextPtr[1]);
             smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
             matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
@@ -140,7 +143,7 @@ static void ZSTD_insertDUBT1(ZSTD_CCtx* zc,
             *largerPtr = matchIndex;
             commonLengthLarger = matchLength;
             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
-            DEBUGLOG(8, "ZSTD_insertDUBT1: selecting next candidate from %u (>btLow=%u) => %u",
+            DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
                         matchIndex, btLow, nextPtr[0]);
             largerPtr = nextPtr;
             matchIndex = nextPtr[0];
@@ -196,7 +199,7 @@ static size_t ZSTD_insertBtAndFindBestMatch (
         if ( (matchIndex > unsortLimit)
           && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
-            DEBUGLOG(8, "ZSTD_insertBtAndFindBestMatch: nullify last unsorted candidate %u",
+            DEBUGLOG(7, "ZSTD_insertBtAndFindBestMatch: nullify last unsorted candidate %u",
                         matchIndex);
             *nextCandidate = *unsortedMark = 0;   /* nullify next candidate if it's still unsorted (note : simplification, detrimental to compression ratio, beneficial for speed) */
         }
@@ -272,9 +275,11 @@ static size_t ZSTD_insertBtAndFindBestMatch (
         assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
         zc->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
-        if (bestLength)
-            DEBUGLOG(7, "ZSTD_insertBtAndFindBestMatch(%u) : found match of length %u",
-                        current, (U32)bestLength);
+        if (bestLength >= MINMATCH) {
+            U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
+            DEBUGLOG(8, "ZSTD_insertBtAndFindBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
+                        current, (U32)bestLength, (U32)*offsetPtr, mIndex);
+        }
         return bestLength;
     }
 }
@@ -287,6 +292,7 @@ static size_t ZSTD_BtFindBestMatch (
                         size_t* offsetPtr,
                         const U32 maxNbAttempts, const U32 mls)
 {
+    DEBUGLOG(7, "ZSTD_BtFindBestMatch");
     if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */
     ZSTD_updateDUBT(zc, ip, iLimit, mls);
     return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0);
@@ -317,6 +323,7 @@ static size_t ZSTD_BtFindBestMatch_extDict (
                         size_t* offsetPtr,
                         const U32 maxNbAttempts, const U32 mls)
 {
+    DEBUGLOG(7, "ZSTD_BtFindBestMatch_extDict");
     if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */
     ZSTD_updateDUBT(zc, ip, iLimit, mls);
     return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1);


@@ -204,6 +204,7 @@ ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
 ZSTD_DCtx* ZSTD_createDCtx(void)
 {
+    DEBUGLOG(3, "ZSTD_createDCtx");
     return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
 }
@@ -2247,6 +2248,7 @@ size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
 ZSTD_DStream* ZSTD_createDStream(void)
 {
+    DEBUGLOG(3, "ZSTD_createDStream");
     return ZSTD_createDStream_advanced(ZSTD_defaultCMem);
 }
@@ -2273,6 +2275,7 @@ size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
 size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
 {
+    DEBUGLOG(4, "ZSTD_initDStream_usingDict");
     zds->streamStage = zdss_loadHeader;
     zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
     ZSTD_freeDDict(zds->ddictLocal);
@@ -2289,6 +2292,7 @@ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t di
 /* note : this variant can't fail */
 size_t ZSTD_initDStream(ZSTD_DStream* zds)
 {
+    DEBUGLOG(4, "ZSTD_initDStream");
     return ZSTD_initDStream_usingDict(zds, NULL, 0);
 }
@@ -2304,6 +2308,7 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict)
 size_t ZSTD_resetDStream(ZSTD_DStream* zds)
 {
+    DEBUGLOG(4, "ZSTD_resetDStream");
     zds->streamStage = zdss_loadHeader;
     zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
     zds->legacyVersion = 0;