first implementation of delayed update for btlazy2
This is a pretty nice speed win.

The new strategy consists of stacking new candidates as if they formed a hash chain. Then, only if the chain actually needs to be consulted, the stacked candidates are batch-updated into the binary tree, before the match search itself starts. This is expected to be beneficial when skipping positions, which happens a lot with the lazy strategy.

The baseline performance for btlazy2 on my laptop is:

15#calgary.tar :   3265536 ->   955985 (3.416),  7.06 MB/s , 618.0 MB/s
15#enwik7      :  10000000 ->  3067341 (3.260),  4.65 MB/s , 521.2 MB/s
15#silesia.tar : 211984896 -> 58095131 (3.649),  6.20 MB/s , 682.4 MB/s

(only level 15 remains for btlazy2, as this strategy is squeezed between lazy2 and btopt)

After this patch, keeping all parameters identical, speed increases by a pretty good margin (+30-50%), but compression ratio suffers a bit:

15#calgary.tar :   3265536 ->   958060 (3.408),  9.12 MB/s , 621.1 MB/s
15#enwik7      :  10000000 ->  3078318 (3.249),  6.37 MB/s , 525.1 MB/s
15#silesia.tar : 211984896 -> 58444111 (3.627),  9.89 MB/s , 680.4 MB/s

That's because I kept `1<<searchLog` as the maximum number of candidates to update. But for a hash chain, this represents the total number of candidates in the chain, while for a binary tree, it represents the maximum depth of the search. Keep in mind that a lot of candidates won't even be visited in the btree, since they are filtered out by the binary sort. As a consequence, in the new implementation, the effective depth of the binary tree is substantially shorter. To compensate, it's enough to increase the `searchLog` value. Here is the result after adding just +1 to searchLog (level 15 setting in this patch):

15#calgary.tar :   3265536 ->   956311 (3.415),  8.32 MB/s , 611.4 MB/s
15#enwik7      :  10000000 ->  3067655 (3.260),  5.43 MB/s , 535.5 MB/s
15#silesia.tar : 211984896 -> 58113144 (3.648),  8.35 MB/s , 679.3 MB/s

That is, almost the same compression ratio as before, but with a noticeable speed increase (+20-30%). This modification makes btlazy2 more competitive. A new round of paramgrill will be necessary to determine which levels are impacted and could adopt the new strategy.
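To make the mechanism concrete, here is a minimal standalone sketch of the stacking step (simplified from ZSTD_updateDUBT in the diff below; the names stackPosition and UNSORTED are illustrative, not the actual zstd identifiers):

    #include <stddef.h>   /* size_t */

    /* Sketch only : each new position is prepended to a hash chain overlaid
     * on the binary-tree table, and tagged UNSORTED.
     * No comparison is performed at insertion time. */
    #define UNSORTED ((unsigned)(-1))

    static void stackPosition(unsigned* hashTable, size_t h,
                              unsigned* bt, unsigned btMask, unsigned idx)
    {
        unsigned* const nextCandidatePtr = bt + 2*(idx & btMask);  /* "smaller" slot reused as chain link */
        unsigned* const sortMarkPtr      = nextCandidatePtr + 1;   /* "larger" slot reused as sort mark */
        *nextCandidatePtr = hashTable[h];   /* link to previous chain head */
        *sortMarkPtr      = UNSORTED;       /* tree insertion is deferred */
        hashTable[h]      = idx;            /* this position becomes the new head */
    }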
parent d55aea3c3b
commit 5235d8d6ba
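And a sketch of the other half of the scheme, the batch update performed only when the chain is actually consulted (simplified from ZSTD_insertBtAndFindBestMatch in the diff below; insertOne and batchSortChain are illustrative stand-ins, and the window checks and "nullify last unsorted candidate" case are omitted):

    #define UNSORTED ((unsigned)(-1))   /* same tag as in the previous sketch */

    static void insertOne(unsigned* bt, unsigned btMask, unsigned btLow, unsigned idx)
    {
        /* placeholder for the real ZSTD_insertDUBT1() :
         * binary-sort position idx into the tree */
        (void)bt; (void)btMask; (void)btLow; (void)idx;
    }

    static void batchSortChain(unsigned* bt, unsigned btMask, unsigned btLow,
                               unsigned head, unsigned nbCandidates)
    {
        unsigned previous = 0;
        unsigned matchIndex = head;

        /* walk the unsorted chain (newest first), reversing it on the way :
         * the sort-mark slot temporarily stores a back-link to the newer entry */
        while ( (matchIndex > btLow)
             && (bt[2*(matchIndex & btMask) + 1] == UNSORTED)
             && (nbCandidates > 1) ) {
            unsigned const next = bt[2*(matchIndex & btMask)];
            bt[2*(matchIndex & btMask) + 1] = previous;
            previous = matchIndex;
            matchIndex = next;
            nbCandidates--;
        }

        /* sort the stacked candidates into the tree, oldest first,
         * so each insertion only meets already-sorted nodes */
        matchIndex = previous;
        while (matchIndex) {   /* ends on matchIndex == 0 */
            unsigned const next = bt[2*(matchIndex & btMask) + 1];  /* read back-link before it is overwritten */
            insertOne(bt, btMask, btLow, matchIndex);
            matchIndex = next;
        }
    }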
@@ -1922,6 +1922,10 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t
         break;
 
     case ZSTD_btlazy2:
+        if (srcSize >= HASH_READ_SIZE)
+            ZSTD_updateDUBT(zc, iend-HASH_READ_SIZE, iend, zc->appliedParams.cParams.searchLength);
+        break;
+
     case ZSTD_btopt:
     case ZSTD_btultra:
         if (srcSize >= HASH_READ_SIZE)
@@ -2974,7 +2978,7 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
     { 22, 20, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 12 */
     { 22, 21, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 13 */
     { 22, 21, 22,  6,  5, 16, ZSTD_lazy2   },  /* level 14 */
-    { 22, 21, 22,  4,  5, 16, ZSTD_btlazy2 },  /* level 15 */
+    { 22, 21, 22,  5,  5, 16, ZSTD_btlazy2 },  /* level 15 */
     { 22, 21, 22,  4,  5, 48, ZSTD_btopt   },  /* level 16 */
     { 23, 22, 22,  4,  4, 48, ZSTD_btopt   },  /* level 17 */
     { 23, 22, 22,  5,  3, 64, ZSTD_btopt   },  /* level 18 */
@@ -15,73 +15,85 @@
 /*-*************************************
 *  Binary Tree search
 ***************************************/
-/** ZSTD_insertBt1() : add one or multiple positions to tree.
- *  ip : assumed <= iend-8 .
- *  @return : nb of positions added */
-static U32 ZSTD_insertBt1(ZSTD_CCtx* zc,
-                const BYTE* const ip, const BYTE* const iend,
-                U32 nbCompares, U32 const mls, U32 const extDict)
+#define ZSTD_DUBT_UNSORTED ((U32)(-1))
+
+void ZSTD_updateDUBT(ZSTD_CCtx* zc,
+                const BYTE* ip, const BYTE* iend,
+                U32 mls)
 {
     U32* const hashTable = zc->hashTable;
     U32  const hashLog = zc->appliedParams.cParams.hashLog;
-    size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
+
     U32* const bt = zc->chainTable;
     U32  const btLog  = zc->appliedParams.cParams.chainLog - 1;
     U32  const btMask = (1 << btLog) - 1;
+
+    const BYTE* const base = zc->base;
+    U32 const target = (U32)(ip - base);
+    U32 idx = zc->nextToUpdate;
+
+    DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u ",
+                idx, target);
+    assert(ip + 8 <= iend);   /* condition for ZSTD_hashPtr */
+    (void)iend;
+
+    assert(idx >= zc->dictLimit);   /* condition for valid base+idx */
+    for ( ; idx < target ; idx++) {
+        size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls);   /* assumption : ip + 8 <= iend */
+        U32 const matchIndex = hashTable[h];
+
+        U32* const nextCandidatePtr = bt + 2*(idx&btMask);
+        U32* const sortMarkPtr = nextCandidatePtr + 1;
+
+        hashTable[h] = idx;   /* Update Hash Table */
+        *nextCandidatePtr = matchIndex;   /* update BT like a chain */
+        *sortMarkPtr = ZSTD_DUBT_UNSORTED;
+    }
+    zc->nextToUpdate = target;
+}
+
+
+/** ZSTD_insertDUBT1() :
+ *  sort one already inserted but unsorted position
+ *  assumption : current >= btlow == (current - btmask)
+ *  doesn't fail */
+static void ZSTD_insertDUBT1(ZSTD_CCtx* zc,
+                U32 current, const BYTE* iend,
+                U32 nbCompares, U32 btLow, int extDict)
+{
+    U32* const bt = zc->chainTable;
+    U32  const btLog  = zc->appliedParams.cParams.chainLog - 1;
+    U32  const btMask = (1 << btLog) - 1;
-    U32 matchIndex = hashTable[h];
     size_t commonLengthSmaller=0, commonLengthLarger=0;
     const BYTE* const base = zc->base;
+    const BYTE* const ip = base + current;
     const BYTE* const dictBase = zc->dictBase;
     const U32 dictLimit = zc->dictLimit;
     const BYTE* const dictEnd = dictBase + dictLimit;
     const BYTE* const prefixStart = base + dictLimit;
     const BYTE* match;
-    const U32 current = (U32)(ip-base);
-    const U32 btLow = btMask >= current ? 0 : current - btMask;
     U32* smallerPtr = bt + 2*(current&btMask);
     U32* largerPtr  = smallerPtr + 1;
+    U32 matchIndex = *smallerPtr;
     U32 dummy32;   /* to be nullified at the end */
     U32 const windowLow = zc->lowLimit;
-    U32 matchEndIdx = current+8+1;
-    size_t bestLength = 8;
-#ifdef ZSTD_C_PREDICT
-    U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
-    U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
-    predictedSmall += (predictedSmall>0);
-    predictedLarge += (predictedLarge>0);
-#endif /* ZSTD_C_PREDICT */
 
-    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);
+    DEBUGLOG(8, "ZSTD_insertDUBT1 (%u)", current);
+    assert(current >= btLow);
 
-    assert(ip <= iend-8);   /* required for h calculation */
-    hashTable[h] = current;   /* Update Hash Table */
+    if (extDict && (current < dictLimit)) {   /* do not sort candidates in _extDict (simplification, for easier ZSTD_count, detrimental to compression ratio in streaming mode) */
+        *largerPtr = *smallerPtr = 0;
+        return;
+    }
+    assert(current >= dictLimit);   /* ip=base+current within current memory segment */
 
     while (nbCompares-- && (matchIndex > windowLow)) {
         U32* const nextPtr = bt + 2*(matchIndex & btMask);
         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+        DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u", current, matchIndex);
         assert(matchIndex < current);
 
-#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
-        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
-        if (matchIndex == predictedSmall) {
-            /* no need to check length, result known */
-            *smallerPtr = matchIndex;
-            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
-            smallerPtr = nextPtr+1;    /* new "smaller" => larger of match */
-            matchIndex = nextPtr[1];   /* new matchIndex larger than previous (closer to current) */
-            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
-            continue;
-        }
-        if (matchIndex == predictedLarge) {
-            *largerPtr = matchIndex;
-            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
-            largerPtr = nextPtr;
-            matchIndex = nextPtr[0];
-            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
-            continue;
-        }
-#endif
 
         if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
             assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if extDict is incorrectly set to 0 */
             match = base + matchIndex;
@@ -93,12 +105,6 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc,
             match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
         }
 
-        if (matchLength > bestLength) {
-            bestLength = matchLength;
-            if (matchLength > matchEndIdx - matchIndex)
-                matchEndIdx = matchIndex + (U32)matchLength;
-        }
-
         if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
             break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
         }
@@ -108,6 +114,8 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc,
             *smallerPtr = matchIndex;             /* update smaller idx */
             commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
+            DEBUGLOG(8, "ZSTD_insertDUBT1: selecting next candidate from %u (>btLow=%u) => %u",
+                        matchIndex, btLow, nextPtr[1]);
             smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
             matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
         } else {
@@ -115,44 +123,13 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc,
             *largerPtr = matchIndex;
             commonLengthLarger = matchLength;
             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
+            DEBUGLOG(8, "ZSTD_insertDUBT1: selecting next candidate from %u (>btLow=%u) => %u",
+                        matchIndex, btLow, nextPtr[0]);
             largerPtr = nextPtr;
             matchIndex = nextPtr[0];
     }   }
 
     *smallerPtr = *largerPtr = 0;
-    if (bestLength > 384) return MIN(192, (U32)(bestLength - 384));   /* speed optimization */
-    assert(matchEndIdx > current + 8);
-    return matchEndIdx - (current + 8);
 }
 
-FORCE_INLINE_TEMPLATE
-void ZSTD_updateTree_internal(ZSTD_CCtx* zc,
-                const BYTE* const ip, const BYTE* const iend,
-                const U32 nbCompares, const U32 mls, const U32 extDict)
-{
-    const BYTE* const base = zc->base;
-    U32 const target = (U32)(ip - base);
-    U32 idx = zc->nextToUpdate;
-    DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u  (extDict:%u)",
-                idx, target, extDict);
-
-    while(idx < target)
-        idx += ZSTD_insertBt1(zc, base+idx, iend, nbCompares, mls, extDict);
-    zc->nextToUpdate = target;
-}
-
-void ZSTD_updateTree(ZSTD_CCtx* zc,
-                const BYTE* const ip, const BYTE* const iend,
-                const U32 nbCompares, const U32 mls)
-{
-    ZSTD_updateTree_internal(zc, ip, iend, nbCompares, mls, 0 /*extDict*/);
-}
-
-void ZSTD_updateTree_extDict(ZSTD_CCtx* zc,
-                const BYTE* const ip, const BYTE* const iend,
-                const U32 nbCompares, const U32 mls)
-{
-    ZSTD_updateTree_internal(zc, ip, iend, nbCompares, mls, 1 /*extDict*/);
-}
-
 
@@ -166,18 +143,62 @@ static size_t ZSTD_insertBtAndFindBestMatch (
     U32* const hashTable = zc->hashTable;
     U32  const hashLog = zc->appliedParams.cParams.hashLog;
     size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
+    U32 matchIndex = hashTable[h];
+
+    const BYTE* const base = zc->base;
+    U32 const current = (U32)(ip-base);
+
     U32* const bt = zc->chainTable;
     U32  const btLog  = zc->appliedParams.cParams.chainLog - 1;
     U32  const btMask = (1 << btLog) - 1;
-    U32 matchIndex = hashTable[h];
-    size_t commonLengthSmaller=0, commonLengthLarger=0;
-    const BYTE* const base = zc->base;
+    U32 const btLow = (btMask >= current) ? 0 : current - btMask;
+
+    U32* nextCandidate = bt + 2*(matchIndex&btMask);
+    U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1;
+    U32 nbCandidates = nbCompares;
+    U32 previousCandidate = 0;
+
+    DEBUGLOG(7, "ZSTD_insertBtAndFindBestMatch (%u) ", current);
+    assert(ip <= iend-8);   /* required for h calculation */
+
+    /* reach end of unsorted candidates list */
+    while ( (matchIndex > btLow)
+         && (*unsortedMark == ZSTD_DUBT_UNSORTED)
+         && (nbCandidates > 1) ) {
+        DEBUGLOG(8, "ZSTD_insertBtAndFindBestMatch: candidate %u is unsorted",
+                    matchIndex);
+        *unsortedMark = previousCandidate;
+        previousCandidate = matchIndex;
+        matchIndex = *nextCandidate;
+        nextCandidate = bt + 2*(matchIndex&btMask);
+        unsortedMark = bt + 2*(matchIndex&btMask) + 1;
+        nbCandidates --;
+    }
+
+    if ( (matchIndex > btLow)
+      && (*unsortedMark==ZSTD_DUBT_UNSORTED) ) {
+        DEBUGLOG(8, "ZSTD_insertBtAndFindBestMatch: nullify last unsorted candidate %u",
+                    matchIndex);
+        *nextCandidate = *unsortedMark = 0;   /* nullify last candidate if it's still unsorted (note : detrimental to compression ratio) */
+    }
+
+    /* batch sort stacked candidates */
+    matchIndex = previousCandidate;
+    while (matchIndex) {   /* will end on matchIndex == 0 */
+        U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
+        U32 const nextCandidateIdx = *nextCandidateIdxPtr;
+        ZSTD_insertDUBT1(zc, matchIndex, iend,
+                         nbCandidates, btLow, extDict);
+        matchIndex = nextCandidateIdx;
+        nbCandidates++;
+    }
+
+    /* find longest match */
+    {   size_t commonLengthSmaller=0, commonLengthLarger=0;
         const BYTE* const dictBase = zc->dictBase;
         const U32 dictLimit = zc->dictLimit;
         const BYTE* const dictEnd = dictBase + dictLimit;
         const BYTE* const prefixStart = base + dictLimit;
-    const U32 current = (U32)(ip-base);
-    const U32 btLow = btMask >= current ? 0 : current - btMask;
         const U32 windowLow = zc->lowLimit;
         U32* smallerPtr = bt + 2*(current&btMask);
         U32* largerPtr  = bt + 2*(current&btMask) + 1;
@@ -185,7 +206,7 @@ static size_t ZSTD_insertBtAndFindBestMatch (
         U32 dummy32;   /* to be nullified at the end */
         size_t bestLength = 0;
 
-        assert(ip <= iend-8);   /* required for h calculation */
+        matchIndex = hashTable[h];
         hashTable[h] = current;   /* Update Hash Table */
 
         while (nbCompares-- && (matchIndex > windowLow)) {
@@ -231,10 +252,14 @@ static size_t ZSTD_insertBtAndFindBestMatch (
 
         *smallerPtr = *largerPtr = 0;
 
-        assert(matchEndIdx > current+8);
+        assert(matchEndIdx > current+8);   /* ensure nextToUpdate is increased */
         zc->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
+        if (bestLength)
+            DEBUGLOG(7, "ZSTD_insertBtAndFindBestMatch(%u) : found match of length %u",
+                        current, (U32)bestLength);
         return bestLength;
+    }
 }
 
 
 /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
@@ -245,7 +270,7 @@ static size_t ZSTD_BtFindBestMatch (
                         const U32 maxNbAttempts, const U32 mls)
 {
     if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */
-    ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
+    ZSTD_updateDUBT(zc, ip, iLimit, mls);
     return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0);
 }
@@ -275,7 +300,7 @@ static size_t ZSTD_BtFindBestMatch_extDict (
                         const U32 maxNbAttempts, const U32 mls)
 {
     if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */
-    ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
+    ZSTD_updateDUBT(zc, ip, iLimit, mls);
     return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1);
 }
@@ -18,9 +18,8 @@ extern "C" {
 #include "mem.h"   /* U32 */
 #include "zstd.h"  /* ZSTD_CCtx, size_t */
 
-U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls);
-void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls);
-void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls);
+U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls);   /* used in ZSTD_loadDictionaryContent() */
+void ZSTD_updateDUBT(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iend, U32 mls);   /* used in ZSTD_loadDictionaryContent() */
 
 size_t ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
 size_t ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
@@ -265,6 +265,147 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* const cctx, const BYTE*
 /*-*************************************
 *  Binary Tree search
 ***************************************/
+/** ZSTD_insertBt1() : add one or multiple positions to tree.
+ *  ip : assumed <= iend-8 .
+ *  @return : nb of positions added */
+static U32 ZSTD_insertBt1(ZSTD_CCtx* zc,
+                const BYTE* const ip, const BYTE* const iend,
+                U32 nbCompares, U32 const mls, U32 const extDict)
+{
+    U32* const hashTable = zc->hashTable;
+    U32  const hashLog = zc->appliedParams.cParams.hashLog;
+    size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
+    U32* const bt = zc->chainTable;
+    U32  const btLog  = zc->appliedParams.cParams.chainLog - 1;
+    U32  const btMask = (1 << btLog) - 1;
+    U32 matchIndex = hashTable[h];
+    size_t commonLengthSmaller=0, commonLengthLarger=0;
+    const BYTE* const base = zc->base;
+    const BYTE* const dictBase = zc->dictBase;
+    const U32 dictLimit = zc->dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* match;
+    const U32 current = (U32)(ip-base);
+    const U32 btLow = btMask >= current ? 0 : current - btMask;
+    U32* smallerPtr = bt + 2*(current&btMask);
+    U32* largerPtr  = smallerPtr + 1;
+    U32 dummy32;   /* to be nullified at the end */
+    U32 const windowLow = zc->lowLimit;
+    U32 matchEndIdx = current+8+1;
+    size_t bestLength = 8;
+#ifdef ZSTD_C_PREDICT
+    U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
+    U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
+    predictedSmall += (predictedSmall>0);
+    predictedLarge += (predictedLarge>0);
+#endif /* ZSTD_C_PREDICT */
+
+    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);
+
+    assert(ip <= iend-8);   /* required for h calculation */
+    hashTable[h] = current;   /* Update Hash Table */
+
+    while (nbCompares-- && (matchIndex > windowLow)) {
+        U32* const nextPtr = bt + 2*(matchIndex & btMask);
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+        assert(matchIndex < current);
+
+#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
+        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
+        if (matchIndex == predictedSmall) {
+            /* no need to check length, result known */
+            *smallerPtr = matchIndex;
+            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            smallerPtr = nextPtr+1;    /* new "smaller" => larger of match */
+            matchIndex = nextPtr[1];   /* new matchIndex larger than previous (closer to current) */
+            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
+            continue;
+        }
+        if (matchIndex == predictedLarge) {
+            *largerPtr = matchIndex;
+            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            largerPtr = nextPtr;
+            matchIndex = nextPtr[0];
+            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
+            continue;
+        }
+#endif
+
+        if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
+            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if extDict is incorrectly set to 0 */
+            match = base + matchIndex;
+            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
+        } else {
+            match = dictBase + matchIndex;
+            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+            if (matchIndex+matchLength >= dictLimit)
+                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
+        }
+
+        if (matchLength > bestLength) {
+            bestLength = matchLength;
+            if (matchLength > matchEndIdx - matchIndex)
+                matchEndIdx = matchIndex + (U32)matchLength;
+        }
+
+        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
+            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
+        }
+
+        if (match[matchLength] < ip[matchLength]) {   /* necessarily within buffer */
+            /* match is smaller than current */
+            *smallerPtr = matchIndex;             /* update smaller idx */
+            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
+            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
+            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
+        } else {
+            /* match is larger than current */
+            *largerPtr = matchIndex;
+            commonLengthLarger = matchLength;
+            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
+            largerPtr = nextPtr;
+            matchIndex = nextPtr[0];
+    }   }
+
+    *smallerPtr = *largerPtr = 0;
+    if (bestLength > 384) return MIN(192, (U32)(bestLength - 384));   /* speed optimization */
+    assert(matchEndIdx > current + 8);
+    return matchEndIdx - (current + 8);
+}
+
+FORCE_INLINE_TEMPLATE
+void ZSTD_updateTree_internal(ZSTD_CCtx* zc,
+                const BYTE* const ip, const BYTE* const iend,
+                const U32 nbCompares, const U32 mls, const U32 extDict)
+{
+    const BYTE* const base = zc->base;
+    U32 const target = (U32)(ip - base);
+    U32 idx = zc->nextToUpdate;
+    DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u  (extDict:%u)",
+                idx, target, extDict);
+
+    while(idx < target)
+        idx += ZSTD_insertBt1(zc, base+idx, iend, nbCompares, mls, extDict);
+    zc->nextToUpdate = target;
+}
+
+void ZSTD_updateTree(ZSTD_CCtx* zc,
+                const BYTE* const ip, const BYTE* const iend,
+                const U32 nbCompares, const U32 mls)
+{
+    ZSTD_updateTree_internal(zc, ip, iend, nbCompares, mls, 0 /*extDict*/);
+}
+
+void ZSTD_updateTree_extDict(ZSTD_CCtx* zc,
+                const BYTE* const ip, const BYTE* const iend,
+                const U32 nbCompares, const U32 mls)
+{
+    ZSTD_updateTree_internal(zc, ip, iend, nbCompares, mls, 1 /*extDict*/);
+}
+
+
 FORCE_INLINE_TEMPLATE
 U32 ZSTD_insertBtAndGetAllMatches (
                         ZSTD_CCtx* zc,
@@ -15,8 +15,11 @@
 extern "C" {
 #endif
 
 #include "mem.h"   /* U32 */
 #include "zstd.h"  /* ZSTD_CCtx, size_t */
 
+void ZSTD_updateTree(ZSTD_CCtx* ctx, const BYTE* ip, const BYTE* iend, U32 nbCompares, U32 mls);   /* used in ZSTD_loadDictionaryContent() */
+
 size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
 size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize);