Split Lookups in Hash Cache and Chain Table into Two Loops

Sliiiight speedup.
This commit is contained in:
W. Felix Handte 2020-08-20 12:31:25 -04:00
parent 9e83c782f8
commit e8b4011b52

View File

@ -570,30 +570,63 @@ size_t ZSTD_HcFindBestMatch_generic (
}
if (dictMode == ZSTD_dedicatedDictSearch) {
const U32 ddsChainSize = (1 << dms->cParams.chainLog);
const U32 ddsChainMask = ddsChainSize - 1;
const U32 ddsLowestIndex = dms->window.dictLimit;
const BYTE* const ddsBase = dms->window.base;
const BYTE* const ddsEnd = dms->window.nextSrc;
const U32 ddsSize = (U32)(ddsEnd - ddsBase);
const U32 ddsIndexDelta = dictLimit - ddsSize;
const U32 ddsMinChain = ddsSize > ddsChainSize ? ddsSize - ddsChainSize : 0;
const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
U32 attemptNb;
const U32 ddsChainSize = (1 << dms->cParams.chainLog);
const U32 ddsChainMask = ddsChainSize - 1;
const U32 ddsLowestIndex = dms->window.dictLimit;
const BYTE* const ddsBase = dms->window.base;
const BYTE* const ddsEnd = dms->window.nextSrc;
const U32 ddsSize = (U32)(ddsEnd - ddsBase);
const U32 ddsIndexDelta = dictLimit - ddsSize;
const U32 ddsMinChain = ddsSize > ddsChainSize ? ddsSize - ddsChainSize : 0;
const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
const U32 bucketLimit = nbAttempts < bucketSize ? nbAttempts : bucketSize;
U32 ddsAttempt;
matchIndex = dms->hashTable[ddsIdx];
/* Empty chain */
if (!matchIndex)
return ml;
for (attemptNb = 0; attemptNb < bucketSize; attemptNb++) {
PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + attemptNb]);
for (ddsAttempt = 0; ddsAttempt < bucketSize; ddsAttempt++) {
PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
}
for (attemptNb = 1; (matchIndex>ddsLowestIndex) & (nbAttempts>0) ; nbAttempts--, attemptNb++) {
for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
size_t currentMl=0;
const BYTE* const match = ddsBase + matchIndex;
const BYTE* match;
matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
match = ddsBase + matchIndex;
if (matchIndex < ddsLowestIndex) {
return ml;
}
assert(match+4 <= ddsEnd);
if (MEM_read32(match) == MEM_read32(ip)) {
/* assumption : matchIndex <= dictLimit-4 (by table construction) */
currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
}
/* save best solution */
if (currentMl > ml) {
ml = currentMl;
*offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
if (ip+currentMl == iLimit) {
/* best possible, avoids read overflow on next attempt */
return ml;
}
}
if (matchIndex <= ddsMinChain) {
return ml;
}
}
for ( ; (ddsAttempt < nbAttempts) & (matchIndex >= ddsMinChain); ddsAttempt++) {
size_t currentMl=0;
const BYTE* match;
matchIndex = dms->chainTable[matchIndex & ddsChainMask];
match = ddsBase + matchIndex;
if (matchIndex < ddsLowestIndex) {
break;
}
assert(match+4 <= ddsEnd);
if (MEM_read32(match) == MEM_read32(ip)) {
/* assumption : matchIndex <= dictLimit-4 (by table construction) */
@ -606,16 +639,6 @@ size_t ZSTD_HcFindBestMatch_generic (
*offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
}
if (matchIndex <= ddsMinChain) {
break;
}
if (attemptNb < bucketSize) {
matchIndex = dms->hashTable[ddsIdx + attemptNb];
} else {
matchIndex = dms->chainTable[matchIndex & ddsChainMask];
}
}
} else if (dictMode == ZSTD_dictMatchState) {
const U32* const dmsChainTable = dms->chainTable;