changed PREFETCH() macro into PREFETCH_L2()

which is more accurate
dev
Yann Collet 2018-11-12 17:05:32 -08:00
parent 7b0c551bff
commit 626040ab53
2 changed files with 23 additions and 16 deletions

View File

@ -92,18 +92,18 @@
* can be disabled, by declaring NO_PREFETCH build macro */
#if defined(NO_PREFETCH)
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
# define PREFETCH(ptr) (void)(ptr) /* disabled */
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
#else
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
# define PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
# define PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
# else
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
# define PREFETCH(ptr) (void)(ptr) /* disabled */
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
# endif
#endif /* NO_PREFETCH */
@ -114,7 +114,7 @@
size_t const _size = (size_t)(s); \
size_t _pos; \
for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
PREFETCH(_ptr + _pos); \
PREFETCH_L2(_ptr + _pos); \
} \
}

View File

@ -63,12 +63,13 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms,
static void
ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
U32 current, const BYTE* inputEnd,
U32 nbCompares, U32 btLow, const ZSTD_dictMode_e dictMode)
U32 nbCompares, U32 btLow,
const ZSTD_dictMode_e dictMode)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const bt = ms->chainTable;
U32 const btLog = cParams->chainLog - 1;
U32 const btMask = (1 << btLog) - 1;
U32* const bt = ms->chainTable;
U32 const btLog = cParams->chainLog - 1;
U32 const btMask = (1 << btLog) - 1;
size_t commonLengthSmaller=0, commonLengthLarger=0;
const BYTE* const base = ms->window.base;
const BYTE* const dictBase = ms->window.dictBase;
@ -80,7 +81,7 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
const BYTE* match;
U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = smallerPtr + 1;
U32 matchIndex = *smallerPtr;
U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
U32 dummy32; /* to be nullified at the end */
U32 const windowLow = ms->window.lowLimit;
@ -93,6 +94,9 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
U32* const nextPtr = bt + 2*(matchIndex & btMask);
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
assert(matchIndex < current);
/* note : all candidates are now supposed sorted,
* but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
* when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
if ( (dictMode != ZSTD_extDict)
|| (matchIndex+matchLength >= dictLimit) /* both in current segment*/
@ -108,7 +112,7 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
match = dictBase + matchIndex;
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
if (matchIndex+matchLength >= dictLimit)
match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
match = base + matchIndex; /* preparation for next read of match[matchLength] */
}
DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
@ -258,7 +262,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
&& (nbCandidates > 1) ) {
DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
matchIndex);
*unsortedMark = previousCandidate;
*unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */
previousCandidate = matchIndex;
matchIndex = *nextCandidate;
nextCandidate = bt + 2*(matchIndex&btMask);
@ -266,11 +270,13 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
nbCandidates --;
}
/* nullify last candidate if it's still unsorted
* simplification, detrimental to compression ratio, beneficial for speed */
if ( (matchIndex > unsortLimit)
&& (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
matchIndex);
*nextCandidate = *unsortedMark = 0; /* nullify next candidate if it's still unsorted (note : simplification, detrimental to compression ratio, beneficial for speed) */
*nextCandidate = *unsortedMark = 0;
}
/* batch sort stacked candidates */
@ -285,14 +291,14 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
}
/* find longest match */
{ size_t commonLengthSmaller=0, commonLengthLarger=0;
{ size_t commonLengthSmaller = 0, commonLengthLarger = 0;
const BYTE* const dictBase = ms->window.dictBase;
const U32 dictLimit = ms->window.dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const prefixStart = base + dictLimit;
U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = bt + 2*(current&btMask) + 1;
U32 matchEndIdx = current+8+1;
U32 matchEndIdx = current + 8 + 1;
U32 dummy32; /* to be nullified at the end */
size_t bestLength = 0;
@ -433,7 +439,7 @@ static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
/* *********************************
* Hash Chain
***********************************/
#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask]
#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)]
/* Update chains up to ip (excluded)
Assumption : always within prefix (i.e. not within extDict) */
@ -497,6 +503,7 @@ size_t ZSTD_HcFindBestMatch_generic (
size_t currentMl=0;
if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
const BYTE* const match = base + matchIndex;
assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */
if (match[ml] == ip[ml]) /* potentially better */
currentMl = ZSTD_count(ip, match, iLimit);
} else {