Widen ZSTD_wildcopy to 32 bytes

dev
Nick Terrell 2019-09-20 00:52:15 -07:00
parent efd37a64ea
commit cdad7fa512
2 changed files with 18 additions and 16 deletions

View File

@@ -197,7 +197,7 @@ static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
#define WILDCOPY_OVERLENGTH 16
#define WILDCOPY_OVERLENGTH 32
#define WILDCOPY_VECLEN 16
typedef enum {
@@ -237,11 +237,11 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
* On clang-8 unrolling once is +1.4%, twice is +3.3%, thrice is +3%.
*/
COPY16(op, ip);
if (op >= oend) return;
COPY16(op, ip);
if (op >= oend) return;
do {
COPY16(op, ip);
COPY16(op, ip);
}
while (op < oend);
}
@@ -257,7 +257,7 @@ MEM_STATIC void ZSTD_wildcopy8(void* dst, const void* src, ptrdiff_t length)
BYTE* op = (BYTE*)dst;
BYTE* const oend = (BYTE*)op + length;
do {
COPY8(op, ip)
COPY8(op, ip);
} while (op < oend);
}

View File

@@ -724,12 +724,14 @@ size_t ZSTD_execSequence(BYTE* op,
assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
/* Copy Literals:
* Split out litLength <= 16 since it is nearly always true. +1% on gcc-9.
* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
* We likely don't need the full 32-byte wildcopy.
*/
if (sequence.litLength <= 16)
ZSTD_copy16(op, *litPtr);
else
ZSTD_wildcopy(op, (*litPtr), sequence.litLength, ZSTD_no_overlap);
assert(WILDCOPY_OVERLENGTH >= 16);
ZSTD_copy16(op, (*litPtr));
if (sequence.litLength > 16) {
ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
}
op = oLitEnd;
*litPtr = iLitEnd; /* update for next sequence */
@@ -755,18 +757,18 @@ size_t ZSTD_execSequence(BYTE* op,
assert(match >= prefixStart);
assert(sequence.matchLength >= 1);
/* Nearly all offsets are >= 16 bytes, which means we can use wildcopy
/* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
* without overlap checking.
*/
if (sequence.offset >= 16) {
/* Split out matchLength <= 16 since it is nearly always true. +1% on gcc-9. */
if (sequence.matchLength <= 16)
ZSTD_copy16(op, match);
else
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
if (sequence.offset >= WILDCOPY_VECLEN) {
/* Split out matchLength <= 32 since it is nearly always true. +1% on gcc-9.
* We copy 32 bytes here since matches are generally longer than literals.
* In silesia, for example ~10% of matches are longer than 16 bytes.
*/
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
return sequenceLength;
}
assert(sequence.offset < 16);
assert(sequence.offset < WILDCOPY_VECLEN);
/* Copy 8 bytes and spread the offset to be >= 8. */
ZSTD_overlapCopy8(&op, &match, sequence.offset);