Remove literals overread in ZSTD_storeSeq() for ~neutral perf

dev
Nick Terrell 2019-09-20 12:23:25 -07:00
parent fde217df04
commit 44c65da97e
2 changed files with 31 additions and 7 deletions
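
Background, not part of the commit message: ZSTD_wildcopy() trades precision for speed by copying in fixed-size chunks, so it may read and write up to WILDCOPY_OVERLENGTH bytes past the requested length. Before this commit, ZSTD_storeSeq() relied on that slack existing after the literals, i.e. it could overread the input; the change below confines the overread to a safe region. A minimal sketch of the chunked-copy shape (illustrative memcpy-based code, not zstd's implementation):

#include <string.h>

/* Wildcopy-style sketch: copies `length` bytes in 16-byte chunks, and may
 * read/write up to one full chunk past the requested length. Callers must
 * therefore reserve slack after both buffers; removing that requirement on
 * the input side of ZSTD_storeSeq() is what this commit does. */
static void wildcopy16(void* dst, const void* src, size_t length)
{
    unsigned char* op = (unsigned char*)dst;
    const unsigned char* ip = (const unsigned char*)src;
    unsigned char* const oend = op + length;
    do {
        memcpy(op, ip, 16);   /* a constant-size memcpy lowers to vector moves */
        op += 16;
        ip += 16;
    } while (op < oend);
}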

@@ -214,7 +214,7 @@ typedef enum {
  * The src buffer must be before the dst buffer.
  */
 MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
-void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
+void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
{
     ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
     const BYTE* ip = (const BYTE*)src;
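
The only change in this first file is the const qualifier on ovtype. On a by-value parameter this is purely local to the function body: the type seen by callers is unchanged, and the qualifier only forbids reassigning the parameter inside the function. Illustrative snippet (not zstd code):

typedef enum { no_overlap_sketch, overlap_sketch } overlap_sketch_e;

static void copy_sketch(overlap_sketch_e const ovtype)
{
    /* ovtype = no_overlap_sketch;   <- would now be a compile error */
    (void)ovtype;   /* by-value const: caller-compatible, body-immutable */
}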

@@ -340,6 +340,21 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
     return (srcSize >> minlog) + 2;
 }
 
+/*! ZSTD_safecopyLiterals() :
+ *  memcpy() function that won't read more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
+ *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for
+ *  single large copies.
+ */
+static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
+    assert(iend > ilimit_w);
+    if (ip <= ilimit_w) {
+        ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
+        op += ilimit_w - ip;
+        ip = ilimit_w;
+    }
+    while (ip < iend) *op++ = *ip++;
+}
+
 /*! ZSTD_storeSeq() :
  *  Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
  *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
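
The new helper splits the copy at ilimit_w: since ZSTD_wildcopy(op, ip, n, ...) may read up to WILDCOPY_OVERLENGTH bytes past ip + n, bulk-copying only up to ilimit_w = litLimit - WILDCOPY_OVERLENGTH keeps every read at or below litLimit; the remaining tail is then copied one byte at a time. A self-contained harness with the same control flow (memcpy stands in for ZSTD_wildcopy, names and the OVERLENGTH value are illustrative):

#include <assert.h>
#include <stdio.h>
#include <string.h>

#define OVERLENGTH 32   /* stands in for WILDCOPY_OVERLENGTH */

/* Same shape as ZSTD_safecopyLiterals(): bulk copy while the source cursor
 * is at or below ilimit_w, then finish byte by byte so no read goes past
 * the end of the source buffer. */
static void safecopy(unsigned char* op, const unsigned char* ip,
                     const unsigned char* const iend,
                     const unsigned char* const ilimit_w)
{
    assert(iend > ilimit_w);
    if (ip <= ilimit_w) {
        memcpy(op, ip, (size_t)(ilimit_w - ip));   /* exact stand-in for the wildcopy */
        op += ilimit_w - ip;
        ip  = ilimit_w;
    }
    while (ip < iend) *op++ = *ip++;
}

int main(void)
{
    unsigned char src[100], dst[100] = {0};
    for (int i = 0; i < 100; i++) src[i] = (unsigned char)i;
    /* literals run to the very end of the buffer: the byte loop covers the
     * last OVERLENGTH bytes that the bulk copy must not touch */
    safecopy(dst, src, src + sizeof src, src + sizeof src - OVERLENGTH);
    printf("%s\n", memcmp(dst, src, sizeof dst) == 0 ? "ok" : "mismatch");
    return 0;
}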
@@ -348,6 +363,8 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
  */
 MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
 {
+    BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
+    BYTE const* const litEnd = literals + litLength;
 #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
     static const BYTE* g_start = NULL;
     if (g_start==NULL) g_start = (const BYTE*)literals;   /* note : index only works for compression within a single segment */
@@ -360,12 +377,19 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B
     /* copy Literals */
     assert(seqStorePtr->maxNbLit <= 128 KB);
     assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
-    /* We are guaranteed at least 8 bytes of literals space because of HASH_READ_SIZE. */
-    assert(literals + litLength + HASH_READ_SIZE <= litLimit);
-    if (literals + litLength + WILDCOPY_OVERLENGTH <= litLimit)
-        ZSTD_wildcopy(seqStorePtr->lit, literals, (ptrdiff_t)litLength, ZSTD_no_overlap);
-    else
-        ZSTD_wildcopy8(seqStorePtr->lit, literals, (ptrdiff_t)litLength);
+    assert(literals + litLength <= litLimit);
+    if (litEnd <= litLimit_w) {
+        /* Common case: we can use wildcopy.
+         * First copy 16 bytes, because literals are likely short.
+         */
+        assert(WILDCOPY_OVERLENGTH >= 16);
+        ZSTD_copy16(seqStorePtr->lit, literals);
+        if (litLength > 16) {
+            ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
+        }
+    } else {
+        ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
+    }
     seqStorePtr->lit += litLength;
 
     /* literal Length */
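
In the rewritten fast path the common case is litEnd <= litLimit_w, i.e. at least WILDCOPY_OVERLENGTH bytes remain after the literals, so overreading stays inside the buffer. Because literal runs are usually short, one unconditional 16-byte copy handles most sequences without a loop, which is what the assert(WILDCOPY_OVERLENGTH >= 16) licenses; only longer runs continue into wildcopy for the remainder. A sketch of that shape (memcpy-based stand-ins for ZSTD_copy16/ZSTD_wildcopy, not zstd code):

#include <string.h>

/* Fast-path shape of the new literal copy in ZSTD_storeSeq(). Valid only when
 * at least 16 bytes of slack follow both buffers, which the
 * litEnd <= litLimit_w test guarantees on the source side. */
static void copy_literals_fast(unsigned char* op, const unsigned char* ip, size_t litLength)
{
    memcpy(op, ip, 16);                 /* stand-in for ZSTD_copy16: covers short runs branch-free */
    if (litLength > 16) {
        size_t i = 16;
        do {                            /* stand-in for ZSTD_wildcopy on the remainder */
            memcpy(op + i, ip + i, 16);
            i += 16;
        } while (i < litLength);
    }
}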