From 44c65da97ea36e4ba8ca6968979123fafca5485c Mon Sep 17 00:00:00 2001
From: Nick Terrell
Date: Fri, 20 Sep 2019 12:23:25 -0700
Subject: [PATCH] Remove literals overread in ZSTD_storeSeq() for ~neutral perf

---
 lib/common/zstd_internal.h            |  2 +-
 lib/compress/zstd_compress_internal.h | 36 ++++++++++++++++++++++-----
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h
index 522c1fda..9dc9c09e 100644
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -214,7 +214,7 @@ typedef enum {
  * The src buffer must be before the dst buffer.
  */
 MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
-void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
+void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
 {
     ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
     const BYTE* ip = (const BYTE*)src;
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index 83221757..e80686cc 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -340,6 +340,21 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
     return (srcSize >> minlog) + 2;
 }
 
+/*! ZSTD_safecopyLiterals() :
+ *  memcpy() function that won't read more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
+ *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
+ *  large copies.
+ */
+static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
+    assert(iend > ilimit_w);
+    if (ip <= ilimit_w) {
+        ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
+        op += ilimit_w - ip;
+        ip = ilimit_w;
+    }
+    while (ip < iend) *op++ = *ip++;
+}
+
 /*! ZSTD_storeSeq() :
  *  Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
  *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
@@ -348,6 +363,8 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
  */
 MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
 {
+    BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
+    BYTE const* const litEnd = literals + litLength;
 #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
     static const BYTE* g_start = NULL;
     if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
@@ -360,12 +377,19 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B
     /* copy Literals */
     assert(seqStorePtr->maxNbLit <= 128 KB);
     assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
-    /* We are guaranteed at least 8 bytes of literals space because of HASH_READ_SIZE. */
-    assert(literals + litLength + HASH_READ_SIZE <= litLimit);
-    if (literals + litLength + WILDCOPY_OVERLENGTH <= litLimit)
-        ZSTD_wildcopy(seqStorePtr->lit, literals, (ptrdiff_t)litLength, ZSTD_no_overlap);
-    else
-        ZSTD_wildcopy8(seqStorePtr->lit, literals, (ptrdiff_t)litLength);
+    assert(literals + litLength <= litLimit);
+    if (litEnd <= litLimit_w) {
+        /* Common case; we can use wildcopy.
+         * First copy 16 bytes, because literals are likely short.
+         */
+        assert(WILDCOPY_OVERLENGTH >= 16);
+        ZSTD_copy16(seqStorePtr->lit, literals);
+        if (litLength > 16) {
+            ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
+        }
+    } else {
+        ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
+    }
     seqStorePtr->lit += litLength;
 
     /* literal Length */
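
As a rough illustration of the pattern this patch introduces, here is a minimal standalone sketch, not taken from the zstd sources: bulk_copy, safe_copy and OVERLENGTH are made-up stand-ins for ZSTD_wildcopy, ZSTD_safecopyLiterals and WILDCOPY_OVERLENGTH. The bulk copy is allowed to over-read and over-write in 16-byte chunks, so it is only used while the read cursor is still at least OVERLENGTH bytes before the end of the source; the remaining tail is copied one byte at a time, so nothing is ever read past the source end.

/* Standalone sketch of the guard pattern (not zstd code). */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define OVERLENGTH 32   /* illustrative stand-in for WILDCOPY_OVERLENGTH */

/* Chunked copy: may read and write up to 16 bytes past `length`,
 * mimicking how a wildcopy over-reads/over-writes for speed. */
static void bulk_copy(unsigned char* op, const unsigned char* ip, ptrdiff_t length)
{
    unsigned char* const oend = op + length;
    do {
        memcpy(op, ip, 16);
        op += 16;
        ip += 16;
    } while (op < oend);
}

/* Safe variant in the spirit of ZSTD_safecopyLiterals(): never reads past `iend`.
 * `ilimit_w` must be at least OVERLENGTH bytes before `iend`. */
static void safe_copy(unsigned char* op, const unsigned char* ip,
                      const unsigned char* const iend,
                      const unsigned char* const ilimit_w)
{
    assert(iend - ilimit_w >= OVERLENGTH);
    if (ip <= ilimit_w) {                 /* enough room left for over-reading chunks */
        bulk_copy(op, ip, ilimit_w - ip);
        op += ilimit_w - ip;
        ip  = ilimit_w;
    }
    while (ip < iend) *op++ = *ip++;      /* byte-by-byte tail, no over-read */
}

int main(void)
{
    unsigned char src[100];
    unsigned char dst[100 + 16];          /* destination needs slack for the over-write */
    size_t i;
    for (i = 0; i < sizeof src; ++i) src[i] = (unsigned char)i;

    /* Copy the last 40 bytes of `src`: the first 8 go through bulk_copy
     * (which reads 16 bytes, still inside `src`), the rest goes byte by byte. */
    safe_copy(dst, src + 60, src + sizeof src, src + sizeof src - OVERLENGTH);
    assert(memcmp(dst, src + 60, 40) == 0);
    printf("copied 40 bytes without reading past the end of src\n");
    return 0;
}

The same reasoning explains the ZSTD_copy16() call in the patched ZSTD_storeSeq(): literal runs are usually short, so a single 16-byte copy covers most sequences before the wildcopy loop is ever entered.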