ZSTD_copy16() uses SSE2 instructions

This accelerates the decompression speed of MSVC build.
dev
Ma Lin 2021-10-28 18:53:12 +08:00
parent 23dd28df67
commit b10357ce65
1 changed files with 7 additions and 1 deletions

View File

@ -176,11 +176,17 @@ static void ZSTD_copy8(void* dst, const void* src) {
ZSTD_memcpy(dst, src, 8);
#endif
}
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
/* Need to use memmove here since the literal buffer can now be located within
the dst buffer. In circumstances where the op "catches up" to where the
literal buffer is, there can be partial overlaps in this call on the final
copy if the literal is being shifted by less than 16 bytes. */
static void ZSTD_copy16(void* dst, const void* src) {
#if defined(ZSTD_ARCH_ARM_NEON)
vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
#elif defined(ZSTD_ARCH_X86_SSE2)
_mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));
#else
ZSTD_memmove(dst, src, 16);
#endif