ZSTD_copy16() uses SSE2 instructions
This accelerates the decompression speed of MSVC build.dev
parent
23dd28df67
commit
b10357ce65
|
@ -176,11 +176,17 @@ static void ZSTD_copy8(void* dst, const void* src) {
|
|||
ZSTD_memcpy(dst, src, 8);
|
||||
#endif
|
||||
}
|
||||
|
||||
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
|
||||
|
||||
/* Need to use memmove here since the literal buffer can now be located within
|
||||
the dst buffer. In circumstances where the op "catches up" to where the
|
||||
literal buffer is, there can be partial overlaps in this call on the final
|
||||
copy if the literal is being shifted by less than 16 bytes. */
|
||||
static void ZSTD_copy16(void* dst, const void* src) {
|
||||
#if defined(ZSTD_ARCH_ARM_NEON)
|
||||
vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
|
||||
#elif defined(ZSTD_ARCH_X86_SSE2)
|
||||
_mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));
|
||||
#else
|
||||
ZSTD_memmove(dst, src, 16);
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue