ZSTD_copy16() uses SSE2 instructions
This speeds up decompression in MSVC builds.
This commit is contained in:
parent
23dd28df67
commit
b10357ce65
@ -176,11 +176,17 @@ static void ZSTD_copy8(void* dst, const void* src) {
|
|||||||
ZSTD_memcpy(dst, src, 8);
|
ZSTD_memcpy(dst, src, 8);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
|
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
|
||||||
|
|
||||||
|
/* Need to use memmove here since the literal buffer can now be located within
|
||||||
|
the dst buffer. In circumstances where the op "catches up" to where the
|
||||||
|
literal buffer is, there can be partial overlaps in this call on the final
|
||||||
|
copy if the literal is being shifted by less than 16 bytes. */
|
||||||
static void ZSTD_copy16(void* dst, const void* src) {
|
static void ZSTD_copy16(void* dst, const void* src) {
|
||||||
#if defined(ZSTD_ARCH_ARM_NEON)
|
#if defined(ZSTD_ARCH_ARM_NEON)
|
||||||
vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
|
vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
|
||||||
|
#elif defined(ZSTD_ARCH_X86_SSE2)
|
||||||
|
_mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));
|
||||||
#else
|
#else
|
||||||
ZSTD_memmove(dst, src, 16);
|
ZSTD_memmove(dst, src, 16);
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user