Optimize by prefetching on aarch64
parent
66607d0eac
commit
7201980650
|
@ -114,6 +114,9 @@
|
|||
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
|
||||
/* L1 prefetch hint: calls _mm_prefetch with _MM_HINT_T0 on ptr (cast to const char*). */
# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
|
||||
/* L2 prefetch hint: calls _mm_prefetch with _MM_HINT_T1 on ptr (cast to const char*). */
# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
|
||||
# elif defined(__aarch64__)
|
||||
/* AArch64 L1 prefetch: emits a `prfm pldl1keep` instruction through inline asm.
 * *(ptr) is written only to form the "Q" memory operand, so ptr must have a
 * dereferenceable (non-void) pointer type. Expands to an asm statement, not an
 * expression. */
# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
|
||||
/* AArch64 L2 prefetch: emits a `prfm pldl2keep` instruction through inline asm.
 * *(ptr) is written only to form the "Q" memory operand, so ptr must have a
 * dereferenceable (non-void) pointer type. Expands to an asm statement, not an
 * expression. */
# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
|
||||
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
|
||||
/* L1 prefetch hint via the compiler builtin: read access (rw = 0), locality level 3. */
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
|
||||
/* L2 prefetch hint via the compiler builtin: read access (rw = 0), locality level 2. */
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
|
||||
|
|
|
@ -198,6 +198,9 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
|||
} }
|
||||
|
||||
ip += ((ip-anchor) >> kSearchStrength) + 1;
|
||||
#if defined(__aarch64__)
|
||||
PREFETCH_L1(ip+256);
|
||||
#endif
|
||||
continue;
|
||||
|
||||
_search_next_long:
|
||||
|
|
|
@ -102,6 +102,11 @@ ZSTD_compressBlock_fast_generic(
|
|||
const BYTE* match0 = base + matchIndex0;
|
||||
const BYTE* match1 = base + matchIndex1;
|
||||
U32 offcode;
|
||||
|
||||
#if defined(__aarch64__)
|
||||
PREFETCH_L1(ip0+256);
|
||||
#endif
|
||||
|
||||
hashTable[h0] = current0; /* update hash table */
|
||||
hashTable[h1] = current1; /* update hash table */
|
||||
|
||||
|
|
Loading…
Reference in New Issue