From 7201980650c454fb5d9b4bcedd786b73b1076712 Mon Sep 17 00:00:00 2001 From: caoyzh Date: Sat, 14 Mar 2020 15:25:09 +0800 Subject: [PATCH] Optimize by prefetching on aarch64 --- lib/common/compiler.h | 3 +++ lib/compress/zstd_double_fast.c | 3 +++ lib/compress/zstd_fast.c | 5 +++++ 3 files changed, 11 insertions(+) diff --git a/lib/common/compiler.h b/lib/common/compiler.h index 4213dfcf..5fd92f04 100644 --- a/lib/common/compiler.h +++ b/lib/common/compiler.h @@ -114,6 +114,9 @@ # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ # define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) # define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) +# elif defined(__aarch64__) +# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))) +# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))) # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) # define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) # define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index 2e657f7c..ef09a696 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -198,6 +198,9 @@ size_t ZSTD_compressBlock_doubleFast_generic( } } ip += ((ip-anchor) >> kSearchStrength) + 1; +#if defined(__aarch64__) + PREFETCH_L1(ip+256); +#endif continue; _search_next_long: diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 09e220b3..556dc5a9 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -102,6 +102,11 @@ ZSTD_compressBlock_fast_generic( const BYTE* match0 = base + matchIndex0; const BYTE* match1 = base + matchIndex1; U32 offcode; + +#if defined(__aarch64__) + PREFETCH_L1(ip0+256); +#endif + hashTable[h0] = current0; /* update hash table */ hashTable[h1] = 
current1; /* update hash table */