diff --git a/lib/common/compiler.h b/lib/common/compiler.h index 31eb1ccd..07f875e4 100644 --- a/lib/common/compiler.h +++ b/lib/common/compiler.h @@ -89,7 +89,13 @@ #endif /* prefetch - * can be disabled, by declaring NO_PREFETCH macro */ + * can be disabled, by declaring NO_PREFETCH macro + * All prefetch invocations use a single default locality 2, + * generating instruction prefetcht1, + * which, according to Intel, means "load data into L2 cache". + * This is a good enough "middle ground" for the time being, + * though in theory, it would be better to specialize locality depending on data being prefetched. + * Tests could not determine any sensible difference based on locality value. */ #if defined(NO_PREFETCH) # define PREFETCH(ptr) (void)(ptr) /* disabled */ #else diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 9d740833..1382c9c7 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -56,7 +56,8 @@ * Dependencies *********************************************************/ #include <string.h> /* memcpy, memmove, memset */ -#include "cpu.h" /* prefetch */ +#include "compiler.h" /* prefetch */ +#include "cpu.h" /* bmi2 */ #include "mem.h" /* low level memory routines */ #define FSE_STATIC_LINKING_ONLY #include "fse.h"