From 7ef6d7b36ca34eb4adef6f9780b0953d51643bb7 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 5 May 2021 10:04:03 -0700 Subject: [PATCH] deeper prefetching pipeline for decompressSequencesLong pipeline increased from 4 to 8 slots. This change substantially improves decompression speed when there are long distance offsets. example with enwik9 compressed at level 22 : gcc-9 : 947 -> 1039 MB/s clang-10: 884 -> 946 MB/s I also checked the "cold dictionary" scenario, and found a smaller benefit, around ~2% (measurements are more noisy for this scenario). --- lib/decompress/zstd_decompress_block.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index b980339a..5419724d 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -1254,9 +1254,9 @@ ZSTD_decompressSequencesLong_body( /* Regen sequences */ if (nbSeq) { -#define STORED_SEQS 4 +#define STORED_SEQS 8 #define STORED_SEQS_MASK (STORED_SEQS-1) -#define ADVANCED_SEQS 4 +#define ADVANCED_SEQS STORED_SEQS seq_t sequences[STORED_SEQS]; int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); seqState_t seqState;