From de68c2ff10f3800f5aec60abd26e63f00b5f32bc Mon Sep 17 00:00:00 2001
From: Yann Collet
Date: Wed, 7 Feb 2018 14:22:35 -0800
Subject: [PATCH] Merged ZSTD_preserveUnsortedMark() into ZSTD_reduceIndex()

as it's faster, due to one memory scan instead of two
(confirmed by microbenchmark).

Note: as ZSTD_reduceIndex() is rarely invoked,
it does not translate into a visible gain.
Consider it an exercise in auto-vectorization and micro-benchmarking.
---
 lib/compress/zstd_compress.c          | 39 ++++++++++++++++++++++++++---------
 lib/compress/zstd_compress_internal.h |  6 ++++
 lib/compress/zstd_lazy.c              | 42 ---------------------------
 3 files changed, 32 insertions(+), 55 deletions(-)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 4a825f65..3f7ac849 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -1223,32 +1223,44 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long
 
 #define ZSTD_ROWSIZE 16
-/*! ZSTD_reduceTable_internal() :
- *  reduce table indexes by `reducerValue`
- *  presume table size is a multiple of ZSTD_ROWSIZE.
- *  Helps auto-vectorization */
-static void ZSTD_reduceTable_internal (U32* const table, int const nbRows, U32 const reducerValue)
+/*! ZSTD_reduceTable() :
+ *  reduce table indexes by `reducerValue`, or squash to zero.
+ *  PreserveMark preserves "unsorted mark" for btlazy2 strategy.
+ *  It must be set to a clear 0/1 value, to remove branch during inlining.
+ *  Presume table size is a multiple of ZSTD_ROWSIZE
+ *  to help auto-vectorization */
+FORCE_INLINE_TEMPLATE void
+ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
 {
+    int const nbRows = (int)size / ZSTD_ROWSIZE;
     int cellNb = 0;
     int rowNb;
+    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
+    assert(size < (1U<<31));   /* can be cast to int */
     for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
         int column;
         for (column=0; column<ZSTD_ROWSIZE; column++) {
+            if (preserveMark) {
+                U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
+                table[cellNb] += adder;
+            }
             if (table[cellNb] < reducerValue) table[cellNb] = 0;
             else table[cellNb] -= reducerValue;
             cellNb++;
     }   }
 }
 
 static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)
 {
-    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
-    ZSTD_reduceTable_internal(table, size/ZSTD_ROWSIZE, reducerValue);
+    ZSTD_reduceTable_internal(table, size, reducerValue, 0);
+}
+
+static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)
+{
+    ZSTD_reduceTable_internal(table, size, reducerValue, 1);
 }
 
 /*! ZSTD_reduceIndex() :
 *   rescale all indexes to avoid future overflow (indexes are U32) */
 static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
 {
     ZSTD_matchState_t* const ms = &zc->blockState.matchState;
     {   U32 const hSize = (U32)1 << zc->appliedParams.cParams.hashLog;
         ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
     }
 
     if (zc->appliedParams.cParams.strategy != ZSTD_fast) {
         U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog;
         if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2)
-            ZSTD_preserveUnsortedMark(ms->chainTable, chainSize, reducerValue);
-        ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
+            ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
+        else
+            ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
     }
 
     if (ms->hashLog3) {
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index cf593658..35b153fa 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -32,6 +32,12 @@ extern "C" {
 ***************************************/
 static const U32 g_searchStrength = 8;
 #define HASH_READ_SIZE 8
+#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index 1 now means "unsorted".
+                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
+                                       It's not a big deal though : candidate will just be sorted again.
+                                       Additionally, candidate position 1 will be lost.
+                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
+                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy */
 
 
 /*-*************************************
diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c
index 844afc05..8252135d 100644
--- a/lib/compress/zstd_lazy.c
+++ b/lib/compress/zstd_lazy.c
@@ -15,48 +15,6 @@
 /*-*************************************
 *  Binary Tree search
 ***************************************/
-#define ZSTD_DUBT_UNSORTED_MARK 1   /* note : index 1 will now be confused with "unsorted" if sorted as larger than its predecessor.
-                                       It's not a big deal though : the candidate will just be considered unsorted, and be sorted again.
-                                       Additionnally, candidate position 1 will be lost.
-                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
-                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be misdhandled after table re-use with a different strategy */
-
-/*! ZSTD_preserveUnsortedMark() :
- *  pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK before ZSTD_reduceTable()
- *  so that combined operation preserves its value.
- *  Without it, ZSTD_DUBT_UNSORTED_MARK==1 would be squashed to 0.
- *  As a consequence, the list of unsorted elements would stop at first element,
- *  removing candidates, resulting in a very small loss to compression ratio
- *  (since overflow protection with ZSTD_reduceTable() is relatively rare).
- *
- *  Another potential risk is that a position will be promoted from *unsorted*
- *  to *sorted=>smaller:0*, meaning next candidate will be considered smaller.
- *  This could be wrong, and result in data corruption.
- *
- *  On second thought, this corruption might be impossible,
- *  because unsorted elements stand at the beginning of the list,
- *  and squashing to zero reduces the list to a single element,
- *  which needs to be sorted anyway.
- *  I haven't spent much thoughts into this possible scenario,
- *  and just felt it was safer to implement ZSTD_preserveUnsortedMark()
- *
- * `size` : must be a positive multiple of ZSTD_ROWSIZE */
-#define ZSTD_ROWSIZE 16
-void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue)
-{
-    int cellNb = 0;
-    U32 const nbRows = size / ZSTD_ROWSIZE;
-    U32 rowNb;
-    assert((size % ZSTD_ROWSIZE) == 0);
-    for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
-        int column;
-        for (column=0; column<ZSTD_ROWSIZE; column++) {
-            if (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK)
-                table[cellNb] = ZSTD_DUBT_UNSORTED_MARK + reducerValue;
-            cellNb++;
-    }   }
-}
-
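
To study the fused loop outside the library, here is a minimal, self-contained C sketch of the same technique. It mirrors the patch, but FORCE_INLINE_TEMPLATE is replaced by a plain `static inline`, the ZSTD_ prefix is dropped to signal this is not the library code, and the main() driver is invented for illustration.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t U32;

#define ZSTD_ROWSIZE 16
#define ZSTD_DUBT_UNSORTED_MARK 1

/* preserveMark must be a compile-time 0/1 constant at each call site:
 * after inlining, the branch folds away, leaving two straight-line
 * loop bodies that are each a single memory scan. */
static inline void
reduceTable_internal(U32* const table, U32 const size,
                     U32 const reducerValue, int const preserveMark)
{
    int const nbRows = (int)size / ZSTD_ROWSIZE;
    int cellNb = 0;
    int rowNb;
    assert((size & (ZSTD_ROWSIZE-1)) == 0);   /* multiple of ZSTD_ROWSIZE */
    for (rowNb = 0; rowNb < nbRows; rowNb++) {
        int column;
        for (column = 0; column < ZSTD_ROWSIZE; column++) {
            if (preserveMark) {
                /* pre-add reducerValue to the mark, so that the
                 * subtraction below restores it to exactly 1 */
                U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
                table[cellNb] += adder;
            }
            if (table[cellNb] < reducerValue) table[cellNb] = 0;
            else table[cellNb] -= reducerValue;
            cellNb++;
    }   }
}

static void reduceTable(U32* t, U32 size, U32 r)         { reduceTable_internal(t, size, r, 0); }
static void reduceTable_btlazy2(U32* t, U32 size, U32 r) { reduceTable_internal(t, size, r, 1); }

int main(void)
{
    U32 table[ZSTD_ROWSIZE] = { 0, ZSTD_DUBT_UNSORTED_MARK, 99, 1000, 5000 };
    reduceTable_btlazy2(table, ZSTD_ROWSIZE, 1000);
    /* prints: 0 1 0 0 4000 -- the mark at index 1 survives the reduction */
    for (int i = 0; i < 5; i++) printf("%u ", (unsigned)table[i]);
    printf("\n");
    (void)reduceTable;   /* unused in this demo */
    return 0;
}

Because preserveMark is always a literal 0 or 1, each wrapper specializes to a branch-free loop, which is what lets the compiler auto-vectorize both variants.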
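
The "confirmed by microbenchmark" claim can be reproduced in spirit. The harness below is a hypothetical stand-in (the commit does not ship one): it times the old two-pass scheme, preserve then reduce, against the fused single pass over the same table. Table contents are irrelevant to the timing; only the number of memory scans matters. Compile with optimizations (e.g. -O3) for a meaningful comparison.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

typedef uint32_t U32;
#define UNSORTED_MARK 1

/* old scheme, pass 1: bump every mark by reducerValue
 * (mirrors the deleted ZSTD_preserveUnsortedMark) */
static void preservePass(U32* t, size_t n, U32 r)
{
    for (size_t i = 0; i < n; i++)
        if (t[i] == UNSORTED_MARK) t[i] = UNSORTED_MARK + r;
}

/* old scheme, pass 2: the plain reduction */
static void reducePass(U32* t, size_t n, U32 r)
{
    for (size_t i = 0; i < n; i++)
        t[i] = (t[i] < r) ? 0 : t[i] - r;
}

/* new scheme: both steps fused into one scan */
static void fusedPass(U32* t, size_t n, U32 r)
{
    for (size_t i = 0; i < n; i++) {
        U32 const v = t[i] + ((t[i] == UNSORTED_MARK) ? r : 0);
        t[i] = (v < r) ? 0 : v - r;
    }
}

int main(void)
{
    size_t const n = (size_t)1 << 22;   /* 4M entries = 16 MB */
    int const runs = 50;
    U32 const reducer = 1u << 20;
    U32* table = malloc(n * sizeof *table);
    clock_t t0, t1, t2;
    int run;
    if (!table) return 1;
    for (size_t i = 0; i < n; i++) table[i] = (U32)rand();

    t0 = clock();
    for (run = 0; run < runs; run++) { preservePass(table, n, reducer); reducePass(table, n, reducer); }
    t1 = clock();
    for (run = 0; run < runs; run++) fusedPass(table, n, reducer);
    t2 = clock();

    printf("two passes : %.3f s\n", (double)(t1 - t0) / CLOCKS_PER_SEC);
    printf("one pass   : %.3f s\n", (double)(t2 - t1) / CLOCKS_PER_SEC);
    printf("checksum   : %u\n", (unsigned)table[0]);   /* keep the work observable */
    free(table);
    return 0;
}

As the commit message notes, ZSTD_reduceIndex() runs rarely, so whatever gap this harness shows will not surface as an end-to-end compression speedup.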