Merged ZSTD_preserveUnsortedMark() into ZSTD_reduceIndex()

as it's faster, due to one memory scan instead of two
(confirmed by microbenchmark).

Note: as ZSTD_reduceIndex() is rarely invoked,
the merge does not translate into a visible speed gain.
Consider it an exercise in auto-vectorization and micro-benchmarking.
dev
Yann Collet 2018-02-07 14:22:35 -08:00
parent 0170cf9a7a
commit de68c2ff10
3 changed files with 32 additions and 55 deletions
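For illustration only (not part of the commit): a minimal, self-contained C sketch of the "one scan vs two scans" comparison on a synthetic table. The names reduce_two_pass() and reduce_one_pass() are hypothetical stand-ins for the former ZSTD_preserveUnsortedMark() + ZSTD_reduceTable() sequence and for the merged loop; clock() is only a rough timer, not a rigorous microbenchmark.

/* Hypothetical sketch, not from the zstd sources. */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define UNSORTED_MARK 1           /* plays the role of ZSTD_DUBT_UNSORTED_MARK */
#define TABLE_SIZE    (1u << 22)  /* arbitrary table size for the comparison */

static void reduce_two_pass(unsigned* table, unsigned size, unsigned reducer)
{
    unsigned i;
    for (i = 0; i < size; i++)    /* scan 1 : bump the mark so it survives */
        if (table[i] == UNSORTED_MARK) table[i] += reducer;
    for (i = 0; i < size; i++)    /* scan 2 : reduce indexes, squash small ones to zero */
        table[i] = (table[i] < reducer) ? 0 : table[i] - reducer;
}

static void reduce_one_pass(unsigned* table, unsigned size, unsigned reducer)
{
    unsigned i;
    for (i = 0; i < size; i++) {  /* a single scan does both steps */
        unsigned const v = table[i] + ((table[i] == UNSORTED_MARK) ? reducer : 0);
        table[i] = (v < reducer) ? 0 : v - reducer;
    }
}

int main(void)
{
    unsigned* const table = (unsigned*)malloc(TABLE_SIZE * sizeof(*table));
    unsigned i; clock_t t;
    if (table == NULL) return 1;
    for (i = 0; i < TABLE_SIZE; i++) table[i] = (unsigned)rand();

    t = clock();
    reduce_two_pass(table, TABLE_SIZE, 1000);
    printf("two scans : %ld ticks\n", (long)(clock() - t));

    t = clock();
    reduce_one_pass(table, TABLE_SIZE, 1000);
    printf("one scan  : %ld ticks\n", (long)(clock() - t));

    free(table);
    return 0;
}

Whether either loop actually auto-vectorizes can be checked with compiler reports such as GCC's -fopt-info-vec or clang's -Rpass=loop-vectorize.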


@@ -1223,32 +1223,44 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long
#define ZSTD_ROWSIZE 16
/*! ZSTD_reduceTable_internal() :
 *  reduce table indexes by `reducerValue`
 *  presume table size is a multiple of ZSTD_ROWSIZE.
 *  Helps auto-vectorization */
static void ZSTD_reduceTable_internal (U32* const table, int const nbRows, U32 const reducerValue)
/*! ZSTD_reduceTable() :
 *  reduce table indexes by `reducerValue`, or squash to zero.
 *  PreserveMark preserves the "unsorted mark" for the btlazy2 strategy.
 *  It must be set to a clear 0/1 value, to remove the branch during inlining.
 *  Presume table size is a multiple of ZSTD_ROWSIZE
 *  to help auto-vectorization */
FORCE_INLINE_TEMPLATE void
ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
{
    int const nbRows = (int)size / ZSTD_ROWSIZE;
    int cellNb = 0;
    int rowNb;
    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
    assert(size < (1U<<31));                 /* can be cast to int */
    for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
        int column;
        for (column=0; column<ZSTD_ROWSIZE; column++) {
            if (preserveMark) {
                U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
                table[cellNb] += adder;
            }
            if (table[cellNb] < reducerValue) table[cellNb] = 0;
            else table[cellNb] -= reducerValue;
            cellNb++;
    }   }
}
/*! ZSTD_reduceTable() :
 *  reduce table indexes by `reducerValue` */
static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue)
static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)
{
    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
    assert(size < (1U<<31));                 /* can be cast to int */
    ZSTD_reduceTable_internal(table, size/ZSTD_ROWSIZE, reducerValue);
    ZSTD_reduceTable_internal(table, size, reducerValue, 0);
}

static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)
{
    ZSTD_reduceTable_internal(table, size, reducerValue, 1);
}
/*! ZSTD_ldm_reduceTable() :
 *  reduce table indexes by `reducerValue` */
static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
@@ -1273,8 +1285,9 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
    if (zc->appliedParams.cParams.strategy != ZSTD_fast) {
        U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog;
        if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2)
            ZSTD_preserveUnsortedMark(ms->chainTable, chainSize, reducerValue);
        ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
            ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
        else
            ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
    }
    if (ms->hashLog3) {
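An aside on the pattern in this hunk (not part of the commit): because each thin wrapper passes preserveMark as a literal 0 or 1, the force-inlined inner loop is specialized and the dead branch disappears. A minimal stand-alone sketch of that technique follows; FORCE_INLINE here is a hypothetical stand-in for zstd's FORCE_INLINE_TEMPLATE, and the helper names are invented.

#include <stdio.h>

#if defined(__GNUC__)
#  define FORCE_INLINE static inline __attribute__((always_inline))
#else
#  define FORCE_INLINE static inline
#endif

#define MARK 1u   /* plays the role of ZSTD_DUBT_UNSORTED_MARK */

FORCE_INLINE void reduce_internal(unsigned* table, unsigned size,
                                  unsigned reducer, int preserveMark)
{
    unsigned i;
    for (i = 0; i < size; i++) {
        if (preserveMark) {   /* compile-time constant after inlining : branch folds away */
            if (table[i] == MARK) table[i] += reducer;
        }
        table[i] = (table[i] < reducer) ? 0 : table[i] - reducer;
    }
}

static void reduce(unsigned* table, unsigned size, unsigned reducer)
{
    reduce_internal(table, size, reducer, 0);   /* plain specialization */
}

static void reduce_btlazy2(unsigned* table, unsigned size, unsigned reducer)
{
    reduce_internal(table, size, reducer, 1);   /* mark-preserving specialization */
}

int main(void)
{
    unsigned t[4] = { 0, 1, 500, 5000 };
    reduce_btlazy2(t, 4, 1000);
    printf("%u %u %u %u\n", t[0], t[1], t[2], t[3]);   /* 0 1 0 4000 : the mark survives */
    reduce(t, 4, 1000);
    printf("%u %u %u %u\n", t[0], t[1], t[2], t[3]);   /* 0 0 0 3000 : without the flag it is squashed */
    return 0;
}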


@@ -32,6 +32,12 @@ extern "C" {
***************************************/
static const U32 g_searchStrength = 8;
#define HASH_READ_SIZE 8
#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index 1 now means "unsorted".
                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
                                       It's not a big deal though: the candidate will just be sorted again.
                                       Additionally, candidate position 1 will be lost.
                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. */
/*-*************************************
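A worked example (not part of the commit) to make the interaction with ZSTD_reduceIndex() concrete; the reducerValue below is an arbitrary illustrative number. An entry equal to ZSTD_DUBT_UNSORTED_MARK is pre-incremented by reducerValue, so the subsequent subtraction brings it back to 1; a plain reduction would squash it to 0 and the chain of unsorted candidates would appear terminated.

#include <assert.h>

#define ZSTD_DUBT_UNSORTED_MARK 1

int main(void)
{
    unsigned const reducerValue = 0x01000000;       /* arbitrary illustrative value */
    unsigned const entry = ZSTD_DUBT_UNSORTED_MARK;

    /* plain reduction : the mark is below reducerValue, so it is squashed */
    unsigned const plain = (entry < reducerValue) ? 0 : entry - reducerValue;

    /* mark-preserving reduction : pre-add reducerValue, then subtract it back */
    unsigned const bumped    = entry + ((entry == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0);
    unsigned const preserved = (bumped < reducerValue) ? 0 : bumped - reducerValue;

    assert(plain == 0);                             /* "unsorted" information lost */
    assert(preserved == ZSTD_DUBT_UNSORTED_MARK);   /* "unsorted" information kept */
    return 0;
}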


@@ -15,48 +15,6 @@
/*-*************************************
* Binary Tree search
***************************************/
#define ZSTD_DUBT_UNSORTED_MARK 1   /* note: index 1 will now be confused with "unsorted" if sorted as larger than its predecessor.
                                       It's not a big deal though: the candidate will just be considered unsorted, and be sorted again.
                                       Additionally, candidate position 1 will be lost.
                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. */
/*! ZSTD_preserveUnsortedMark() :
 *  pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK before ZSTD_reduceTable()
 *  so that the combined operation preserves its value.
 *  Without it, ZSTD_DUBT_UNSORTED_MARK==1 would be squashed to 0.
 *  As a consequence, the list of unsorted elements would stop at the first element,
 *  removing candidates, resulting in a very small loss of compression ratio
 *  (since overflow protection with ZSTD_reduceTable() is relatively rare).
 *
 *  Another potential risk is that a position will be promoted from *unsorted*
 *  to *sorted=>smaller:0*, meaning the next candidate will be considered smaller.
 *  This could be wrong, and result in data corruption.
 *
 *  On second thought, this corruption might be impossible,
 *  because unsorted elements stand at the beginning of the list,
 *  and squashing to zero reduces the list to a single element,
 *  which needs to be sorted anyway.
 *  I haven't spent much thought on this possible scenario,
 *  and just felt it was safer to implement ZSTD_preserveUnsortedMark().
 *
 *  `size` : must be a positive multiple of ZSTD_ROWSIZE */
#define ZSTD_ROWSIZE 16
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue)
{
    int cellNb = 0;
    U32 const nbRows = size / ZSTD_ROWSIZE;
    U32 rowNb;
    assert((size % ZSTD_ROWSIZE) == 0);
    for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
        int column;
        for (column=0; column<ZSTD_ROWSIZE; column++) {
            U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
            table[cellNb] += adder;
            cellNb++;
    }   }
}
void ZSTD_updateDUBT(
                ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,