Merged ZSTD_preserveUnsortedMark() into ZSTD_reduceIndex()
since it is faster: one memory scan instead of two (confirmed by microbenchmark). Note: as ZSTD_reduceIndex() is rarely invoked, this does not translate into a visible gain. Consider it an exercise in auto-vectorization and micro-benchmarking.
parent 0170cf9a7a
commit de68c2ff10
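
The principle, shown below as a minimal standalone sketch (simplified names, not the code of this commit; the actual change is in the diff that follows), is to fold the mark-preserving pre-increment into the same loop that subtracts reducerValue, so the table is traversed once instead of twice:

#include <assert.h>
#include <stdint.h>

#define ROWSIZE       16   /* stand-in for ZSTD_ROWSIZE */
#define UNSORTED_MARK 1u   /* stand-in for ZSTD_DUBT_UNSORTED_MARK */

/* Single-pass reduction: when preserveMark is non-zero, cells holding the
 * "unsorted" mark are pre-incremented by reducerValue so that the
 * subtraction below restores the mark instead of squashing it to 0.
 * Before the merge, that pre-increment was a separate full table scan. */
static void reduceTable_onePass(uint32_t* table, uint32_t size,
                                uint32_t reducerValue, int preserveMark)
{
    uint32_t cell;
    assert(size % ROWSIZE == 0);   /* fixed-width rows help auto-vectorization */
    for (cell = 0; cell < size; cell++) {
        if (preserveMark && table[cell] == UNSORTED_MARK)
            table[cell] += reducerValue;
        table[cell] = (table[cell] < reducerValue) ? 0 : table[cell] - reducerValue;
    }
}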
@@ -1223,32 +1223,44 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long
 #define ZSTD_ROWSIZE 16
-/*! ZSTD_reduceTable_internal() :
- *  reduce table indexes by `reducerValue`
- *  presume table size is a multiple of ZSTD_ROWSIZE.
- *  Helps auto-vectorization */
-static void ZSTD_reduceTable_internal (U32* const table, int const nbRows, U32 const reducerValue)
+/*! ZSTD_reduceTable() :
+ *  reduce table indexes by `reducerValue`, or squash to zero.
+ *  PreserveMark preserves "unsorted mark" for btlazy2 strategy.
+ *  It must be set to a clear 0/1 value, to remove branch during inlining.
+ *  Presume table size is a multiple of ZSTD_ROWSIZE
+ *  to help auto-vectorization */
+FORCE_INLINE_TEMPLATE void
+ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
 {
+    int const nbRows = (int)size / ZSTD_ROWSIZE;
     int cellNb = 0;
     int rowNb;
+    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
+    assert(size < (1U<<31));                 /* can be cast to int */
     for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
         int column;
         for (column=0; column<ZSTD_ROWSIZE; column++) {
+            if (preserveMark) {
+                U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
+                table[cellNb] += adder;
+            }
             if (table[cellNb] < reducerValue) table[cellNb] = 0;
             else table[cellNb] -= reducerValue;
             cellNb++;
     }   }
 }

 /*! ZSTD_reduceTable() :
  *  reduce table indexes by `reducerValue` */
-static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue)
+static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)
 {
-    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
-    assert(size < (1U<<31));                 /* can be cast to int */
-    ZSTD_reduceTable_internal(table, size/ZSTD_ROWSIZE, reducerValue);
+    ZSTD_reduceTable_internal(table, size, reducerValue, 0);
 }
+
+static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)
+{
+    ZSTD_reduceTable_internal(table, size, reducerValue, 1);
+}


 /*! ZSTD_ldm_reduceTable() :
  *  reduce table indexes by `reducerValue` */
 static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
@@ -1273,8 +1285,9 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
     if (zc->appliedParams.cParams.strategy != ZSTD_fast) {
         U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog;
         if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2)
-            ZSTD_preserveUnsortedMark(ms->chainTable, chainSize, reducerValue);
-        ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
+            ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
+        else
+            ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
     }

     if (ms->hashLog3) {
@@ -32,6 +32,12 @@ extern "C" {
 ***************************************/
 static const U32 g_searchStrength = 8;
 #define HASH_READ_SIZE 8
+#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index 1 now means "unsorted".
+                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
+                                       It's not a big deal though: the candidate will just be sorted again.
+                                       Additionally, candidate position 1 will be lost.
+                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
+                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy */


 /*-*************************************
@@ -15,48 +15,6 @@
 /*-*************************************
 *  Binary Tree search
 ***************************************/
-#define ZSTD_DUBT_UNSORTED_MARK 1   /* note : index 1 will now be confused with "unsorted" if sorted as larger than its predecessor.
-                                       It's not a big deal though: the candidate will just be considered unsorted, and be sorted again.
-                                       Additionally, candidate position 1 will be lost.
-                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
-                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy */
-
-/*! ZSTD_preserveUnsortedMark() :
- *  pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK before ZSTD_reduceTable()
- *  so that combined operation preserves its value.
- *  Without it, ZSTD_DUBT_UNSORTED_MARK==1 would be squashed to 0.
- *  As a consequence, the list of unsorted elements would stop at first element,
- *  removing candidates, resulting in a very small loss to compression ratio
- *  (since overflow protection with ZSTD_reduceTable() is relatively rare).
- *
- *  Another potential risk is that a position will be promoted from *unsorted*
- *  to *sorted=>smaller:0*, meaning next candidate will be considered smaller.
- *  This could be wrong, and result in data corruption.
- *
- *  On second thought, this corruption might be impossible,
- *  because unsorted elements stand at the beginning of the list,
- *  and squashing to zero reduces the list to a single element,
- *  which needs to be sorted anyway.
- *  I haven't spent much thought on this possible scenario,
- *  and just felt it was safer to implement ZSTD_preserveUnsortedMark()
- *
- *  `size` : must be a positive multiple of ZSTD_ROWSIZE */
-#define ZSTD_ROWSIZE 16
-void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue)
-{
-    int cellNb = 0;
-    U32 const nbRows = size / ZSTD_ROWSIZE;
-    U32 rowNb;
-    assert((size % ZSTD_ROWSIZE) == 0);
-    for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
-        int column;
-        for (column=0; column<ZSTD_ROWSIZE; column++) {
-            U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
-            table[cellNb] += adder;
-            cellNb++;
-    }   }
-}
-
-
 void ZSTD_updateDUBT(
         ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
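
As a quick sanity check of the mark-preservation arithmetic described in the removed ZSTD_preserveUnsortedMark() comment above, here is a small hypothetical test (illustrative names and values, not part of the commit): a cell equal to the mark is pre-bumped by reducerValue and then reduced, landing back on the mark, while ordinary small indexes are squashed to zero and large ones are simply shifted down.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define UNSORTED_MARK 1u   /* stand-in for ZSTD_DUBT_UNSORTED_MARK */

int main(void)
{
    uint32_t table[4] = { UNSORTED_MARK, 100, 5000, 70000 };
    uint32_t const reducerValue = 4096;
    int i;
    for (i = 0; i < 4; i++) {
        /* pre-bump the mark so the subtraction below restores it */
        if (table[i] == UNSORTED_MARK) table[i] += reducerValue;
        /* squash-or-reduce, as in ZSTD_reduceTable_internal() */
        table[i] = (table[i] < reducerValue) ? 0 : table[i] - reducerValue;
    }
    assert(table[0] == UNSORTED_MARK);   /* mark preserved across reduction */
    assert(table[1] == 0);               /* small index squashed to zero */
    assert(table[2] == 5000 - 4096);     /* larger indexes shift down */
    assert(table[3] == 70000 - 4096);
    printf("unsorted mark preserved across index reduction\n");
    return 0;
}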