Merged ZSTD_preserveUnsortedMark() into ZSTD_reduceIndex(),
as it's faster: one memory scan instead of two (confirmed by microbenchmark). Note: since ZSTD_reduceIndex() is rarely invoked, this does not translate into a visible compression-speed gain. Consider it an exercise in auto-vectorization and micro-benchmarking.
This commit is contained in:
parent 0170cf9a7a
commit de68c2ff10
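The microbenchmark itself is not part of the commit; the following standalone sketch shows the kind of measurement that supports the claim, timing two separate scans (preserve, then reduce) against one fused scan over the same table. All names and sizes here are illustrative assumptions, not zstd code.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define MARK 1u                 /* stand-in for ZSTD_DUBT_UNSORTED_MARK */
#define TABLE_SIZE (1u << 22)   /* 4M entries, large enough to be memory-bound */
#define REDUCER 123456u

static void preservePass(unsigned* t, size_t n, unsigned r)
{
    size_t i;
    for (i = 0; i < n; i++) t[i] += (t[i] == MARK) ? r : 0;
}

static void reducePass(unsigned* t, size_t n, unsigned r)
{
    size_t i;
    for (i = 0; i < n; i++) t[i] = (t[i] < r) ? 0 : t[i] - r;
}

static void fusedPass(unsigned* t, size_t n, unsigned r)
{
    size_t i;
    for (i = 0; i < n; i++) {
        unsigned const v = t[i] + ((t[i] == MARK) ? r : 0);
        t[i] = (v < r) ? 0 : v - r;
    }
}

int main(void)
{
    unsigned* table = malloc(TABLE_SIZE * sizeof *table);
    size_t i;
    clock_t c0, c1, c2;
    if (table == NULL) return 1;
    for (i = 0; i < TABLE_SIZE; i++) table[i] = (unsigned)rand();

    c0 = clock();
    preservePass(table, TABLE_SIZE, REDUCER);   /* two scans ... */
    reducePass(table, TABLE_SIZE, REDUCER);
    c1 = clock();
    fusedPass(table, TABLE_SIZE, REDUCER);      /* ... versus one */
    c2 = clock();

    /* Timing only: the two measurements see different data, which does
       not matter for estimating the cost of a scan. */
    printf("two scans: %ld ticks\n", (long)(c1 - c0));
    printf("one scan : %ld ticks\n", (long)(c2 - c1));
    free(table);
    return 0;
}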
@@ -1223,32 +1223,44 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long
 
 #define ZSTD_ROWSIZE 16
-/*! ZSTD_reduceTable_internal() :
- *  reduce table indexes by `reducerValue`
- *  presume table size is a multiple of ZSTD_ROWSIZE.
- *  Helps auto-vectorization */
-static void ZSTD_reduceTable_internal (U32* const table, int const nbRows, U32 const reducerValue)
+/*! ZSTD_reduceTable() :
+ *  reduce table indexes by `reducerValue`, or squash to zero.
+ *  `preserveMark` preserves the "unsorted mark" for the btlazy2 strategy.
+ *  It must be set to a clear 0/1 value, to remove the branch during inlining.
+ *  Presume table size is a multiple of ZSTD_ROWSIZE
+ *  to help auto-vectorization */
+FORCE_INLINE_TEMPLATE void
+ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
 {
+    int const nbRows = (int)size / ZSTD_ROWSIZE;
     int cellNb = 0;
     int rowNb;
+    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
+    assert(size < (1U<<31));  /* can be cast to int */
     for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
         int column;
         for (column=0; column<ZSTD_ROWSIZE; column++) {
+            if (preserveMark) {
+                U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
+                table[cellNb] += adder;
+            }
             if (table[cellNb] < reducerValue) table[cellNb] = 0;
             else table[cellNb] -= reducerValue;
             cellNb++;
     }   }
 }
 
-/*! ZSTD_reduceTable() :
- *  reduce table indexes by `reducerValue` */
-static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue)
+static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)
 {
-    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
-    assert(size < (1U<<31));  /* can be cast to int */
-    ZSTD_reduceTable_internal(table, size/ZSTD_ROWSIZE, reducerValue);
+    ZSTD_reduceTable_internal(table, size, reducerValue, 0);
 }
 
+static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)
+{
+    ZSTD_reduceTable_internal(table, size, reducerValue, 1);
+}
+
 
 /*! ZSTD_ldm_reduceTable() :
  *  reduce table indexes by `reducerValue` */
 static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
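The `preserveMark` parameter is a compile-time constant at both call sites, so once ZSTD_reduceTable_internal is force-inlined into its two wrappers the compiler can fold the `if (preserveMark)` test away entirely, leaving each specialization with a branch-light inner loop that auto-vectorizes well. A minimal sketch of the same pattern in isolation (all names here are illustrative, not zstd code):

#include <stdio.h>
#include <stdint.h>

/* Stand-in for FORCE_INLINE_TEMPLATE: ask the compiler to inline hard. */
#if defined(__GNUC__)
#  define MY_FORCE_INLINE static inline __attribute__((always_inline))
#else
#  define MY_FORCE_INLINE static inline
#endif

/* Worker with a 0/1 flag: when the flag is a literal constant at the
   call site, the branch disappears from the inlined copy. */
MY_FORCE_INLINE void scale_internal(uint32_t* t, int n, uint32_t f, int addOne)
{
    int i;
    for (i = 0; i < n; i++) {
        if (addOne) t[i] += 1;   /* dead code in the addOne==0 specialization */
        t[i] *= f;
    }
}

static void scale(uint32_t* t, int n, uint32_t f)      { scale_internal(t, n, f, 0); }
static void scalePlus1(uint32_t* t, int n, uint32_t f) { scale_internal(t, n, f, 1); }

int main(void)
{
    uint32_t a[4] = { 1, 2, 3, 4 };
    scale(a, 4, 10);        /* 10 20 30 40 */
    scalePlus1(a, 4, 10);   /* 110 210 310 410 */
    printf("%u %u %u %u\n", (unsigned)a[0], (unsigned)a[1], (unsigned)a[2], (unsigned)a[3]);
    return 0;
}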
@@ -1273,8 +1285,9 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
     if (zc->appliedParams.cParams.strategy != ZSTD_fast) {
         U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog;
         if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2)
-            ZSTD_preserveUnsortedMark(ms->chainTable, chainSize, reducerValue);
-        ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
+            ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
+        else
+            ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
     }
 
     if (ms->hashLog3) {
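For concreteness, here is what the two paths do to a few sample chain-table entries. This toy snippet re-implements the loop body from the diff above with illustrative values; it is a sketch, not zstd code.

#include <stdio.h>

#define MARK 1u   /* stand-in for ZSTD_DUBT_UNSORTED_MARK */

/* Mirrors the inner-loop logic of ZSTD_reduceTable_internal. */
static void reduce(unsigned* t, int n, unsigned r, int preserveMark)
{
    int i;
    for (i = 0; i < n; i++) {
        if (preserveMark && t[i] == MARK) t[i] += r;
        if (t[i] < r) t[i] = 0; else t[i] -= r;
    }
}

int main(void)
{
    unsigned plain[4]   = { 120000, 50000, 1, 0 };
    unsigned btlazy2[4] = { 120000, 50000, 1, 0 };
    reduce(plain, 4, 90000, 0);
    reduce(btlazy2, 4, 90000, 1);
    /* regular path: 30000 0 0 0  (the mark at index 2 is squashed)   */
    printf("regular : %u %u %u %u\n", plain[0], plain[1], plain[2], plain[3]);
    /* btlazy2 path: 30000 0 1 0  (the mark survives the reduction)   */
    printf("btlazy2 : %u %u %u %u\n", btlazy2[0], btlazy2[1], btlazy2[2], btlazy2[3]);
    return 0;
}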
@@ -32,6 +32,12 @@ extern "C" {
 ***************************************/
 static const U32 g_searchStrength = 8;
 #define HASH_READ_SIZE 8
+#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index 1 now means "unsorted".
+                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
+                                       It's not a big deal though : the candidate will just be sorted again.
+                                       Additionally, candidate position 1 will be lost.
+                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
+                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. */
 
 
 /*-*************************************
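Why the fused operation can preserve this mark (a worked step, not from the commit text): for an entry equal to ZSTD_DUBT_UNSORTED_MARK, i.e. 1, the preserve branch first produces 1 + reducerValue. For any reducerValue below 2^32 - 1 this sum does not wrap and is >= reducerValue, so the entry escapes the squash-to-zero case and the subtraction restores it exactly: (1 + reducerValue) - reducerValue == 1. Every other surviving index simply shifts down by reducerValue, as before.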
@@ -15,48 +15,6 @@
 /*-*************************************
 *  Binary Tree search
 ***************************************/
-#define ZSTD_DUBT_UNSORTED_MARK 1   /* note : index 1 will now be confused with "unsorted" if sorted as larger than its predecessor.
-                                       It's not a big deal though : the candidate will just be considered unsorted, and be sorted again.
-                                       Additionally, candidate position 1 will be lost.
-                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
-                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy */
-
-/*! ZSTD_preserveUnsortedMark() :
- *  pre-emptively increase the value of ZSTD_DUBT_UNSORTED_MARK before ZSTD_reduceTable(),
- *  so that the combined operation preserves its value.
- *  Without it, ZSTD_DUBT_UNSORTED_MARK==1 would be squashed to 0.
- *  As a consequence, the list of unsorted elements would stop at the first element,
- *  removing candidates and resulting in a very small loss of compression ratio
- *  (since overflow protection with ZSTD_reduceTable() is relatively rare).
- *
- *  Another potential risk is that a position could be promoted from *unsorted*
- *  to *sorted=>smaller:0*, meaning the next candidate would be considered smaller.
- *  This could be wrong, and result in data corruption.
- *
- *  On second thought, this corruption might be impossible,
- *  because unsorted elements stand at the beginning of the list,
- *  and squashing to zero reduces the list to a single element,
- *  which needs to be sorted anyway.
- *  I haven't spent much thought on this possible scenario,
- *  and it just felt safer to implement ZSTD_preserveUnsortedMark().
- *
- *  `size` : must be a positive multiple of ZSTD_ROWSIZE */
-#define ZSTD_ROWSIZE 16
-void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue)
-{
-    int cellNb = 0;
-    U32 const nbRows = size / ZSTD_ROWSIZE;
-    U32 rowNb;
-    assert((size % ZSTD_ROWSIZE) == 0);
-    for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
-        int column;
-        for (column=0; column<ZSTD_ROWSIZE; column++) {
-            U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
-            table[cellNb] += adder;
-            cellNb++;
-    }   }
-}
-
 
 void ZSTD_updateDUBT(
         ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,