[LDM] Speed optimization on repetitive data
LDM does especially poorly on repetitive data when that data's hash happens to have `(hash & stopMask) == 0`, either because `stopMask == 0` or by random chance. Optimize this case by skipping over repetitive patterns. The detection is very simplistic, but should catch most of the offending cases. Before: ``` head -c 1G /dev/zero | perf stat -- ./zstd -1 -o /dev/null -v --zstd=ldmHashRateLog=1 --long 21.187881087 seconds time elapsed ``` After: ``` head -c 1G /dev/zero | perf stat -- ./zstd -1 -o /dev/null -v --zstd=ldmHashRateLog=1 --long 1.149707921 seconds time elapsed ``` dev
parent
0e2345b859
commit
32823bc150
|
@ -57,6 +57,33 @@ static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const*
|
|||
}
|
||||
}
|
||||
|
||||
/** ZSTD_ldm_gear_reset()
|
||||
* Feeds [data, data + minMatchLength) into the hash without registering any
|
||||
* splits. This effectively resets the hash state. This is used when skipping
|
||||
* over data, either at the beginning of a block, or skipping sections.
|
||||
*/
|
||||
static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state,
|
||||
BYTE const* data, size_t minMatchLength)
|
||||
{
|
||||
U64 hash = state->rolling;
|
||||
size_t n = 0;
|
||||
|
||||
#define GEAR_ITER_ONCE() do { \
|
||||
hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
|
||||
n += 1; \
|
||||
} while (0)
|
||||
while (n + 3 < minMatchLength) {
|
||||
GEAR_ITER_ONCE();
|
||||
GEAR_ITER_ONCE();
|
||||
GEAR_ITER_ONCE();
|
||||
GEAR_ITER_ONCE();
|
||||
}
|
||||
while (n < minMatchLength) {
|
||||
GEAR_ITER_ONCE();
|
||||
}
|
||||
#undef GEAR_ITER_ONCE
|
||||
}
|
||||
|
||||
/** ZSTD_ldm_gear_feed():
|
||||
*
|
||||
* Registers in the splits array all the split points found in the first
|
||||
|
@ -255,7 +282,7 @@ void ZSTD_ldm_fillHashTable(
|
|||
while (ip < iend) {
|
||||
size_t hashed;
|
||||
unsigned n;
|
||||
|
||||
|
||||
numSplits = 0;
|
||||
hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);
|
||||
|
||||
|
@ -327,16 +354,8 @@ static size_t ZSTD_ldm_generateSequences_internal(
|
|||
|
||||
/* Initialize the rolling hash state with the first minMatchLength bytes */
|
||||
ZSTD_ldm_gear_init(&hashState, params);
|
||||
{
|
||||
size_t n = 0;
|
||||
|
||||
while (n < minMatchLength) {
|
||||
numSplits = 0;
|
||||
n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n,
|
||||
splits, &numSplits);
|
||||
}
|
||||
ip += minMatchLength;
|
||||
}
|
||||
ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength);
|
||||
ip += minMatchLength;
|
||||
|
||||
while (ip < ilimit) {
|
||||
size_t hashed;
|
||||
|
@ -361,6 +380,7 @@ static size_t ZSTD_ldm_generateSequences_internal(
|
|||
for (n = 0; n < numSplits; n++) {
|
||||
size_t forwardMatchLength = 0, backwardMatchLength = 0,
|
||||
bestMatchLength = 0, mLength;
|
||||
U32 offset;
|
||||
BYTE const* const split = candidates[n].split;
|
||||
U32 const checksum = candidates[n].checksum;
|
||||
U32 const hash = candidates[n].hash;
|
||||
|
@ -428,9 +448,9 @@ static size_t ZSTD_ldm_generateSequences_internal(
|
|||
}
|
||||
|
||||
/* Match found */
|
||||
offset = (U32)(split - base) - bestEntry->offset;
|
||||
mLength = forwardMatchLength + backwardMatchLength;
|
||||
{
|
||||
U32 const offset = (U32)(split - base) - bestEntry->offset;
|
||||
rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
|
||||
|
||||
/* Out of sequence storage */
|
||||
|
@ -447,6 +467,21 @@ static size_t ZSTD_ldm_generateSequences_internal(
|
|||
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
|
||||
|
||||
anchor = split + forwardMatchLength;
|
||||
|
||||
/* If we find a match that ends after the data that we've hashed
|
||||
* then we have a repeating, overlapping, pattern. E.g. all zeros.
|
||||
* If one repetition of the pattern matches our `stopMask` then all
|
||||
* repetitions will. We don't need to insert them all into our table,
|
||||
* only the first one. So skip over overlapping matches.
|
||||
* This is a major speed boost (20x) for compressing a single byte
|
||||
* repeated, when that byte ends up in the table.
|
||||
*/
|
||||
if (anchor > ip + hashed) {
|
||||
ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength);
|
||||
/* Continue the outer loop at anchor (ip + hashed == anchor). */
|
||||
ip = anchor - hashed;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ip += hashed;
|
||||
|
|
Loading…
Reference in New Issue