Stagger Application of `stepSize` in ZSTD_fast

This replicates the behavior of @terrelln's `ZSTD_fast` implementation. That
is, it always looks at adjacent pairs of positions, and applies the
acceleration (`stepSize`) only at every other position, i.e. when advancing
from one pair to the next rather than between the two positions of a pair.
This produces a more fine-grained acceleration.
dev
W. Felix Handte 2021-12-10 15:44:39 -05:00
parent 4cc5e2818a
commit 22501cd283
1 changed file with 6 additions and 6 deletions

View File

@@ -99,7 +99,7 @@ ZSTD_compressBlock_fast_noDict_generic(
U32* const hashTable = ms->hashTable;
U32 const hlog = cParams->hashLog;
/* support stepSize of 0 */
size_t const stepSize = cParams->targetLength + !(cParams->targetLength);
size_t const stepSize = cParams->targetLength + !(cParams->targetLength) - 1;
const BYTE* const base = ms->window.base;
const BYTE* const istart = (const BYTE*)src;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
@@ -144,13 +144,13 @@ ZSTD_compressBlock_fast_noDict_generic(
/* start each op */
_start: /* Requires: ip0 */
step = stepSize;
step = 1;
nextStep = ip0 + kStepIncr;
/* calculate positions, ip0 - anchor == 0, so we skip step calc */
ip1 = ip0 + stepSize;
ip2 = ip1 + stepSize;
ip3 = ip2 + stepSize;
ip1 = ip0 + step;
ip2 = ip1 + step + stepSize;
ip3 = ip2 + step;
if (ip3 >= ilimit) {
goto _cleanup;
@@ -241,7 +241,7 @@ _start: /* Requires: ip0 */
/* advance to next positions */
ip0 = ip1;
ip1 = ip2;
ip2 = ip2 + step;
ip2 = ip2 + step + stepSize;
ip3 = ip2 + step;
} while (ip3 < ilimit);