Stagger Application of `stepSize` in ZSTD_fast
This replicates the behavior of @terrelln's `ZSTD_fast` implementation. That is, it always looks at adjacent pairs of positions, and only applies the acceleration every other position. This produces a more fine-grained acceleration.
dev
parent
4cc5e2818a
commit
22501cd283
|
@@ -99,7 +99,7 @@ ZSTD_compressBlock_fast_noDict_generic(
|
|||
U32* const hashTable = ms->hashTable;
|
||||
U32 const hlog = cParams->hashLog;
|
||||
/* support stepSize of 0 */
|
||||
size_t const stepSize = cParams->targetLength + !(cParams->targetLength);
|
||||
size_t const stepSize = cParams->targetLength + !(cParams->targetLength) - 1;
|
||||
const BYTE* const base = ms->window.base;
|
||||
const BYTE* const istart = (const BYTE*)src;
|
||||
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
|
||||
|
@@ -144,13 +144,13 @@ ZSTD_compressBlock_fast_noDict_generic(
|
|||
/* start each op */
|
||||
_start: /* Requires: ip0 */
|
||||
|
||||
step = stepSize;
|
||||
step = 1;
|
||||
nextStep = ip0 + kStepIncr;
|
||||
|
||||
/* calculate positions, ip0 - anchor == 0, so we skip step calc */
|
||||
ip1 = ip0 + stepSize;
|
||||
ip2 = ip1 + stepSize;
|
||||
ip3 = ip2 + stepSize;
|
||||
ip1 = ip0 + step;
|
||||
ip2 = ip1 + step + stepSize;
|
||||
ip3 = ip2 + step;
|
||||
|
||||
if (ip3 >= ilimit) {
|
||||
goto _cleanup;
|
||||
|
@@ -241,7 +241,7 @@ _start: /* Requires: ip0 */
|
|||
/* advance to next positions */
|
||||
ip0 = ip1;
|
||||
ip1 = ip2;
|
||||
ip2 = ip2 + step;
|
||||
ip2 = ip2 + step + stepSize;
|
||||
ip3 = ip2 + step;
|
||||
} while (ip3 < ilimit);
|
||||
|
||||
|
|
Loading…
Reference in New Issue