Stagger Application of `stepSize` in ZSTD_fast

This replicates the behavior of @terrelln's `ZSTD_fast` implementation: the search always examines positions in adjacent pairs and applies the acceleration (`stepSize`) only once per pair — that is, at every other position — rather than at every position. This produces a more fine-grained acceleration.
2021-12-10 15:44:39 -05:00 · 2021-12-10 15:44:39 -05:00 · 22501cd283
parent 4cc5e2818a
commit 22501cd283
1 changed file with 6 additions and 6 deletions
--- a/lib/compress/zstd_fast.c
+++ b/lib/compress/zstd_fast.c
@@ -99,7 +99,7 @@ ZSTD_compressBlock_fast_noDict_generic(
    U32* const hashTable = ms->hashTable;
    U32 const hlog = cParams->hashLog;
    /* support stepSize of 0 */
-    size_t const stepSize = cParams->targetLength + !(cParams->targetLength);
+    size_t const stepSize = cParams->targetLength + !(cParams->targetLength) - 1;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
@@ -144,13 +144,13 @@ ZSTD_compressBlock_fast_noDict_generic(
    /* start each op */
 _start: /* Requires: ip0 */

-    step = stepSize;
+    step = 1;
    nextStep = ip0 + kStepIncr;

    /* calculate positions, ip0 - anchor == 0, so we skip step calc */
-    ip1 = ip0 + stepSize;
-    ip2 = ip1 + stepSize;
-    ip3 = ip2 + stepSize;
+    ip1 = ip0 + step;
+    ip2 = ip1 + step + stepSize;
+    ip3 = ip2 + step;

    if (ip3 >= ilimit) {
        goto _cleanup;
@@ -241,7 +241,7 @@ _start: /* Requires: ip0 */
        /* advance to next positions */
        ip0 = ip1;
        ip1 = ip2;
-        ip2 = ip2 + step;
+        ip2 = ip2 + step + stepSize;
        ip3 = ip2 + step;
    } while (ip3 < ilimit);