From d1327738c277643f09c972a407083ad73c8ecf7b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 11 Jul 2019 15:25:22 -0700 Subject: [PATCH 1/3] updated double_fast complementary insertion in a way which is more favorable to compression ratio, though very slightly slower (~-1%). More details in the PR. --- lib/compress/zstd_double_fast.c | 46 +++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index f156da9d..bdb26ef8 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -254,11 +254,23 @@ _match_stored: anchor = ip; if (ip <= ilimit) { - /* Fill Table */ - hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = - hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */ - hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = - hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = current+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = + indexToInsert; + } + { const BYTE* const ipToInsert = ip - 2; + hashLong[ZSTD_hashPtr(ipToInsert, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(ipToInsert, hBitsS, mls)] = + (U32)(ipToInsert-base); + } + { const BYTE* const ipToInsert = ip - 1; + hashLong[ZSTD_hashPtr(ipToInsert, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(ipToInsert, hBitsS, mls)] = + (U32)(ipToInsert-base); + } /* check immediate repcode */ if (dictMode == ZSTD_dictMatchState) { @@ -452,16 +464,28 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( continue; } } - /* found a match : store it */ + /* move to next sequence start */ ip += mLength; anchor = ip; if (ip <= ilimit) { - /* Fill Table */ - hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; - 
hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2; - hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); - hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = current+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = + indexToInsert; + } + { const BYTE* const ipToInsert = ip - 2; + hashLong[ZSTD_hashPtr(ipToInsert, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(ipToInsert, hBitsS, mls)] = + (U32)(ipToInsert-base); + } + { const BYTE* const ipToInsert = ip - 1; + hashLong[ZSTD_hashPtr(ipToInsert, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(ipToInsert, hBitsS, mls)] = + (U32)(ipToInsert-base); + } /* check immediate repcode */ while (ip <= ilimit) { U32 const current2 = (U32)(ip-base); From e8a7f5d3ce0ebb36e4ad850ab12463b1127dba17 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 12 Jul 2019 11:34:53 -0700 Subject: [PATCH 2/3] double-fast: changed the trade-off for a smaller positive change; same number of complementary insertions, just organized differently (long at `ip-2`, short at `ip-1`). --- .gitignore | 2 +- lib/compress/zstd_double_fast.c | 17 ++++------------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index 0c840b6b..4c297053 100644 --- a/.gitignore +++ b/.gitignore @@ -14,7 +14,7 @@ *.dylib # Executables -zstd. 
+/zstd zstdmt *.exe *.out diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index bdb26ef8..b65fd41e 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -257,19 +257,10 @@ _match_stored: /* Complementary insertion */ /* done after iLimit test, as candidates could be > iend-8 */ { U32 const indexToInsert = current+2; - hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = - hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = - indexToInsert; - } - { const BYTE* const ipToInsert = ip - 2; - hashLong[ZSTD_hashPtr(ipToInsert, hBitsL, 8)] = - hashSmall[ZSTD_hashPtr(ipToInsert, hBitsS, mls)] = - (U32)(ipToInsert-base); - } - { const BYTE* const ipToInsert = ip - 1; - hashLong[ZSTD_hashPtr(ipToInsert, hBitsL, 8)] = - hashSmall[ZSTD_hashPtr(ipToInsert, hBitsS, mls)] = - (U32)(ipToInsert-base); + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); } /* check immediate repcode */ From eaeb7f00b5dea51b3e39175cee930bef0b0e16eb Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 12 Jul 2019 14:17:17 -0700 Subject: [PATCH 3/3] updated the _extDict variant of double fast --- lib/compress/zstd_double_fast.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index b65fd41e..5957255d 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -463,20 +463,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( /* Complementary insertion */ /* done after iLimit test, as candidates could be > iend-8 */ { U32 const indexToInsert = current+2; - hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = - hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = - 
indexToInsert; - } - { const BYTE* const ipToInsert = ip - 2; - hashLong[ZSTD_hashPtr(ipToInsert, hBitsL, 8)] = - hashSmall[ZSTD_hashPtr(ipToInsert, hBitsS, mls)] = - (U32)(ipToInsert-base); - } - { const BYTE* const ipToInsert = ip - 1; - hashLong[ZSTD_hashPtr(ipToInsert, hBitsL, 8)] = - hashSmall[ZSTD_hashPtr(ipToInsert, hBitsS, mls)] = - (U32)(ipToInsert-base); + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); } + /* check immediate repcode */ while (ip <= ilimit) { U32 const current2 = (U32)(ip-base);