From d1327738c277643f09c972a407083ad73c8ecf7b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 11 Jul 2019 15:25:22 -0700 Subject: [PATCH] updated double_fast complementary insertion in a way that is more favorable to compression ratio, though it is very slightly slower (about 1%). More details in the PR. --- lib/compress/zstd_double_fast.c | 46 +++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index f156da9d..bdb26ef8 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -254,11 +254,23 @@ _match_stored: anchor = ip; if (ip <= ilimit) { - /* Fill Table */ - hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = - hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */ - hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = - hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = current+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = + indexToInsert; + } + { const BYTE* const ipToInsert = ip - 2; + hashLong[ZSTD_hashPtr(ipToInsert, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(ipToInsert, hBitsS, mls)] = + (U32)(ipToInsert-base); + } + { const BYTE* const ipToInsert = ip - 1; + hashLong[ZSTD_hashPtr(ipToInsert, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(ipToInsert, hBitsS, mls)] = + (U32)(ipToInsert-base); + } /* check immediate repcode */ if (dictMode == ZSTD_dictMatchState) { @@ -452,16 +464,28 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( continue; } } - /* found a match : store it */ + /* move to next sequence start */ ip += mLength; anchor = ip; if (ip <= ilimit) { - /* Fill Table */ - hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; - 
hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2; - hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); - hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = current+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = + indexToInsert; + } + { const BYTE* const ipToInsert = ip - 2; + hashLong[ZSTD_hashPtr(ipToInsert, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(ipToInsert, hBitsS, mls)] = + (U32)(ipToInsert-base); + } + { const BYTE* const ipToInsert = ip - 1; + hashLong[ZSTD_hashPtr(ipToInsert, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(ipToInsert, hBitsS, mls)] = + (U32)(ipToInsert-base); + } /* check immediate repcode */ while (ip <= ilimit) { U32 const current2 = (U32)(ip-base);