diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 768a6abb..2f66acab 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1207,11 +1207,22 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, /* We have a choice between copying the dictionary context into the working * context, or referencing the dictionary context from the working context * in-place. We decide here which strategy to use. */ - const int attachDict = ( pledgedSrcSize <= 8 KB + const U64 attachDictSizeCutoffs[(unsigned)ZSTD_btultra+1] = { + 8 KB, /* unused */ + 8 KB, /* ZSTD_fast */ + 16 KB, /* ZSTD_dfast */ + 16 KB, /* ZSTD_greedy */ + 16 KB, /* ZSTD_lazy */ + 16 KB, /* ZSTD_lazy2 */ + 16 KB, /* ZSTD_btlazy2 */ + 16 KB, /* ZSTD_btopt */ + 16 KB /* ZSTD_btultra */ + }; + const int attachDict = ( pledgedSrcSize <= attachDictSizeCutoffs[cdict->cParams.strategy] || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN ) && !params.forceWindow /* dictMatchState isn't correctly * handled in _enforceMaxDist */ - && cdict->cParams.strategy == ZSTD_fast + && cdict->cParams.strategy <= ZSTD_dfast && ZSTD_equivalentCParams(cctx->appliedParams.cParams, cdict->cParams); @@ -2203,7 +2214,8 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMo ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btultra_extDict }, { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, ZSTD_compressBlock_fast_dictMatchState, - NULL, NULL, NULL, NULL, NULL, NULL, NULL /* unimplemented as of yet */ } + ZSTD_compressBlock_doubleFast_dictMatchState, + NULL, NULL, NULL, NULL, NULL, NULL /* unimplemented as of yet */ } }; ZSTD_blockCompressor selectedCompressor; ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index 78aa0cba..fbd35404 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -52,7 +52,7 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_doubleFast_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize, - U32 const mls /* template */) + U32 const mls /* template */, ZSTD_dictMode_e const dictMode) { U32* const hashLong = ms->hashTable; const U32 hBitsL = cParams->hashLog; @@ -62,70 +62,178 @@ size_t ZSTD_compressBlock_doubleFast_generic( const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* anchor = istart; - const U32 lowestIndex = ms->window.dictLimit; - const BYTE* const lowest = base + lowestIndex; + const U32 prefixLowestIndex = ms->window.dictLimit; + const BYTE* const prefixLowest = base + prefixLowestIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - HASH_READ_SIZE; U32 offset_1=rep[0], offset_2=rep[1]; U32 offsetSaved = 0; + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ? + dms->hashTable : NULL; + const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ? + dms->chainTable : NULL; + const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ? + dms->window.dictLimit : 0; + const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? + dms->window.base : NULL; + const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ? + dictBase + dictLowestIndex : NULL; + const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? + dms->window.nextSrc : NULL; + const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? + prefixLowestIndex - (U32)(dictEnd - dictBase) : + 0; + const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest); + + assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); + /* init */ - ip += (ip==lowest); - { U32 const maxRep = (U32)(ip-lowest); + ip += (dictAndPrefixLength == 0); + if (dictMode == ZSTD_noDict) { + U32 const maxRep = (U32)(ip - prefixLowest); if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; } + if (dictMode == ZSTD_dictMatchState) { + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + } /* Main Search Loop */ while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ size_t mLength; + U32 offset; size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); U32 const current = (U32)(ip-base); U32 const matchIndexL = hashLong[h2]; - U32 const matchIndexS = hashSmall[h]; + U32 matchIndexS = hashSmall[h]; const BYTE* matchLong = base + matchIndexL; const BYTE* match = base + matchIndexS; + const U32 repIndex = current + 1 - offset_1; + const BYTE* repMatch = (dictMode == ZSTD_dictMatchState + && repIndex < prefixLowestIndex) ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; hashLong[h2] = hashSmall[h] = current; /* update hash tables */ - assert(offset_1 <= current); /* supposed guaranteed by construction */ - if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { - /* favor repcode */ + /* check dictMatchState repcode */ + if (dictMode == ZSTD_dictMatchState + && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, istart) + 4; + ip++; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + goto _match_stored; + } + + /* check noDict repcode */ + if ( dictMode == ZSTD_noDict + && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; ip++; ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); - } else { - U32 offset; - if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) { - mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; - offset = (U32)(ip-matchLong); - while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ - } else if ( (matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) { - size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); - U32 const matchIndexL3 = hashLong[hl3]; - const BYTE* matchL3 = base + matchIndexL3; - hashLong[hl3] = current + 1; - if ( (matchIndexL3 > lowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1)) ) { - mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; - ip++; - offset = (U32)(ip-matchL3); - while (((ip>anchor) & (matchL3>lowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ - } else { - mLength = ZSTD_count(ip+4, match+4, iend) + 4; - offset = (U32)(ip-match); - while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - } - } else { - ip += ((ip-anchor) >> kSearchStrength) + 1; - continue; - } - - offset_2 = offset_1; - offset_1 = offset; - - ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + goto _match_stored; } + /* check prefix long match */ + if ( (matchIndexL > prefixLowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) { + mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; + offset = (U32)(ip-matchLong); + while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + goto _match_found; + } + + /* check dictMatchState long match */ + if (dictMode == ZSTD_dictMatchState) { + U32 const dictMatchIndexL = dictHashLong[h2]; + const BYTE* dictMatchL = dictBase + dictMatchIndexL; + assert(dictMatchL < dictEnd); + + if (dictMatchL > dictLowest && MEM_read64(dictMatchL) == MEM_read64(ip)) { + mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8; + offset = (U32)(current - dictMatchIndexL - dictIndexDelta); + while (((ip>anchor) & (dictMatchL>dictLowest)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ + goto _match_found; + } + } + + /* check prefix short match */ + if ( (matchIndexS > prefixLowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) { + goto _search_next_long; + } + + /* check dictMatchState short match */ + if (dictMode == ZSTD_dictMatchState) { + U32 const dictMatchIndexS = dictHashSmall[h]; + match = dictBase + dictMatchIndexS; + matchIndexS = dictMatchIndexS + dictIndexDelta; + + if (match > dictLowest && MEM_read32(match) == MEM_read32(ip)) { + goto _search_next_long; + } + } + + ip += ((ip-anchor) >> kSearchStrength) + 1; + continue; + +_search_next_long: + + { + size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + U32 const matchIndexL3 = hashLong[hl3]; + const BYTE* matchL3 = base + matchIndexL3; + hashLong[hl3] = current + 1; + + /* check prefix long +1 match */ + if ( (matchIndexL3 > prefixLowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1)) ) { + mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; + ip++; + offset = (U32)(ip-matchL3); + while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ + goto _match_found; + } + + /* check dict long +1 match */ + if (dictMode == ZSTD_dictMatchState) { + U32 const dictMatchIndexL3 = dictHashLong[hl3]; + const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; + assert(dictMatchL3 < dictEnd); + if (dictMatchL3 > dictLowest && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) { + mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8; + ip++; + offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta); + while (((ip>anchor) & (dictMatchL3>dictLowest)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ + goto _match_found; + } + } + } + + /* if no long +1 match, explore the short match we found */ + if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { + mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, istart) + 4; + offset = (U32)(current - matchIndexS); + while (((ip>anchor) & (match>dictLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } else { + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + offset = (U32)(ip - match); + while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + + /* fall-through */ + +_match_found: + offset_2 = offset_1; + offset_1 = offset; + + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + +_match_stored: /* match found */ ip += mLength; anchor = ip; @@ -138,19 +246,44 @@ size_t ZSTD_compressBlock_doubleFast_generic( hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); /* check immediate repcode */ - while ( (ip <= ilimit) - && ( (offset_2>0) - & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; - { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); - ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); - ip += rLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } } } + if (dictMode == ZSTD_dictMatchState) { + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState + && repIndex2 < prefixLowestIndex ? + dictBase - dictIndexDelta + repIndex2 : + base + repIndex2; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, istart) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } + } + + if (dictMode == ZSTD_noDict) { + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); + ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } } /* save reps for next block */ rep[0] = offset_1 ? offset_1 : offsetSaved; @@ -170,13 +303,33 @@ size_t ZSTD_compressBlock_doubleFast( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 4); + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 4, ZSTD_noDict); case 5 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 5); + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 5, ZSTD_noDict); case 6 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 6); + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 6, ZSTD_noDict); case 7 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 7); + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 7, ZSTD_noDict); + } +} + + +size_t ZSTD_compressBlock_doubleFast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + const U32 mls = cParams->searchLength; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 4, ZSTD_dictMatchState); + case 5 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 5, ZSTD_dictMatchState); + case 6 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 6, ZSTD_dictMatchState); + case 7 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 7, ZSTD_dictMatchState); } } diff --git a/lib/compress/zstd_double_fast.h b/lib/compress/zstd_double_fast.h index 3f5a24eb..c475021d 100644 --- a/lib/compress/zstd_double_fast.h +++ b/lib/compress/zstd_double_fast.h @@ -24,6 +24,9 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, size_t ZSTD_compressBlock_doubleFast( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_doubleFast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); size_t ZSTD_compressBlock_doubleFast_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);