From d18a4057796a43ce4f3d1c928d612e91e297b061 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Fri, 27 Apr 2018 18:46:59 -0400 Subject: [PATCH 01/25] Refer to the Dictionary Match State In-Place (Sometimes) --- lib/compress/zstd_compress.c | 68 +++++++++++++++++---------- lib/compress/zstd_compress_internal.h | 6 ++- 2 files changed, 48 insertions(+), 26 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 22c704f1..18091b09 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -963,6 +963,7 @@ static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) ms->nextToUpdate = ms->window.dictLimit + 1; ms->loadedDictEnd = 0; ms->opt.litLengthSum = 0; /* force reset of btopt stats */ + ms->dictMatchState = NULL; } /*! ZSTD_continueCCtx() : @@ -1203,42 +1204,61 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { + /* We have a choice between copying the dictionary context into the working + * context, or referencing the dictionary context from the working context + * in-place. We decide here which strategy to use. */ + /* TODO: pick reasonable cut-off size, handle ZSTD_CONTENTSIZE_UNKNOWN */ + int attachDict = pledgedSrcSize < 64 KB + && cdict->cParams.strategy == ZSTD_fast + && ZSTD_equivalentCParams(cctx->appliedParams.cParams, + cdict->cParams); + { unsigned const windowLog = params.cParams.windowLog; assert(windowLog != 0); /* Copy only compression parameters related to tables. */ params.cParams = cdict->cParams; params.cParams.windowLog = windowLog; - ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, ZSTDcrp_noMemset, zbuff); + ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + attachDict ? ZSTDcrp_continue : ZSTDcrp_noMemset, + zbuff); assert(cctx->appliedParams.cParams.strategy == cdict->cParams.strategy); assert(cctx->appliedParams.cParams.hashLog == cdict->cParams.hashLog); assert(cctx->appliedParams.cParams.chainLog == cdict->cParams.chainLog); } - /* copy tables */ - { size_t const chainSize = (cdict->cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict->cParams.chainLog); - size_t const hSize = (size_t)1 << cdict->cParams.hashLog; - size_t const tableSpace = (chainSize + hSize) * sizeof(U32); - assert((U32*)cctx->blockState.matchState.chainTable == (U32*)cctx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */ - assert((U32*)cctx->blockState.matchState.hashTable3 == (U32*)cctx->blockState.matchState.chainTable + chainSize); - assert((U32*)cdict->matchState.chainTable == (U32*)cdict->matchState.hashTable + hSize); /* chainTable must follow hashTable */ - assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize); - memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace); /* presumes all tables follow each other */ - } - /* Zero the hashTable3, since the cdict never fills it */ - { size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3; - assert(cdict->matchState.hashLog3 == 0); - memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); + if (attachDict) { + DEBUGLOG(4, "attaching dictionary into context"); + cctx->blockState.matchState.dictMatchState = &cdict->matchState; + } else { + DEBUGLOG(4, "copying dictionary into context"); + /* copy tables */ + { size_t const chainSize = (cdict->cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict->cParams.chainLog); + size_t const hSize = (size_t)1 << cdict->cParams.hashLog; + size_t const tableSpace = (chainSize + hSize) * sizeof(U32); + assert((U32*)cctx->blockState.matchState.chainTable == (U32*)cctx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */ + assert((U32*)cctx->blockState.matchState.hashTable3 == (U32*)cctx->blockState.matchState.chainTable + chainSize); + assert((U32*)cdict->matchState.chainTable == (U32*)cdict->matchState.hashTable + hSize); /* chainTable must follow hashTable */ + assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize); + memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace); /* presumes all tables follow each other */ + } + + /* Zero the hashTable3, since the cdict never fills it */ + { size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3; + assert(cdict->matchState.hashLog3 == 0); + memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); + } + + /* copy dictionary offsets */ + { + ZSTD_matchState_t const* srcMatchState = &cdict->matchState; + ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; + } } - /* copy dictionary offsets */ - { - ZSTD_matchState_t const* srcMatchState = &cdict->matchState; - ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; - dstMatchState->window = srcMatchState->window; - dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; - dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3; - dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; - } cctx->dictID = cdict->dictID; /* copy block state */ diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 937234c3..9209fb02 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -122,7 +122,8 @@ typedef struct { U32 lowLimit; /* below that point, no more data */ } ZSTD_window_t; -typedef struct { +typedef struct ZSTD_matchState_t ZSTD_matchState_t; +struct ZSTD_matchState_t { ZSTD_window_t window; /* State for window round buffer management */ U32 loadedDictEnd; /* index of end of dictionary */ U32 nextToUpdate; /* index from which to continue table update */ @@ -132,7 +133,8 @@ typedef struct { U32* hashTable3; U32* chainTable; optState_t opt; /* optimal parser state */ -} ZSTD_matchState_t; + const ZSTD_matchState_t *dictMatchState; +}; typedef struct { ZSTD_compressedBlockState_t* prevCBlock; From 8d24ff03534daba8b8fd2169c1dc6873548dda04 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Sat, 28 Apr 2018 00:42:37 -0400 Subject: [PATCH 02/25] Preliminary Support in ZSTD_compressBlock_fast_generic() for Ext Dict Ctx --- lib/compress/zstd_compress_internal.h | 2 + lib/compress/zstd_fast.c | 92 +++++++++++++++++++++------ 2 files changed, 75 insertions(+), 19 deletions(-) diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 9209fb02..05685e55 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -250,6 +250,8 @@ struct ZSTD_CCtx_s { typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; +typedef enum { ZSTD_noDictMatchState, ZSTD_hasDictMatchState } ZSTD_hasDictMatchState_e; + typedef size_t (*ZSTD_blockCompressor) ( ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 22b84d1c..df4423fc 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -45,7 +45,8 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_fast_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize, - U32 const hlog, U32 const stepSize, U32 const mls) + U32 const hlog, U32 const stepSize, U32 const mls, + ZSTD_hasDictMatchState_e const hasDict) { U32* const hashTable = ms->hashTable; const BYTE* const base = ms->window.base; @@ -59,6 +60,19 @@ size_t ZSTD_compressBlock_fast_generic( U32 offset_1=rep[0], offset_2=rep[1]; U32 offsetSaved = 0; + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32* const dictHashTable = hasDict == ZSTD_hasDictMatchState ? + dms->hashTable : NULL; + const U32 dictLowestIndex = hasDict == ZSTD_hasDictMatchState ? + dms->window.dictLimit : 0; + const BYTE* const dictBase = hasDict == ZSTD_hasDictMatchState ? + dms->window.base : NULL; + const BYTE* const dictLowest = hasDict == ZSTD_hasDictMatchState ? + dictBase + dictLowestIndex : NULL; + const BYTE* const dictEnd = hasDict == ZSTD_hasDictMatchState ? + dms->window.nextSrc : NULL; + const U32 dictIndexDelta = lowestIndex - (dictEnd - dictBase); + /* init */ ip += (ip==lowest); { U32 const maxRep = (U32)(ip-lowest); @@ -75,19 +89,41 @@ size_t ZSTD_compressBlock_fast_generic( const BYTE* match = base + matchIndex; hashTable[h] = current; /* update hash table */ - if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { + if ((hasDict != ZSTD_hasDictMatchState || current >= lowestIndex + offset_1) + && (offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; ip++; ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); } else { if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) { - assert(stepSize >= 1); - ip += ((ip-anchor) >> kSearchStrength) + stepSize; - continue; - } - mLength = ZSTD_count(ip+4, match+4, iend) + 4; - { U32 const offset = (U32)(ip-match); + if (hasDict == ZSTD_hasDictMatchState) { + U32 const dictMatchIndex = dictHashTable[h]; + const BYTE* dictMatch = dictBase + dictMatchIndex; + if (dictMatchIndex <= dictLowestIndex || + MEM_read32(dictMatch) != MEM_read32(ip)) { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } + + mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, istart) + 4; + { U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta); + DEBUGLOG(6, "ip %p (%u) dictMatch %p (%u) idxDelta %u", ip, current, dictMatch, dictMatchIndex, dictIndexDelta); + while (((ip>anchor) & (dictMatch>dictLowest)) && (ip[-1] == dictMatch[-1])) { ip--; dictMatch--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + + } else { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } + } else { + U32 const offset = (U32)(ip-match); + mLength = ZSTD_count(ip+4, match+4, iend) + 4; while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; @@ -104,6 +140,7 @@ size_t ZSTD_compressBlock_fast_generic( hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); /* check immediate repcode */ while ( (ip <= ilimit) + && (hasDict != ZSTD_hasDictMatchState || ip - offset_2 >= istart) && ( (offset_2>0) & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { /* store sequence */ @@ -132,17 +169,34 @@ size_t ZSTD_compressBlock_fast( U32 const hlog = cParams->hashLog; U32 const mls = cParams->searchLength; U32 const stepSize = cParams->targetLength; - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4); - case 5 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5); - case 6 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6); - case 7 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7); + if (ms->dictMatchState != NULL) { + ZSTD_hasDictMatchState_e const hdms = ZSTD_hasDictMatchState; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, hdms); + case 5 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, hdms); + case 6 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, hdms); + case 7 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, hdms); + } + } else { + ZSTD_hasDictMatchState_e const hdms = ZSTD_noDictMatchState; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, hdms); + case 5 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, hdms); + case 6 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, hdms); + case 7 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, hdms); + } } } From 70a537d1d7113a0604c0c22fe44fa1ae07824723 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Tue, 1 May 2018 16:21:18 -0400 Subject: [PATCH 03/25] Initial Repcode Check Support for Ext Dict Ctx --- lib/compress/zstd_fast.c | 42 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index df4423fc..7ea86511 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -87,10 +87,20 @@ size_t ZSTD_compressBlock_fast_generic( U32 const current = (U32)(ip-base); U32 const matchIndex = hashTable[h]; const BYTE* match = base + matchIndex; + const int repIndex = current + 1 - offset_1; + const BYTE* repBase = hasDict == ZSTD_hasDictMatchState && repIndex < lowestIndex ? dictBase - dictIndexDelta : base; + const BYTE* repMatch = repBase + repIndex; hashTable[h] = current; /* update hash table */ - if ((hasDict != ZSTD_hasDictMatchState || current >= lowestIndex + offset_1) - && (offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { + if (hasDict == ZSTD_hasDictMatchState + && (((U32)((lowestIndex-1) - (U32)repIndex) >= 3) /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < lowestIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, istart) + 4; + ip++; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + } else if (hasDict == ZSTD_noDictMatchState + && (offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip+1))) { mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; ip++; ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); @@ -109,7 +119,6 @@ size_t ZSTD_compressBlock_fast_generic( mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, istart) + 4; { U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta); - DEBUGLOG(6, "ip %p (%u) dictMatch %p (%u) idxDelta %u", ip, current, dictMatch, dictMatchIndex, dictIndexDelta); while (((ip>anchor) & (dictMatch>dictLowest)) && (ip[-1] == dictMatch[-1])) { ip--; dictMatch--; mLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; @@ -139,6 +148,31 @@ size_t ZSTD_compressBlock_fast_generic( hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */ hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); /* check immediate repcode */ + + if (hasDict == ZSTD_hasDictMatchState) { + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + int const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = hasDict == ZSTD_hasDictMatchState + && repIndex2 < lowestIndex ? + dictBase - dictIndexDelta + repIndex2 : + base + repIndex2; + if ( (((U32)((lowestIndex-1) - (U32)repIndex2) >= 3)) /* intentional overflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < lowestIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, istart) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } + } + + if (hasDict == ZSTD_noDictMatchState) { while ( (ip <= ilimit) && (hasDict != ZSTD_hasDictMatchState || ip - offset_2 >= istart) && ( (offset_2>0) @@ -151,7 +185,7 @@ size_t ZSTD_compressBlock_fast_generic( ip += rLength; anchor = ip; continue; /* faster when present ... (?) */ - } } } + } } } } /* save reps for next block */ rep[0] = offset_1 ? offset_1 : offsetSaved; From 6929964d65778ce6f0b851f955b3bb68f1a3490d Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 2 May 2018 15:12:18 -0400 Subject: [PATCH 04/25] Add bounds check in repcode tests --- lib/compress/zstd_fast.c | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 7ea86511..fb86199e 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -60,18 +60,33 @@ size_t ZSTD_compressBlock_fast_generic( U32 offset_1=rep[0], offset_2=rep[1]; U32 offsetSaved = 0; + /* This is all complicated by the fact that we need to handle positions + * specified in 3 different ways: by direct pointers, by indices relative + * to the working context base, and by indices relative to the dict context + * base. + * + * Hence the unfortunate collision of "lowestDictIndex", which is the lowest + * index in the dict's index space, and "dictLowestIndex", which is the same + * position in the working context's index space. + */ + const ZSTD_matchState_t* const dms = ms->dictMatchState; const U32* const dictHashTable = hasDict == ZSTD_hasDictMatchState ? dms->hashTable : NULL; - const U32 dictLowestIndex = hasDict == ZSTD_hasDictMatchState ? + const U32 lowestDictIndex = hasDict == ZSTD_hasDictMatchState ? dms->window.dictLimit : 0; const BYTE* const dictBase = hasDict == ZSTD_hasDictMatchState ? dms->window.base : NULL; const BYTE* const dictLowest = hasDict == ZSTD_hasDictMatchState ? - dictBase + dictLowestIndex : NULL; + dictBase + lowestDictIndex : NULL; const BYTE* const dictEnd = hasDict == ZSTD_hasDictMatchState ? dms->window.nextSrc : NULL; - const U32 dictIndexDelta = lowestIndex - (dictEnd - dictBase); + const U32 dictIndexDelta = hasDict == ZSTD_hasDictMatchState ? + lowestIndex - (dictEnd - dictBase) : + 0; + ptrdiff_t dictLowestIndex = hasDict == ZSTD_hasDictMatchState ? + lowestDictIndex + dictIndexDelta : + lowestIndex; /* init */ ip += (ip==lowest); @@ -87,15 +102,15 @@ size_t ZSTD_compressBlock_fast_generic( U32 const current = (U32)(ip-base); U32 const matchIndex = hashTable[h]; const BYTE* match = base + matchIndex; - const int repIndex = current + 1 - offset_1; - const BYTE* repBase = hasDict == ZSTD_hasDictMatchState && repIndex < lowestIndex ? dictBase - dictIndexDelta : base; + const ptrdiff_t repIndex = current + 1 - offset_1; + const BYTE* repBase = hasDict == ZSTD_hasDictMatchState && repIndex < (ptrdiff_t)lowestIndex ? dictBase - dictIndexDelta : base; const BYTE* repMatch = repBase + repIndex; hashTable[h] = current; /* update hash table */ if (hasDict == ZSTD_hasDictMatchState - && (((U32)((lowestIndex-1) - (U32)repIndex) >= 3) /* intentional underflow */) + && (((U32)((lowestIndex-1) - repIndex) >= 3) & (repIndex > dictLowestIndex) /* intentional underflow */) && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < lowestIndex ? dictEnd : iend; + const BYTE* repMatchEnd = repIndex < (ptrdiff_t)lowestIndex ? dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, istart) + 4; ip++; ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); @@ -110,7 +125,7 @@ size_t ZSTD_compressBlock_fast_generic( if (hasDict == ZSTD_hasDictMatchState) { U32 const dictMatchIndex = dictHashTable[h]; const BYTE* dictMatch = dictBase + dictMatchIndex; - if (dictMatchIndex <= dictLowestIndex || + if (dictMatchIndex <= lowestDictIndex || MEM_read32(dictMatch) != MEM_read32(ip)) { assert(stepSize >= 1); ip += ((ip-anchor) >> kSearchStrength) + stepSize; @@ -152,14 +167,14 @@ size_t ZSTD_compressBlock_fast_generic( if (hasDict == ZSTD_hasDictMatchState) { while (ip <= ilimit) { U32 const current2 = (U32)(ip-base); - int const repIndex2 = current2 - offset_2; + ptrdiff_t const repIndex2 = current2 - offset_2; const BYTE* repMatch2 = hasDict == ZSTD_hasDictMatchState - && repIndex2 < lowestIndex ? + && repIndex2 < (ptrdiff_t)lowestIndex ? dictBase - dictIndexDelta + repIndex2 : base + repIndex2; - if ( (((U32)((lowestIndex-1) - (U32)repIndex2) >= 3)) /* intentional overflow */ + if ( (((U32)((lowestIndex-1) - (U32)repIndex2) >= 3) & (repIndex2 > dictLowestIndex)) /* intentional overflow */ && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < lowestIndex ? dictEnd : iend; + const BYTE* const repEnd2 = repIndex2 < (ptrdiff_t)lowestIndex ? dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, istart) + 4; U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); From 265c2869d1e955ae7a026f9e4130e4b981cb2835 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 2 May 2018 17:10:51 -0400 Subject: [PATCH 05/25] Split Wrapper Functions to Cause Inlining --- lib/compress/zstd_compress.c | 15 ++++--- lib/compress/zstd_compress_internal.h | 14 +++++- lib/compress/zstd_fast.c | 62 +++++++++++++++------------ lib/compress/zstd_fast.h | 3 ++ lib/compress/zstd_ldm.c | 3 +- 5 files changed, 62 insertions(+), 35 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 18091b09..c58909fb 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2137,9 +2137,9 @@ MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr, /* ZSTD_selectBlockCompressor() : * Not static, but internal use only (used by long distance matcher) * assumption : strat is a valid strategy */ -ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict, ZSTD_hasDictMatchState_e hdms) { - static const ZSTD_blockCompressor blockCompressor[2][(unsigned)ZSTD_btultra+1] = { + static const ZSTD_blockCompressor blockCompressor[3][(unsigned)ZSTD_btultra+1] = { { ZSTD_compressBlock_fast /* default for 0 */, ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, @@ -2147,13 +2147,16 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict { ZSTD_compressBlock_fast_extDict /* default for 0 */, ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, - ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btultra_extDict } + ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btultra_extDict }, + { ZSTD_compressBlock_fast_extDictMatchState /* default for 0 */, + ZSTD_compressBlock_fast_extDictMatchState, + NULL, NULL, NULL, NULL, NULL, NULL, NULL } }; ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); assert((U32)strat >= (U32)ZSTD_fast); assert((U32)strat <= (U32)ZSTD_btultra); - return blockCompressor[extDict!=0][(U32)strat]; + return blockCompressor[hdms == ZSTD_hasDictMatchState ? 2 : (extDict!=0)][(U32)strat]; } static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, @@ -2196,6 +2199,8 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, /* select and store sequences */ { U32 const extDict = ZSTD_window_hasExtDict(ms->window); + ZSTD_hasDictMatchState_e const hdms = + ZSTD_matchState_hasDictMatchState(ms); size_t lastLLSize; { int i; for (i = 0; i < ZSTD_REP_NUM; ++i) @@ -2229,7 +2234,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, src, srcSize, extDict); assert(ldmSeqStore.pos == ldmSeqStore.size); } else { /* not long range mode */ - ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, extDict); + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, extDict, hdms); lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, src, srcSize); } { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 05685e55..f3a4347b 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -252,10 +252,20 @@ typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; typedef enum { ZSTD_noDictMatchState, ZSTD_hasDictMatchState } ZSTD_hasDictMatchState_e; +/** + * ZSTD_matchState_hasDictMatchState(): + * Does what the label says. + */ +MEM_STATIC ZSTD_hasDictMatchState_e ZSTD_matchState_hasDictMatchState(const ZSTD_matchState_t *ms) +{ + return ms->dictMatchState != NULL ? ZSTD_hasDictMatchState : ZSTD_noDictMatchState; +} + + typedef size_t (*ZSTD_blockCompressor) ( ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); -ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict); +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict, ZSTD_hasDictMatchState_e hdms); MEM_STATIC U32 ZSTD_LLcode(U32 litLength) @@ -512,6 +522,8 @@ MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window) return window.lowLimit < window.dictLimit; } + + /** * ZSTD_window_needOverflowCorrection(): * Returns non-zero if the indices are getting too large and need overflow diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index fb86199e..5f152488 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -218,34 +218,40 @@ size_t ZSTD_compressBlock_fast( U32 const hlog = cParams->hashLog; U32 const mls = cParams->searchLength; U32 const stepSize = cParams->targetLength; - if (ms->dictMatchState != NULL) { - ZSTD_hasDictMatchState_e const hdms = ZSTD_hasDictMatchState; - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, hdms); - case 5 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, hdms); - case 6 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, hdms); - case 7 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, hdms); - } - } else { - ZSTD_hasDictMatchState_e const hdms = ZSTD_noDictMatchState; - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, hdms); - case 5 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, hdms); - case 6 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, hdms); - case 7 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, hdms); - } + assert(ms->dictMatchState == NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, ZSTD_noDictMatchState); + case 5 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, ZSTD_noDictMatchState); + case 6 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, ZSTD_noDictMatchState); + case 7 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, ZSTD_noDictMatchState); + } +} + +size_t ZSTD_compressBlock_fast_extDictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) +{ + U32 const hlog = cParams->hashLog; + U32 const mls = cParams->searchLength; + U32 const stepSize = cParams->targetLength; + assert(ms->dictMatchState != NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, ZSTD_hasDictMatchState); + case 5 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, ZSTD_hasDictMatchState); + case 6 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, ZSTD_hasDictMatchState); + case 7 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, ZSTD_hasDictMatchState); } } diff --git a/lib/compress/zstd_fast.h b/lib/compress/zstd_fast.h index 746849fc..804d36f2 100644 --- a/lib/compress/zstd_fast.h +++ b/lib/compress/zstd_fast.h @@ -24,6 +24,9 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms, size_t ZSTD_compressBlock_fast( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_extDictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); size_t ZSTD_compressBlock_fast_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c index 9d825e69..b58c2f1c 100644 --- a/lib/compress/zstd_ldm.c +++ b/lib/compress/zstd_ldm.c @@ -596,7 +596,8 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, { unsigned const minMatch = cParams->searchLength; ZSTD_blockCompressor const blockCompressor = - ZSTD_selectBlockCompressor(cParams->strategy, extDict); + ZSTD_selectBlockCompressor(cParams->strategy, extDict, + ZSTD_matchState_hasDictMatchState(ms)); BYTE const* const base = ms->window.base; /* Input bounds */ BYTE const* const istart = (BYTE const*)src; From b67196f30d093c0be0fab4e090c9c748779ab27a Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 2 May 2018 17:34:34 -0400 Subject: [PATCH 06/25] Coalesce hasDictMatchState and extDict Checks into One Enum and Rename Stuff --- lib/compress/zstd_compress.c | 20 +++++------ lib/compress/zstd_compress_internal.h | 23 ++++++------ lib/compress/zstd_fast.c | 52 ++++++++++++++------------- lib/compress/zstd_fast.h | 2 +- lib/compress/zstd_ldm.c | 7 ++-- lib/compress/zstd_ldm.h | 3 +- 6 files changed, 52 insertions(+), 55 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index c58909fb..6bbd09c0 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2137,7 +2137,7 @@ MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr, /* ZSTD_selectBlockCompressor() : * Not static, but internal use only (used by long distance matcher) * assumption : strat is a valid strategy */ -ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict, ZSTD_hasDictMatchState_e hdms) +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode) { static const ZSTD_blockCompressor blockCompressor[3][(unsigned)ZSTD_btultra+1] = { { ZSTD_compressBlock_fast /* default for 0 */, @@ -2148,15 +2148,15 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btultra_extDict }, - { ZSTD_compressBlock_fast_extDictMatchState /* default for 0 */, - ZSTD_compressBlock_fast_extDictMatchState, - NULL, NULL, NULL, NULL, NULL, NULL, NULL } + { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, + ZSTD_compressBlock_fast_dictMatchState, + NULL, NULL, NULL, NULL, NULL, NULL, NULL /* unimplemented as of yet */ } }; ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); assert((U32)strat >= (U32)ZSTD_fast); assert((U32)strat <= (U32)ZSTD_btultra); - return blockCompressor[hdms == ZSTD_hasDictMatchState ? 2 : (extDict!=0)][(U32)strat]; + return blockCompressor[(int)dictMode][(U32)strat]; } static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, @@ -2198,9 +2198,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, } /* select and store sequences */ - { U32 const extDict = ZSTD_window_hasExtDict(ms->window); - ZSTD_hasDictMatchState_e const hdms = - ZSTD_matchState_hasDictMatchState(ms); + { ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms); size_t lastLLSize; { int i; for (i = 0; i < ZSTD_REP_NUM; ++i) @@ -2214,7 +2212,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, - src, srcSize, extDict); + src, srcSize); assert(zc->externSeqStore.pos <= zc->externSeqStore.size); } else if (zc->appliedParams.ldmParams.enableLdm) { rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0}; @@ -2231,10 +2229,10 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, - src, srcSize, extDict); + src, srcSize); assert(ldmSeqStore.pos == ldmSeqStore.size); } else { /* not long range mode */ - ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, extDict, hdms); + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode); lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, src, srcSize); } { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index f3a4347b..32bbe08b 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -250,22 +250,13 @@ struct ZSTD_CCtx_s { typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; -typedef enum { ZSTD_noDictMatchState, ZSTD_hasDictMatchState } ZSTD_hasDictMatchState_e; - -/** - * ZSTD_matchState_hasDictMatchState(): - * Does what the label says. - */ -MEM_STATIC ZSTD_hasDictMatchState_e ZSTD_matchState_hasDictMatchState(const ZSTD_matchState_t *ms) -{ - return ms->dictMatchState != NULL ? ZSTD_hasDictMatchState : ZSTD_noDictMatchState; -} +typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e; typedef size_t (*ZSTD_blockCompressor) ( ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); -ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict, ZSTD_hasDictMatchState_e hdms); +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e hdms); MEM_STATIC U32 ZSTD_LLcode(U32 litLength) @@ -522,7 +513,15 @@ MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window) return window.lowLimit < window.dictLimit; } - +/** + * ZSTD_matchState_dictMode(): + * Does what the label says. + */ +MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) +{ + return ms->dictMatchState != NULL ? ZSTD_dictMatchState : + ZSTD_window_hasExtDict(ms->window) ? ZSTD_extDict : ZSTD_noDict; +} /** * ZSTD_window_needOverflowCorrection(): diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 5f152488..8f3d33e6 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -46,7 +46,7 @@ size_t ZSTD_compressBlock_fast_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize, U32 const hlog, U32 const stepSize, U32 const mls, - ZSTD_hasDictMatchState_e const hasDict) + ZSTD_dictMode_e const hasDict) { U32* const hashTable = ms->hashTable; const BYTE* const base = ms->window.base; @@ -71,23 +71,25 @@ size_t ZSTD_compressBlock_fast_generic( */ const ZSTD_matchState_t* const dms = ms->dictMatchState; - const U32* const dictHashTable = hasDict == ZSTD_hasDictMatchState ? + const U32* const dictHashTable = hasDict == ZSTD_dictMatchState ? dms->hashTable : NULL; - const U32 lowestDictIndex = hasDict == ZSTD_hasDictMatchState ? + const U32 lowestDictIndex = hasDict == ZSTD_dictMatchState ? dms->window.dictLimit : 0; - const BYTE* const dictBase = hasDict == ZSTD_hasDictMatchState ? + const BYTE* const dictBase = hasDict == ZSTD_dictMatchState ? dms->window.base : NULL; - const BYTE* const dictLowest = hasDict == ZSTD_hasDictMatchState ? + const BYTE* const dictLowest = hasDict == ZSTD_dictMatchState ? dictBase + lowestDictIndex : NULL; - const BYTE* const dictEnd = hasDict == ZSTD_hasDictMatchState ? + const BYTE* const dictEnd = hasDict == ZSTD_dictMatchState ? dms->window.nextSrc : NULL; - const U32 dictIndexDelta = hasDict == ZSTD_hasDictMatchState ? + const U32 dictIndexDelta = hasDict == ZSTD_dictMatchState ? lowestIndex - (dictEnd - dictBase) : 0; - ptrdiff_t dictLowestIndex = hasDict == ZSTD_hasDictMatchState ? + ptrdiff_t dictLowestIndex = hasDict == ZSTD_dictMatchState ? lowestDictIndex + dictIndexDelta : lowestIndex; + assert(hasDict == ZSTD_noDict || hasDict == ZSTD_dictMatchState); + /* init */ ip += (ip==lowest); { U32 const maxRep = (U32)(ip-lowest); @@ -103,18 +105,18 @@ size_t ZSTD_compressBlock_fast_generic( U32 const matchIndex = hashTable[h]; const BYTE* match = base + matchIndex; const ptrdiff_t repIndex = current + 1 - offset_1; - const BYTE* repBase = hasDict == ZSTD_hasDictMatchState && repIndex < (ptrdiff_t)lowestIndex ? dictBase - dictIndexDelta : base; + const BYTE* repBase = hasDict == ZSTD_dictMatchState && repIndex < (ptrdiff_t)lowestIndex ? dictBase - dictIndexDelta : base; const BYTE* repMatch = repBase + repIndex; hashTable[h] = current; /* update hash table */ - if (hasDict == ZSTD_hasDictMatchState + if (hasDict == ZSTD_dictMatchState && (((U32)((lowestIndex-1) - repIndex) >= 3) & (repIndex > dictLowestIndex) /* intentional underflow */) && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* repMatchEnd = repIndex < (ptrdiff_t)lowestIndex ? dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, istart) + 4; ip++; ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); - } else if (hasDict == ZSTD_noDictMatchState + } else if (hasDict == ZSTD_noDict && (offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip+1))) { mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; ip++; @@ -122,7 +124,7 @@ size_t ZSTD_compressBlock_fast_generic( } else { if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) { - if (hasDict == ZSTD_hasDictMatchState) { + if (hasDict == ZSTD_dictMatchState) { U32 const dictMatchIndex = dictHashTable[h]; const BYTE* dictMatch = dictBase + dictMatchIndex; if (dictMatchIndex <= lowestDictIndex || @@ -164,11 +166,11 @@ size_t ZSTD_compressBlock_fast_generic( hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); /* check immediate repcode */ - if (hasDict == ZSTD_hasDictMatchState) { + if (hasDict == ZSTD_dictMatchState) { while (ip <= ilimit) { U32 const current2 = (U32)(ip-base); ptrdiff_t const repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = hasDict == ZSTD_hasDictMatchState + const BYTE* repMatch2 = hasDict == ZSTD_dictMatchState && repIndex2 < (ptrdiff_t)lowestIndex ? dictBase - dictIndexDelta + repIndex2 : base + repIndex2; @@ -187,9 +189,9 @@ size_t ZSTD_compressBlock_fast_generic( } } - if (hasDict == ZSTD_noDictMatchState) { + if (hasDict == ZSTD_noDict) { while ( (ip <= ilimit) - && (hasDict != ZSTD_hasDictMatchState || ip - offset_2 >= istart) + && (hasDict != ZSTD_dictMatchState || ip - offset_2 >= istart) && ( (offset_2>0) & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { /* store sequence */ @@ -223,17 +225,17 @@ size_t ZSTD_compressBlock_fast( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, ZSTD_noDictMatchState); + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, ZSTD_noDict); case 5 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, ZSTD_noDictMatchState); + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, ZSTD_noDict); case 6 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, ZSTD_noDictMatchState); + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, ZSTD_noDict); case 7 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, ZSTD_noDictMatchState); + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, ZSTD_noDict); } } -size_t ZSTD_compressBlock_fast_extDictMatchState( +size_t ZSTD_compressBlock_fast_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) { @@ -245,13 +247,13 @@ size_t ZSTD_compressBlock_fast_extDictMatchState( { default: /* includes case 3 */ case 4 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, ZSTD_hasDictMatchState); + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, ZSTD_dictMatchState); case 5 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, ZSTD_hasDictMatchState); + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, ZSTD_dictMatchState); case 6 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, ZSTD_hasDictMatchState); + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, ZSTD_dictMatchState); case 7 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, ZSTD_hasDictMatchState); + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, ZSTD_dictMatchState); } } diff --git a/lib/compress/zstd_fast.h b/lib/compress/zstd_fast.h index 804d36f2..7e7435f8 100644 --- a/lib/compress/zstd_fast.h +++ b/lib/compress/zstd_fast.h @@ -24,7 +24,7 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms, size_t ZSTD_compressBlock_fast( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); -size_t ZSTD_compressBlock_fast_extDictMatchState( +size_t ZSTD_compressBlock_fast_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); size_t ZSTD_compressBlock_fast_extDict( diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c index b58c2f1c..b0c5d065 100644 --- a/lib/compress/zstd_ldm.c +++ b/lib/compress/zstd_ldm.c @@ -591,13 +591,12 @@ static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize, - int const extDict) + ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize) { unsigned const minMatch = cParams->searchLength; ZSTD_blockCompressor const blockCompressor = - ZSTD_selectBlockCompressor(cParams->strategy, extDict, - ZSTD_matchState_hasDictMatchState(ms)); + ZSTD_selectBlockCompressor(cParams->strategy, + ZSTD_matchState_dictMode(ms)); BYTE const* const base = ms->window.base; /* Input bounds */ BYTE const* const istart = (BYTE const*)src; diff --git a/lib/compress/zstd_ldm.h b/lib/compress/zstd_ldm.h index 0c3789ff..96588adb 100644 --- a/lib/compress/zstd_ldm.h +++ b/lib/compress/zstd_ldm.h @@ -62,8 +62,7 @@ size_t ZSTD_ldm_generateSequences( size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_compressionParameters const* cParams, - void const* src, size_t srcSize, - int const extDict); + void const* src, size_t srcSize); /** * ZSTD_ldm_skipSequences(): From c31ee3c7f827ebb1f3141ca4162e4721d3617752 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 2 May 2018 20:30:03 -0400 Subject: [PATCH 07/25] Fix Rep Code Initialization --- lib/compress/zstd_fast.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 8f3d33e6..067efba5 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -91,8 +91,10 @@ size_t ZSTD_compressBlock_fast_generic( assert(hasDict == ZSTD_noDict || hasDict == ZSTD_dictMatchState); /* init */ - ip += (ip==lowest); - { U32 const maxRep = (U32)(ip-lowest); + ip += (hasDict == ZSTD_noDict && ip == lowest); + { U32 const maxRep = hasDict == ZSTD_dictMatchState ? + (U32)(ip - dictLowest) : + (U32)(ip - lowest); if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; } From 66bc1ca64142a63f846035c4f5539d400113e56b Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 2 May 2018 22:28:29 -0400 Subject: [PATCH 08/25] Change Cut-Off to 8 KB --- lib/compress/zstd_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 6bbd09c0..b877a7fb 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1208,7 +1208,7 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, * context, or referencing the dictionary context from the working context * in-place. We decide here which strategy to use. */ /* TODO: pick reasonable cut-off size, handle ZSTD_CONTENTSIZE_UNKNOWN */ - int attachDict = pledgedSrcSize < 64 KB + int attachDict = pledgedSrcSize <= 8 KB && cdict->cParams.strategy == ZSTD_fast && ZSTD_equivalentCParams(cctx->appliedParams.cParams, cdict->cParams); From ca26cecc7a48e317bee986c0e34dd8a9887eb2f5 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Fri, 4 May 2018 13:08:07 -0400 Subject: [PATCH 09/25] Rename and Reformat --- lib/compress/zstd_fast.c | 163 +++++++++++++++++++-------------------- 1 file changed, 80 insertions(+), 83 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 067efba5..d4eaeb6d 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -53,23 +53,13 @@ size_t ZSTD_compressBlock_fast_generic( const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* anchor = istart; - const U32 lowestIndex = ms->window.dictLimit; - const BYTE* const lowest = base + lowestIndex; + const U32 localLowestIndex = ms->window.dictLimit; + const BYTE* const localLowest = base + localLowestIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - HASH_READ_SIZE; U32 offset_1=rep[0], offset_2=rep[1]; U32 offsetSaved = 0; - /* This is all complicated by the fact that we need to handle positions - * specified in 3 different ways: by direct pointers, by indices relative - * to the working context base, and by indices relative to the dict context - * base. - * - * Hence the unfortunate collision of "lowestDictIndex", which is the lowest - * index in the dict's index space, and "dictLowestIndex", which is the same - * position in the working context's index space. - */ - const ZSTD_matchState_t* const dms = ms->dictMatchState; const U32* const dictHashTable = hasDict == ZSTD_dictMatchState ? dms->hashTable : NULL; @@ -82,19 +72,19 @@ size_t ZSTD_compressBlock_fast_generic( const BYTE* const dictEnd = hasDict == ZSTD_dictMatchState ? dms->window.nextSrc : NULL; const U32 dictIndexDelta = hasDict == ZSTD_dictMatchState ? - lowestIndex - (dictEnd - dictBase) : + localLowestIndex - (dictEnd - dictBase) : 0; - ptrdiff_t dictLowestIndex = hasDict == ZSTD_dictMatchState ? + ptrdiff_t dictLowestLocalIndex = hasDict == ZSTD_dictMatchState ? lowestDictIndex + dictIndexDelta : - lowestIndex; + localLowestIndex; assert(hasDict == ZSTD_noDict || hasDict == ZSTD_dictMatchState); /* init */ - ip += (hasDict == ZSTD_noDict && ip == lowest); + ip += (hasDict == ZSTD_noDict && ip == localLowest); { U32 const maxRep = hasDict == ZSTD_dictMatchState ? (U32)(ip - dictLowest) : - (U32)(ip - lowest); + (U32)(ip - localLowest); if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; } @@ -106,57 +96,63 @@ size_t ZSTD_compressBlock_fast_generic( U32 const current = (U32)(ip-base); U32 const matchIndex = hashTable[h]; const BYTE* match = base + matchIndex; - const ptrdiff_t repIndex = current + 1 - offset_1; - const BYTE* repBase = hasDict == ZSTD_dictMatchState && repIndex < (ptrdiff_t)lowestIndex ? dictBase - dictIndexDelta : base; + const ptrdiff_t repIndex = (ptrdiff_t)current + 1 - offset_1; + const BYTE* repBase = (hasDict == ZSTD_dictMatchState + && repIndex < (ptrdiff_t)localLowestIndex) ? + dictBase - dictIndexDelta : base; const BYTE* repMatch = repBase + repIndex; hashTable[h] = current; /* update hash table */ if (hasDict == ZSTD_dictMatchState - && (((U32)((lowestIndex-1) - repIndex) >= 3) & (repIndex > dictLowestIndex) /* intentional underflow */) + && (((U32)((localLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + & (repIndex > dictLowestLocalIndex)) && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < (ptrdiff_t)lowestIndex ? dictEnd : iend; + const BYTE* repMatchEnd = repIndex < (ptrdiff_t)localLowestIndex ? dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, istart) + 4; ip++; ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); - } else if (hasDict == ZSTD_noDict - && (offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip+1))) { + } else if ( hasDict == ZSTD_noDict + && (offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip+1))) { mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; ip++; ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); - } else { - if ( (matchIndex <= lowestIndex) - || (MEM_read32(match) != MEM_read32(ip)) ) { - if (hasDict == ZSTD_dictMatchState) { - U32 const dictMatchIndex = dictHashTable[h]; - const BYTE* dictMatch = dictBase + dictMatchIndex; - if (dictMatchIndex <= lowestDictIndex || - MEM_read32(dictMatch) != MEM_read32(ip)) { - assert(stepSize >= 1); - ip += ((ip-anchor) >> kSearchStrength) + stepSize; - continue; - } - - mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, istart) + 4; - { U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta); - while (((ip>anchor) & (dictMatch>dictLowest)) && (ip[-1] == dictMatch[-1])) { ip--; dictMatch--; mLength++; } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - } - - } else { + } else if ( (matchIndex <= localLowestIndex) + || (MEM_read32(match) != MEM_read32(ip)) ) { + if (hasDict == ZSTD_dictMatchState) { + U32 const dictMatchIndex = dictHashTable[h]; + const BYTE* dictMatch = dictBase + dictMatchIndex; + if (dictMatchIndex <= lowestDictIndex || + MEM_read32(dictMatch) != MEM_read32(ip)) { assert(stepSize >= 1); ip += ((ip-anchor) >> kSearchStrength) + stepSize; continue; + } else { + /* found a dict match */ + U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta); + mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, istart) + 4; + while (((ip>anchor) & (dictMatch>dictLowest)) + && (ip[-1] == dictMatch[-1])) { + ip--; dictMatch--; mLength++; + } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } } else { - U32 const offset = (U32)(ip-match); - mLength = ZSTD_count(ip+4, match+4, iend) + 4; - while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - } } + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } + } else { + /* found a regular match */ + U32 const offset = (U32)(ip-match); + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + while (((ip>anchor) & (match>localLowest)) + && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } /* match found */ ip += mLength; @@ -169,41 +165,42 @@ size_t ZSTD_compressBlock_fast_generic( /* check immediate repcode */ if (hasDict == ZSTD_dictMatchState) { - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - ptrdiff_t const repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = hasDict == ZSTD_dictMatchState - && repIndex2 < (ptrdiff_t)lowestIndex ? - dictBase - dictIndexDelta + repIndex2 : - base + repIndex2; - if ( (((U32)((lowestIndex-1) - (U32)repIndex2) >= 3) & (repIndex2 > dictLowestIndex)) /* intentional overflow */ - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < (ptrdiff_t)lowestIndex ? dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, istart) + 4; - U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); - hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; - ip += repLength2; - anchor = ip; - continue; + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + ptrdiff_t const repIndex2 = (ptrdiff_t)current2 - offset_2; + const BYTE* repMatch2 = hasDict == ZSTD_dictMatchState + && repIndex2 < (ptrdiff_t)localLowestIndex ? + dictBase - dictIndexDelta + repIndex2 : + base + repIndex2; + if ( (((U32)((localLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + & (repIndex2 > dictLowestLocalIndex)) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < (ptrdiff_t)localLowestIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, istart) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; } - break; - } } if (hasDict == ZSTD_noDict) { - while ( (ip <= ilimit) - && (hasDict != ZSTD_dictMatchState || ip - offset_2 >= istart) - && ( (offset_2>0) - & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; - { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ - hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base); - ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); - ip += rLength; - anchor = ip; - continue; /* faster when present ... (?) */ + while ( (ip <= ilimit) + && (ip - offset_2 >= istart) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base); + ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ } } } } /* save reps for next block */ From ae4fcf781613255b76dbf3da55f452bbc5537065 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 9 May 2018 13:14:20 -0400 Subject: [PATCH 10/25] Respond to PR Comments; Formatting/Style/Lint Fixes --- lib/compress/zstd_compress.c | 5 ++++- lib/compress/zstd_compress_internal.h | 10 +++++++--- lib/compress/zstd_fast.c | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index b877a7fb..f0576f08 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2152,11 +2152,14 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMo ZSTD_compressBlock_fast_dictMatchState, NULL, NULL, NULL, NULL, NULL, NULL, NULL /* unimplemented as of yet */ } }; + ZSTD_blockCompressor selectedCompressor; ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); assert((U32)strat >= (U32)ZSTD_fast); assert((U32)strat <= (U32)ZSTD_btultra); - return blockCompressor[(int)dictMode][(U32)strat]; + selectedCompressor = blockCompressor[(int)dictMode][(U32)strat]; + assert(selectedCompressor != NULL); + return selectedCompressor; } static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 32bbe08b..913497e7 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -515,12 +515,16 @@ MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window) /** * ZSTD_matchState_dictMode(): - * Does what the label says. + * Inspects the provided matchState and figures out what dictMode should be + * passed to the compressor. */ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) { - return ms->dictMatchState != NULL ? ZSTD_dictMatchState : - ZSTD_window_hasExtDict(ms->window) ? ZSTD_extDict : ZSTD_noDict; + return ms->dictMatchState != NULL ? + ZSTD_dictMatchState : + ZSTD_window_hasExtDict(ms->window) ? + ZSTD_extDict : + ZSTD_noDict; } /** diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index d4eaeb6d..60c88e57 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -72,7 +72,7 @@ size_t ZSTD_compressBlock_fast_generic( const BYTE* const dictEnd = hasDict == ZSTD_dictMatchState ? dms->window.nextSrc : NULL; const U32 dictIndexDelta = hasDict == ZSTD_dictMatchState ? - localLowestIndex - (dictEnd - dictBase) : + localLowestIndex - (U32)(dictEnd - dictBase) : 0; ptrdiff_t dictLowestLocalIndex = hasDict == ZSTD_dictMatchState ? lowestDictIndex + dictIndexDelta : From 191fc74a51aa20d46b1ec996a7e20dc3f1dbaf5e Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 9 May 2018 15:14:12 -0400 Subject: [PATCH 11/25] Rename 'hasDict' to 'dictMode' --- lib/compress/zstd_compress_internal.h | 2 +- lib/compress/zstd_fast.c | 36 +++++++++++++-------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 913497e7..80c03433 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -256,7 +256,7 @@ typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD typedef size_t (*ZSTD_blockCompressor) ( ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize); -ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e hdms); +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode); MEM_STATIC U32 ZSTD_LLcode(U32 litLength) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 60c88e57..f211f142 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -46,7 +46,7 @@ size_t ZSTD_compressBlock_fast_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize, U32 const hlog, U32 const stepSize, U32 const mls, - ZSTD_dictMode_e const hasDict) + ZSTD_dictMode_e const dictMode) { U32* const hashTable = ms->hashTable; const BYTE* const base = ms->window.base; @@ -61,28 +61,28 @@ size_t ZSTD_compressBlock_fast_generic( U32 offsetSaved = 0; const ZSTD_matchState_t* const dms = ms->dictMatchState; - const U32* const dictHashTable = hasDict == ZSTD_dictMatchState ? + const U32* const dictHashTable = dictMode == ZSTD_dictMatchState ? dms->hashTable : NULL; - const U32 lowestDictIndex = hasDict == ZSTD_dictMatchState ? + const U32 lowestDictIndex = dictMode == ZSTD_dictMatchState ? dms->window.dictLimit : 0; - const BYTE* const dictBase = hasDict == ZSTD_dictMatchState ? + const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL; - const BYTE* const dictLowest = hasDict == ZSTD_dictMatchState ? + const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ? dictBase + lowestDictIndex : NULL; - const BYTE* const dictEnd = hasDict == ZSTD_dictMatchState ? + const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL; - const U32 dictIndexDelta = hasDict == ZSTD_dictMatchState ? + const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? localLowestIndex - (U32)(dictEnd - dictBase) : 0; - ptrdiff_t dictLowestLocalIndex = hasDict == ZSTD_dictMatchState ? + ptrdiff_t dictLowestLocalIndex = dictMode == ZSTD_dictMatchState ? lowestDictIndex + dictIndexDelta : localLowestIndex; - assert(hasDict == ZSTD_noDict || hasDict == ZSTD_dictMatchState); + assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); /* init */ - ip += (hasDict == ZSTD_noDict && ip == localLowest); - { U32 const maxRep = hasDict == ZSTD_dictMatchState ? + ip += (dictMode == ZSTD_noDict && ip == localLowest); + { U32 const maxRep = dictMode == ZSTD_dictMatchState ? (U32)(ip - dictLowest) : (U32)(ip - localLowest); if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; @@ -97,13 +97,13 @@ size_t ZSTD_compressBlock_fast_generic( U32 const matchIndex = hashTable[h]; const BYTE* match = base + matchIndex; const ptrdiff_t repIndex = (ptrdiff_t)current + 1 - offset_1; - const BYTE* repBase = (hasDict == ZSTD_dictMatchState + const BYTE* repBase = (dictMode == ZSTD_dictMatchState && repIndex < (ptrdiff_t)localLowestIndex) ? dictBase - dictIndexDelta : base; const BYTE* repMatch = repBase + repIndex; hashTable[h] = current; /* update hash table */ - if (hasDict == ZSTD_dictMatchState + if (dictMode == ZSTD_dictMatchState && (((U32)((localLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) & (repIndex > dictLowestLocalIndex)) && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { @@ -111,14 +111,14 @@ size_t ZSTD_compressBlock_fast_generic( mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, istart) + 4; ip++; ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); - } else if ( hasDict == ZSTD_noDict + } else if ( dictMode == ZSTD_noDict && (offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip+1))) { mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; ip++; ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); } else if ( (matchIndex <= localLowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) { - if (hasDict == ZSTD_dictMatchState) { + if (dictMode == ZSTD_dictMatchState) { U32 const dictMatchIndex = dictHashTable[h]; const BYTE* dictMatch = dictBase + dictMatchIndex; if (dictMatchIndex <= lowestDictIndex || @@ -164,11 +164,11 @@ size_t ZSTD_compressBlock_fast_generic( hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); /* check immediate repcode */ - if (hasDict == ZSTD_dictMatchState) { + if (dictMode == ZSTD_dictMatchState) { while (ip <= ilimit) { U32 const current2 = (U32)(ip-base); ptrdiff_t const repIndex2 = (ptrdiff_t)current2 - offset_2; - const BYTE* repMatch2 = hasDict == ZSTD_dictMatchState + const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState && repIndex2 < (ptrdiff_t)localLowestIndex ? dictBase - dictIndexDelta + repIndex2 : base + repIndex2; @@ -188,7 +188,7 @@ size_t ZSTD_compressBlock_fast_generic( } } - if (hasDict == ZSTD_noDict) { + if (dictMode == ZSTD_noDict) { while ( (ip <= ilimit) && (ip - offset_2 >= istart) && ( (offset_2>0) From 154eb0941990b97543081b03bf38c469d3a3c172 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 9 May 2018 18:40:23 -0400 Subject: [PATCH 12/25] Switch to Original Match Calc for noDict Repcode Check --- lib/compress/zstd_fast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index f211f142..48a165d3 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -112,7 +112,7 @@ size_t ZSTD_compressBlock_fast_generic( ip++; ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); } else if ( dictMode == ZSTD_noDict - && (offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip+1))) { + && (offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; ip++; ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); From d005e5daf4323b0c67eb34391ca51445a60372a5 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Thu, 10 May 2018 13:46:19 -0400 Subject: [PATCH 13/25] Whitespace Fix --- lib/compress/zstd_fast.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 48a165d3..df2a0a05 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -130,7 +130,7 @@ size_t ZSTD_compressBlock_fast_generic( /* found a dict match */ U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta); mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, istart) + 4; - while (((ip>anchor) & (dictMatch>dictLowest)) + while (((ip>anchor) & (dictMatch>dictLowest)) && (ip[-1] == dictMatch[-1])) { ip--; dictMatch--; mLength++; } /* catch up */ @@ -162,8 +162,8 @@ size_t ZSTD_compressBlock_fast_generic( /* Fill Table */ hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */ hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); - /* check immediate repcode */ + /* check immediate repcode */ if (dictMode == ZSTD_dictMatchState) { while (ip <= ilimit) { U32 const current2 = (U32)(ip-base); From 2d598e6fedd798086894f953fbc44b189bca746a Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Thu, 10 May 2018 17:17:10 -0400 Subject: [PATCH 14/25] Force Working Context Indices Greater than Dict Indices --- lib/compress/zstd_compress.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index f0576f08..e0588268 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1229,6 +1229,17 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, if (attachDict) { DEBUGLOG(4, "attaching dictionary into context"); cctx->blockState.matchState.dictMatchState = &cdict->matchState; + + /* prep working match state so dict matches never have negative indices + * when they are translated to the working context's index space. */ + if (cctx->blockState.matchState.window.dictLimit < + (U32)(cdict->matchState.window.nextSrc - cdict->matchState.window.base)) { + cctx->blockState.matchState.window.nextSrc = + cctx->blockState.matchState.window.base + + ( cdict->matchState.window.nextSrc + - cdict->matchState.window.base); + ZSTD_window_clear(&cctx->blockState.matchState.window); + } } else { DEBUGLOG(4, "copying dictionary into context"); /* copy tables */ From 1a7b34ef28d309f58ce4988071eb2b6bf4830599 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Thu, 10 May 2018 17:18:08 -0400 Subject: [PATCH 15/25] Use New Index Invariant to Simplify Conditionals --- lib/compress/zstd_fast.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index df2a0a05..dacb2637 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -74,16 +74,18 @@ size_t ZSTD_compressBlock_fast_generic( const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? localLowestIndex - (U32)(dictEnd - dictBase) : 0; - ptrdiff_t dictLowestLocalIndex = dictMode == ZSTD_dictMatchState ? - lowestDictIndex + dictIndexDelta : - localLowestIndex; assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); + /* otherwise, we would get index underflow when translating a dict index + * into a local index */ + assert(dictMode != ZSTD_dictMatchState + || localLowestIndex >= (U32)(dictEnd - dictBase)); + /* init */ ip += (dictMode == ZSTD_noDict && ip == localLowest); { U32 const maxRep = dictMode == ZSTD_dictMatchState ? - (U32)(ip - dictLowest) : + (U32)(ip - localLowest + dictEnd - dictLowest) : (U32)(ip - localLowest); if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; @@ -96,23 +98,22 @@ size_t ZSTD_compressBlock_fast_generic( U32 const current = (U32)(ip-base); U32 const matchIndex = hashTable[h]; const BYTE* match = base + matchIndex; - const ptrdiff_t repIndex = (ptrdiff_t)current + 1 - offset_1; + const U32 repIndex = current + 1 - offset_1; const BYTE* repBase = (dictMode == ZSTD_dictMatchState - && repIndex < (ptrdiff_t)localLowestIndex) ? + && repIndex < localLowestIndex) ? dictBase - dictIndexDelta : base; const BYTE* repMatch = repBase + repIndex; hashTable[h] = current; /* update hash table */ if (dictMode == ZSTD_dictMatchState - && (((U32)((localLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) - & (repIndex > dictLowestLocalIndex)) + && ((U32)((localLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < (ptrdiff_t)localLowestIndex ? dictEnd : iend; + const BYTE* repMatchEnd = repIndex < localLowestIndex ? dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, istart) + 4; ip++; ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); } else if ( dictMode == ZSTD_noDict - && (offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { + && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; ip++; ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); @@ -167,15 +168,14 @@ size_t ZSTD_compressBlock_fast_generic( if (dictMode == ZSTD_dictMatchState) { while (ip <= ilimit) { U32 const current2 = (U32)(ip-base); - ptrdiff_t const repIndex2 = (ptrdiff_t)current2 - offset_2; + U32 const repIndex2 = current2 - offset_2; const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState - && repIndex2 < (ptrdiff_t)localLowestIndex ? + && repIndex2 < localLowestIndex ? dictBase - dictIndexDelta + repIndex2 : base + repIndex2; - if ( (((U32)((localLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) - & (repIndex2 > dictLowestLocalIndex)) + if ( ((U32)((localLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < (ptrdiff_t)localLowestIndex ? dictEnd : iend; + const BYTE* const repEnd2 = repIndex2 < localLowestIndex ? dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, istart) + 4; U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); @@ -190,7 +190,6 @@ size_t ZSTD_compressBlock_fast_generic( if (dictMode == ZSTD_noDict) { while ( (ip <= ilimit) - && (ip - offset_2 >= istart) && ( (offset_2>0) & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { /* store sequence */ From b05ae9b6086fea37b7c7edee1fc8296e01a1b521 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Tue, 15 May 2018 01:15:33 -0400 Subject: [PATCH 16/25] Refine ip Initialization to Avoid ARM Weirdness --- lib/compress/zstd_fast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index dacb2637..5c6f0dc8 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -83,7 +83,7 @@ size_t ZSTD_compressBlock_fast_generic( || localLowestIndex >= (U32)(dictEnd - dictBase)); /* init */ - ip += (dictMode == ZSTD_noDict && ip == localLowest); + ip += (ip - localLowest + dictEnd - dictLowest == 0); { U32 const maxRep = dictMode == ZSTD_dictMatchState ? (U32)(ip - localLowest + dictEnd - dictLowest) : (U32)(ip - localLowest); From 3ba70cc759550c7ac4f3c02d0dfc3de113d594e9 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Tue, 15 May 2018 13:08:03 -0400 Subject: [PATCH 17/25] Clear the Dictionary When Sliding the Window --- lib/compress/zstd_compress.c | 4 +++- lib/compress/zstd_compress_internal.h | 13 ++++++++----- lib/compress/zstd_ldm.c | 2 +- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index e0588268..9f488b9a 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1240,6 +1240,7 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, - cdict->matchState.window.base); ZSTD_window_clear(&cctx->blockState.matchState.window); } + cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; } else { DEBUGLOG(4, "copying dictionary into context"); /* copy tables */ @@ -2313,8 +2314,9 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; else ms->nextToUpdate -= correction; ms->loadedDictEnd = 0; + ms->dictMatchState = NULL; } - ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd); + ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; { size_t cSize = ZSTD_compressBlock_internal(cctx, diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 80c03433..a61fc374 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -520,10 +520,10 @@ MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window) */ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) { - return ms->dictMatchState != NULL ? - ZSTD_dictMatchState : - ZSTD_window_hasExtDict(ms->window) ? - ZSTD_extDict : + return ZSTD_window_hasExtDict(ms->window) ? + ZSTD_extDict : + ms->dictMatchState != NULL ? + ZSTD_dictMatchState : ZSTD_noDict; } @@ -605,7 +605,8 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, */ MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window, void const* srcEnd, U32 maxDist, - U32* loadedDictEndPtr) + U32* loadedDictEndPtr, + const ZSTD_matchState_t** dictMatchStatePtr) { U32 const current = (U32)((BYTE const*)srcEnd - window->base); U32 loadedDictEnd = loadedDictEndPtr != NULL ? *loadedDictEndPtr : 0; @@ -619,6 +620,8 @@ MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window, } if (loadedDictEndPtr) *loadedDictEndPtr = 0; + if (dictMatchStatePtr) + *dictMatchStatePtr = NULL; } } diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c index b0c5d065..03d1f54c 100644 --- a/lib/compress/zstd_ldm.c +++ b/lib/compress/zstd_ldm.c @@ -508,7 +508,7 @@ size_t ZSTD_ldm_generateSequences( * * Try invalidation after the sequence generation and test the * the offset against maxDist directly. */ - ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL); + ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL); /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */ newLeftoverSize = ZSTD_ldm_generateSequences_internal( ldmState, sequences, params, chunkStart, chunkSize); From 7e0402e738f2e3ff6d74e63283bc7c514b67b3a4 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Tue, 15 May 2018 13:13:19 -0400 Subject: [PATCH 18/25] Also Attach Dict When Source Size is Unknown --- lib/compress/zstd_compress.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 9f488b9a..4d4e171b 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1208,7 +1208,8 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, * context, or referencing the dictionary context from the working context * in-place. We decide here which strategy to use. */ /* TODO: pick reasonable cut-off size, handle ZSTD_CONTENTSIZE_UNKNOWN */ - int attachDict = pledgedSrcSize <= 8 KB + int attachDict = ( pledgedSrcSize <= 8 KB + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN ) && cdict->cParams.strategy == ZSTD_fast && ZSTD_equivalentCParams(cctx->appliedParams.cParams, cdict->cParams); From 95bdf20a872ab7eef689799e19f83fa1462a44ba Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Tue, 15 May 2018 13:16:50 -0400 Subject: [PATCH 19/25] Moar Renames --- lib/compress/zstd_fast.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 5c6f0dc8..09b1a8ec 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -53,8 +53,8 @@ size_t ZSTD_compressBlock_fast_generic( const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* anchor = istart; - const U32 localLowestIndex = ms->window.dictLimit; - const BYTE* const localLowest = base + localLowestIndex; + const U32 prefixLowestIndex = ms->window.dictLimit; + const BYTE* const prefixLowest = base + prefixLowestIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - HASH_READ_SIZE; U32 offset_1=rep[0], offset_2=rep[1]; @@ -63,16 +63,16 @@ size_t ZSTD_compressBlock_fast_generic( const ZSTD_matchState_t* const dms = ms->dictMatchState; const U32* const dictHashTable = dictMode == ZSTD_dictMatchState ? dms->hashTable : NULL; - const U32 lowestDictIndex = dictMode == ZSTD_dictMatchState ? + const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ? dms->window.dictLimit : 0; const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL; const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ? - dictBase + lowestDictIndex : NULL; + dictBase + dictLowestIndex : NULL; const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL; const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? - localLowestIndex - (U32)(dictEnd - dictBase) : + prefixLowestIndex - (U32)(dictEnd - dictBase) : 0; assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); @@ -80,13 +80,13 @@ size_t ZSTD_compressBlock_fast_generic( /* otherwise, we would get index underflow when translating a dict index * into a local index */ assert(dictMode != ZSTD_dictMatchState - || localLowestIndex >= (U32)(dictEnd - dictBase)); + || prefixLowestIndex >= (U32)(dictEnd - dictBase)); /* init */ - ip += (ip - localLowest + dictEnd - dictLowest == 0); + ip += (ip - prefixLowest + dictEnd - dictLowest == 0); { U32 const maxRep = dictMode == ZSTD_dictMatchState ? - (U32)(ip - localLowest + dictEnd - dictLowest) : - (U32)(ip - localLowest); + (U32)(ip - prefixLowest + dictEnd - dictLowest) : + (U32)(ip - prefixLowest); if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; } @@ -100,15 +100,15 @@ size_t ZSTD_compressBlock_fast_generic( const BYTE* match = base + matchIndex; const U32 repIndex = current + 1 - offset_1; const BYTE* repBase = (dictMode == ZSTD_dictMatchState - && repIndex < localLowestIndex) ? + && repIndex < prefixLowestIndex) ? dictBase - dictIndexDelta : base; const BYTE* repMatch = repBase + repIndex; hashTable[h] = current; /* update hash table */ if (dictMode == ZSTD_dictMatchState - && ((U32)((localLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < localLowestIndex ? dictEnd : iend; + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, istart) + 4; ip++; ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); @@ -117,12 +117,12 @@ size_t ZSTD_compressBlock_fast_generic( mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; ip++; ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); - } else if ( (matchIndex <= localLowestIndex) + } else if ( (matchIndex <= prefixLowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) { if (dictMode == ZSTD_dictMatchState) { U32 const dictMatchIndex = dictHashTable[h]; const BYTE* dictMatch = dictBase + dictMatchIndex; - if (dictMatchIndex <= lowestDictIndex || + if (dictMatchIndex <= dictLowestIndex || MEM_read32(dictMatch) != MEM_read32(ip)) { assert(stepSize >= 1); ip += ((ip-anchor) >> kSearchStrength) + stepSize; @@ -148,7 +148,7 @@ size_t ZSTD_compressBlock_fast_generic( /* found a regular match */ U32 const offset = (U32)(ip-match); mLength = ZSTD_count(ip+4, match+4, iend) + 4; - while (((ip>anchor) & (match>localLowest)) + while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; @@ -170,12 +170,12 @@ size_t ZSTD_compressBlock_fast_generic( U32 const current2 = (U32)(ip-base); U32 const repIndex2 = current2 - offset_2; const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState - && repIndex2 < localLowestIndex ? + && repIndex2 < prefixLowestIndex ? dictBase - dictIndexDelta + repIndex2 : base + repIndex2; - if ( ((U32)((localLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < localLowestIndex ? dictEnd : iend; + const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, istart) + 4; U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); From a44ab3b475882fc5447a949e8b21e68d9ed9be5e Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Tue, 15 May 2018 15:41:37 -0400 Subject: [PATCH 20/25] Remove Out-of-Date Comment --- lib/compress/zstd_compress.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 4d4e171b..aa7fc1e8 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1207,7 +1207,6 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, /* We have a choice between copying the dictionary context into the working * context, or referencing the dictionary context from the working context * in-place. We decide here which strategy to use. */ - /* TODO: pick reasonable cut-off size, handle ZSTD_CONTENTSIZE_UNKNOWN */ int attachDict = ( pledgedSrcSize <= 8 KB || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN ) && cdict->cParams.strategy == ZSTD_fast From 9c92223468acca6e5a7082e4e09b1f6870df7aa4 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Tue, 15 May 2018 15:45:37 -0400 Subject: [PATCH 21/25] Avoid Undefined Behavior in Match Ptr Calculation --- lib/compress/zstd_fast.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 09b1a8ec..b21bc768 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -99,10 +99,10 @@ size_t ZSTD_compressBlock_fast_generic( U32 const matchIndex = hashTable[h]; const BYTE* match = base + matchIndex; const U32 repIndex = current + 1 - offset_1; - const BYTE* repBase = (dictMode == ZSTD_dictMatchState + const BYTE* repMatch = (dictMode == ZSTD_dictMatchState && repIndex < prefixLowestIndex) ? - dictBase - dictIndexDelta : base; - const BYTE* repMatch = repBase + repIndex; + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; hashTable[h] = current; /* update hash table */ if (dictMode == ZSTD_dictMatchState From 582b7f85ed25bf828854f9507d1c75c4d74962bf Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Tue, 15 May 2018 17:23:16 -0400 Subject: [PATCH 22/25] Don't Attach Empty Dict Contents In weird corner cases, they produce unexpected results... --- lib/compress/zstd_compress.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index aa7fc1e8..b1d52b9a 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1227,20 +1227,25 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, } if (attachDict) { - DEBUGLOG(4, "attaching dictionary into context"); - cctx->blockState.matchState.dictMatchState = &cdict->matchState; + if (cdict->matchState.window.nextSrc - cdict->matchState.window.base == 0) { + /* don't even attach dictionaries with no contents */ + DEBUGLOG(4, "skipping attaching empty dictionary"); + } else { + DEBUGLOG(4, "attaching dictionary into context"); + cctx->blockState.matchState.dictMatchState = &cdict->matchState; - /* prep working match state so dict matches never have negative indices - * when they are translated to the working context's index space. */ - if (cctx->blockState.matchState.window.dictLimit < - (U32)(cdict->matchState.window.nextSrc - cdict->matchState.window.base)) { - cctx->blockState.matchState.window.nextSrc = - cctx->blockState.matchState.window.base + - ( cdict->matchState.window.nextSrc - - cdict->matchState.window.base); - ZSTD_window_clear(&cctx->blockState.matchState.window); + /* prep working match state so dict matches never have negative indices + * when they are translated to the working context's index space. */ + if (cctx->blockState.matchState.window.dictLimit < + (U32)(cdict->matchState.window.nextSrc - cdict->matchState.window.base)) { + cctx->blockState.matchState.window.nextSrc = + cctx->blockState.matchState.window.base + + ( cdict->matchState.window.nextSrc + - cdict->matchState.window.base); + ZSTD_window_clear(&cctx->blockState.matchState.window); + } + cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; } - cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; } else { DEBUGLOG(4, "copying dictionary into context"); /* copy tables */ From 7ef85e061877d96991a4fc419ad96146afc8f88b Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Mon, 21 May 2018 18:27:08 -0400 Subject: [PATCH 23/25] Fixes in re Comments --- lib/compress/zstd_compress.c | 24 ++++++++++++------------ lib/compress/zstd_fast.c | 17 +++++++++++------ 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index b1d52b9a..e49046fc 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1207,11 +1207,12 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, /* We have a choice between copying the dictionary context into the working * context, or referencing the dictionary context from the working context * in-place. We decide here which strategy to use. */ - int attachDict = ( pledgedSrcSize <= 8 KB - || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN ) - && cdict->cParams.strategy == ZSTD_fast - && ZSTD_equivalentCParams(cctx->appliedParams.cParams, - cdict->cParams); + const int attachDict = ( pledgedSrcSize <= 8 KB + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN ) + && cdict->cParams.strategy == ZSTD_fast + && ZSTD_equivalentCParams(cctx->appliedParams.cParams, + cdict->cParams); + { unsigned const windowLog = params.cParams.windowLog; assert(windowLog != 0); @@ -1227,7 +1228,9 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, } if (attachDict) { - if (cdict->matchState.window.nextSrc - cdict->matchState.window.base == 0) { + const U32 cdictLen = (U32)( cdict->matchState.window.nextSrc + - cdict->matchState.window.base); + if (cdictLen == 0) { /* don't even attach dictionaries with no contents */ DEBUGLOG(4, "skipping attaching empty dictionary"); } else { @@ -1236,15 +1239,12 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, /* prep working match state so dict matches never have negative indices * when they are translated to the working context's index space. */ - if (cctx->blockState.matchState.window.dictLimit < - (U32)(cdict->matchState.window.nextSrc - cdict->matchState.window.base)) { + if (cctx->blockState.matchState.window.dictLimit < cdictLen) { cctx->blockState.matchState.window.nextSrc = - cctx->blockState.matchState.window.base + - ( cdict->matchState.window.nextSrc - - cdict->matchState.window.base); + cctx->blockState.matchState.window.base + cdictLen; ZSTD_window_clear(&cctx->blockState.matchState.window); } - cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; + cctx->blockState.matchState.loadedDictEnd = params.forceWindow ? 0 : cdictLen; } } else { DEBUGLOG(4, "copying dictionary into context"); diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index b21bc768..3bac2bdd 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -74,6 +74,7 @@ size_t ZSTD_compressBlock_fast_generic( const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? prefixLowestIndex - (U32)(dictEnd - dictBase) : 0; + const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest); assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); @@ -83,13 +84,18 @@ size_t ZSTD_compressBlock_fast_generic( || prefixLowestIndex >= (U32)(dictEnd - dictBase)); /* init */ - ip += (ip - prefixLowest + dictEnd - dictLowest == 0); - { U32 const maxRep = dictMode == ZSTD_dictMatchState ? - (U32)(ip - prefixLowest + dictEnd - dictLowest) : - (U32)(ip - prefixLowest); + ip += (dictAndPrefixLength == 0); + if (dictMode == ZSTD_noDict) { + U32 const maxRep = (U32)(ip - prefixLowest); if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; } + if (dictMode == ZSTD_dictMatchState) { + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + } /* Main Search Loop */ while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ @@ -169,8 +175,7 @@ size_t ZSTD_compressBlock_fast_generic( while (ip <= ilimit) { U32 const current2 = (U32)(ip-base); U32 const repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState - && repIndex2 < prefixLowestIndex ? + const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ? dictBase - dictIndexDelta + repIndex2 : base + repIndex2; if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) From 298d24fa573842c7cc0c3530869817bc9ebe36f8 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Mon, 21 May 2018 20:12:11 -0400 Subject: [PATCH 24/25] Make loadedDictEnd an Index, not the Dict Len --- lib/compress/zstd_compress.c | 4 +++- lib/compress/zstd_compress_internal.h | 6 ++++++ lib/compress/zstd_fast.c | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index e49046fc..105cea44 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1209,6 +1209,8 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, * in-place. We decide here which strategy to use. */ const int attachDict = ( pledgedSrcSize <= 8 KB || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN ) + && !params.forceWindow /* dictMatchState isn't correctly + * handled in _enforceMaxDist */ && cdict->cParams.strategy == ZSTD_fast && ZSTD_equivalentCParams(cctx->appliedParams.cParams, cdict->cParams); @@ -1244,7 +1246,7 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, cctx->blockState.matchState.window.base + cdictLen; ZSTD_window_clear(&cctx->blockState.matchState.window); } - cctx->blockState.matchState.loadedDictEnd = params.forceWindow ? 0 : cdictLen; + cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; } } else { DEBUGLOG(4, "copying dictionary into context"); diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index a61fc374..a7666d5c 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -594,14 +594,20 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, * ZSTD_window_enforceMaxDist(): * Updates lowLimit so that: * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd + * * This allows a simple check that index >= lowLimit to see if index is valid. * This must be called before a block compression call, with srcEnd as the block * source end. + * * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit. * This is because dictionaries are allowed to be referenced as long as the last * byte of the dictionary is in the window, but once they are out of range, * they cannot be referenced. If loadedDictEndPtr is NULL, we use * loadedDictEnd == 0. + * + * In normal dict mode, the dict is between lowLimit and dictLimit. In + * dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary + * is below them. forceWindow and dictMatchState are therefore incompatible. */ MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window, void const* srcEnd, U32 maxDist, diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 3bac2bdd..bf962a17 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -72,7 +72,7 @@ size_t ZSTD_compressBlock_fast_generic( const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL; const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? - prefixLowestIndex - (U32)(dictEnd - dictBase) : + ms->loadedDictEnd - (U32)(dictEnd - dictBase) : 0; const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest); From d9c7e67125d95d751e934870fdf611fbe0995934 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 23 May 2018 16:00:17 -0400 Subject: [PATCH 25/25] Assert that Dict and Current Window are Adjacent in Index Space --- lib/compress/zstd_compress.c | 5 +++++ lib/compress/zstd_fast.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 105cea44..00f3e789 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2210,6 +2210,11 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, ZSTD_resetSeqStore(&(zc->seqStore)); ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; /* required for optimal parser to read stats from dictionary */ + /* a gap between an attached dict and the current window is not safe, + * they must remain adjacent, and when that stops being the case, the dict + * must be unset */ + assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit); + /* limited update after a very long match */ { const BYTE* const base = ms->window.base; const BYTE* const istart = (const BYTE*)src; diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index bf962a17..3bac2bdd 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -72,7 +72,7 @@ size_t ZSTD_compressBlock_fast_generic( const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL; const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? - ms->loadedDictEnd - (U32)(dictEnd - dictBase) : + prefixLowestIndex - (U32)(dictEnd - dictBase) : 0; const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest);