From 046ea53bef3d471a429d4b02a7fa578aed80fdea Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 15 Nov 2017 11:29:24 -0800 Subject: [PATCH] Still fighting data corruption caused by a corrupted binary tree. It seems to happen when reaching the end of the buffer. --- lib/compress/zstd_compress_internal.h | 4 +-- lib/compress/zstd_lazy.c | 45 +++++++++++++++++++++++---- lib/compress/zstd_opt.c | 44 ++++++++++++++++---------- 3 files changed, 68 insertions(+), 25 deletions(-) diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index b97cae14..a9ef7ad1 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -233,12 +233,12 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase) */ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase) { -#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 2) +#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 6) static const BYTE* g_start = NULL; if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ { U32 const pos = (U32)((const BYTE*)literals - g_start); g_debuglog_enable = ((pos >= 3670500) & (pos < 3673800)); - DEBUGLOG(2, "Cpos%7u :%3u literals, match%3u bytes at dist.code%7u", + DEBUGLOG(6, "Cpos%7u :%3u literals, match%3u bytes at dist.code%7u", pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode); } #endif diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index bd6fd747..ee14bd4a 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -50,6 +50,14 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co predictedLarge += (predictedLarge>0); #endif /* ZSTD_C_PREDICT */ +#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 2) + g_debuglog_enable = (current <= 8530000); +#endif +#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 8) + g_debuglog_enable = (current == 5202593); +#endif + DEBUGLOG(8, "ZSTD_insertBt1 (%u)", 
current); + assert(ip <= iend-8); /* required for h calculation */ hashTable[h] = current; /* Update Hash Table */ @@ -57,6 +65,12 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co U32* const nextPtr = bt + 2*(matchIndex & btMask); size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ +#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 8) + if (current == 5189477) g_debuglog_enable = 1; +#endif + DEBUGLOG(8, "index%8u evaluated during insertion of %u (presumed min matchLength:%3u) ", + matchIndex, current, (U32)matchLength); + #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ if (matchIndex == predictedSmall) { @@ -79,26 +93,48 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co #endif if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { match = base + matchIndex; +#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2) + { size_t const controlSize = ZSTD_count(ip, match, iend); + if (controlSize < matchLength) { + DEBUGLOG(2, "Warning !! => matchIndex %u while inserting %u within prefix is smaller than minimum expectation (%u<%u) !", + matchIndex, current, (U32)controlSize, (U32)matchLength); + } } +#endif if (match[matchLength] == ip[matchLength]) matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; } else { match = dictBase + matchIndex; +#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2) + { size_t const controlSize = ZSTD_count_2segments(ip, match, iend, dictEnd, prefixStart); + if (controlSize < matchLength) { + DEBUGLOG(2, "Warning !! 
=> matchIndex %u while inserting %u into _extDict is smaller than minimum expectation (%u<%u) !", + matchIndex, current, (U32)controlSize, (U32)matchLength); + } } +#endif matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); if (matchIndex+matchLength >= dictLimit) match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ } + DEBUGLOG(8, "matchIndex%8u has %u bytes in common with %u ", + matchIndex, (U32)matchLength, current); + if (matchLength > bestLength) { bestLength = matchLength; if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; } - if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + DEBUGLOG(8, "index %u has equal value at length %u as src : cannot determine > or <", + matchIndex, (U32)matchLength); break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ /* match+1 is smaller than current */ + DEBUGLOG(8, "matchIndex%8u is smaller than %u (%u < %u)", + matchIndex, current, match[matchLength], ip[matchLength]); *smallerPtr = matchIndex; /* update smaller idx */ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ @@ -106,6 +142,8 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ } else { /* match is larger than current */ + DEBUGLOG(8, "matchIndex%8u is larger than %u (%u < %u)", + matchIndex, current, match[matchLength], ip[matchLength]); *largerPtr = matchIndex; commonLengthLarger = matchLength; if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* 
beyond tree size, stop searching */ @@ -157,9 +195,6 @@ static size_t ZSTD_insertBtAndFindBestMatch ( size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ const BYTE* match; - DEBUGLOG(8, "index%7u evaluated during insertion of %u (presumed min matchLength:%3u) ", - matchIndex, current, (U32)matchLength); - if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { match = base + matchIndex; if (match[matchLength] == ip[matchLength]) @@ -177,8 +212,6 @@ static size_t ZSTD_insertBtAndFindBestMatch ( if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ - DEBUGLOG(8, "index %u has equal value at length %u as src : cannot determine > or <", - matchIndex, (U32)matchLength); break; /* drop, to guarantee consistency (miss a little bit of compression) */ } } diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index ad3cb6c6..b7d4ed19 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -264,15 +264,11 @@ static U32 ZSTD_insertBtAndGetAllMatches ( size_t bestLength = minMatchLen-1; #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 8) - static const BYTE* g_start = NULL; - if (g_start==NULL) g_start = base; /* note : index only works for compression within a single segment */ - { U32 const pos = (U32)(ip - g_start); - g_debuglog_enable = (pos==3673728); - } - if (current == 8793162) g_debuglog_enable = 1; + g_debuglog_enable = (current == 5202593); + //g_debuglog_enable = (current == 8845622); + //if (current == 12193408) g_debuglog_enable = 1; #endif - /* check repCode */ #if 0 if (!extDict) /*static*/ { @@ -293,7 +289,7 @@ static U32 ZSTD_insertBtAndGetAllMatches ( matches[mnum].len = (U32)repLen; mnum++; if ( (repLen > ZSTD_OPT_NUM) - || (ip+repLen == iLimit) ) { /* best 
possible */ + | (ip+repLen == iLimit) ) { /* best possible */ return mnum; } } } } } else { /* extDict */ @@ -320,7 +316,7 @@ static U32 ZSTD_insertBtAndGetAllMatches ( matches[mnum].len = (U32)repLen; mnum++; if ( (repLen > ZSTD_OPT_NUM) - || (ip+repLen == iLimit) ) { /* best possible */ + | (ip+repLen == iLimit) ) { /* best possible */ return mnum; } } } } } @@ -352,10 +348,10 @@ static U32 ZSTD_insertBtAndGetAllMatches ( assert(mnum==0); /* no prior solution */ matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE; matches[0].len = (U32)mlen; - mnum=1; + mnum = 1; if ( (mlen > ZSTD_OPT_NUM) - || (ip+mlen == iLimit) ) { /* best possible */ - return mnum; + | (ip+mlen == iLimit) ) { /* best possible */ + return 1; } } } } #endif @@ -373,6 +369,13 @@ static U32 ZSTD_insertBtAndGetAllMatches ( #endif if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { match = base + matchIndex; +#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2) + { size_t const controlSize = ZSTD_count(ip, match, iLimit); + if (controlSize < matchLength) { + DEBUGLOG(2, "Warning !! => matchIndex %u while searching %u within prefix is smaller than minimum expectation (%u<%u) !", + matchIndex, current, (U32)controlSize, (U32)matchLength); + } } +#endif if (match[matchLength] == ip[matchLength]) { matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1; } @@ -388,9 +391,16 @@ static U32 ZSTD_insertBtAndGetAllMatches ( #endif } else { match = dictBase + matchIndex; +#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2) + { size_t const controlSize = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart); + if (controlSize < matchLength) { + DEBUGLOG(2, "Warning !! 
=> matchIndex %u while searching %u into _extDict is smaller than minimum expectation (%u<%u) !", + matchIndex, current, (U32)controlSize, (U32)matchLength); + } } +#endif matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + match = base + matchIndex; /* prepare for match[matchLength] */ #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=8) if (matchIndex + 8 < dictLimit) { int i; @@ -431,7 +441,7 @@ static U32 ZSTD_insertBtAndGetAllMatches ( matchIndex, match[matchLength], ip[matchLength], (U32)matchLength); { int i; RAWLOG(8, "index %u: ", matchIndex); - for (i=0; i<27; i++) RAWLOG(7," %02X ", match[i]); + for (i=0; i<18; i++) RAWLOG(7," %02X ", match[i]); RAWLOG(8, " \n"); } #endif @@ -445,7 +455,7 @@ static U32 ZSTD_insertBtAndGetAllMatches ( { int i; const BYTE* const match2 = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; RAWLOG(8, "index %u: ", matchIndex); - for (i=0; i<27; i++) RAWLOG(7," %02X ", match2[i]); + for (i=0; i<18; i++) RAWLOG(7," %02X ", match2[i]); RAWLOG(8, " \n"); } #endif @@ -455,7 +465,7 @@ static U32 ZSTD_insertBtAndGetAllMatches ( matchIndex, match[matchLength], ip[matchLength], (U32)matchLength); { int i; RAWLOG(8, "index %u: ", matchIndex); - for (i=0; i<27; i++) RAWLOG(7," %02X ", match[i]); + for (i=0; i<18; i++) RAWLOG(7," %02X ", match[i]); RAWLOG(8, " \n"); } #endif @@ -469,7 +479,7 @@ static U32 ZSTD_insertBtAndGetAllMatches ( { int i; const BYTE* const match2 = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; RAWLOG(8, "index %u: ", matchIndex); - for (i=0; i<27; i++) RAWLOG(7," %02X ", match2[i]); + for (i=0; i<18; i++) RAWLOG(7," %02X ", match2[i]); RAWLOG(8, " \n"); } #endif