Still fighting data corruption

due to a messed-up tree.
It seems to happen when reaching the end of the buffer.
This commit is contained in:
Yann Collet 2017-11-15 11:29:24 -08:00
parent 4202b2e8a6
commit 046ea53bef
3 changed files with 68 additions and 25 deletions

View File

@ -233,12 +233,12 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
*/ */
MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase) MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase)
{ {
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 2) #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 6)
static const BYTE* g_start = NULL; static const BYTE* g_start = NULL;
if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
{ U32 const pos = (U32)((const BYTE*)literals - g_start); { U32 const pos = (U32)((const BYTE*)literals - g_start);
g_debuglog_enable = ((pos >= 3670500) & (pos < 3673800)); g_debuglog_enable = ((pos >= 3670500) & (pos < 3673800));
DEBUGLOG(2, "Cpos%7u :%3u literals, match%3u bytes at dist.code%7u", DEBUGLOG(6, "Cpos%7u :%3u literals, match%3u bytes at dist.code%7u",
pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode); pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode);
} }
#endif #endif

View File

@ -50,6 +50,14 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
predictedLarge += (predictedLarge>0); predictedLarge += (predictedLarge>0);
#endif /* ZSTD_C_PREDICT */ #endif /* ZSTD_C_PREDICT */
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 2)
g_debuglog_enable = (current <= 8530000);
#endif
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 8)
g_debuglog_enable = (current == 5202593);
#endif
DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);
assert(ip <= iend-8); /* required for h calculation */ assert(ip <= iend-8); /* required for h calculation */
hashTable[h] = current; /* Update Hash Table */ hashTable[h] = current; /* Update Hash Table */
@ -57,6 +65,12 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
U32* const nextPtr = bt + 2*(matchIndex & btMask); U32* const nextPtr = bt + 2*(matchIndex & btMask);
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 8)
if (current == 5189477) g_debuglog_enable = 1;
#endif
DEBUGLOG(8, "index%8u evaluated during insertion of %u (presumed min matchLength:%3u) ",
matchIndex, current, (U32)matchLength);
#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
if (matchIndex == predictedSmall) { if (matchIndex == predictedSmall) {
@ -79,26 +93,48 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
#endif #endif
if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
match = base + matchIndex; match = base + matchIndex;
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
{ size_t const controlSize = ZSTD_count(ip, match, iend);
if (controlSize < matchLength) {
DEBUGLOG(2, "Warning !! => matchIndex %u while inserting %u within prefix is smaller than minimum expectation (%u<%u) !",
matchIndex, current, (U32)controlSize, (U32)matchLength);
} }
#endif
if (match[matchLength] == ip[matchLength]) if (match[matchLength] == ip[matchLength])
matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
} else { } else {
match = dictBase + matchIndex; match = dictBase + matchIndex;
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
{ size_t const controlSize = ZSTD_count_2segments(ip, match, iend, dictEnd, prefixStart);
if (controlSize < matchLength) {
DEBUGLOG(2, "Warning !! => matchIndex %u while inserting %u into _extDict is smaller than minimum expectation (%u<%u) !",
matchIndex, current, (U32)controlSize, (U32)matchLength);
} }
#endif
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
if (matchIndex+matchLength >= dictLimit) if (matchIndex+matchLength >= dictLimit)
match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
} }
DEBUGLOG(8, "matchIndex%8u has %u bytes in common with %u ",
matchIndex, (U32)matchLength, current);
if (matchLength > bestLength) { if (matchLength > bestLength) {
bestLength = matchLength; bestLength = matchLength;
if (matchLength > matchEndIdx - matchIndex) if (matchLength > matchEndIdx - matchIndex)
matchEndIdx = matchIndex + (U32)matchLength; matchEndIdx = matchIndex + (U32)matchLength;
} }
if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
DEBUGLOG(8, "index %u has equal value at length %u as src : cannot determine > or <",
matchIndex, (U32)matchLength);
break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
}
if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
/* match+1 is smaller than current */ /* match+1 is smaller than current */
DEBUGLOG(8, "matchIndex%8u is smaller than %u (%u < %u)",
matchIndex, current, match[matchLength], ip[matchLength]);
*smallerPtr = matchIndex; /* update smaller idx */ *smallerPtr = matchIndex; /* update smaller idx */
commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
@ -106,6 +142,8 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
} else { } else {
/* match is larger than current */ /* match is larger than current */
DEBUGLOG(8, "matchIndex%8u is larger than %u (%u < %u)",
matchIndex, current, match[matchLength], ip[matchLength]);
*largerPtr = matchIndex; *largerPtr = matchIndex;
commonLengthLarger = matchLength; commonLengthLarger = matchLength;
if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
@ -157,9 +195,6 @@ static size_t ZSTD_insertBtAndFindBestMatch (
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
const BYTE* match; const BYTE* match;
DEBUGLOG(8, "index%7u evaluated during insertion of %u (presumed min matchLength:%3u) ",
matchIndex, current, (U32)matchLength);
if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
match = base + matchIndex; match = base + matchIndex;
if (match[matchLength] == ip[matchLength]) if (match[matchLength] == ip[matchLength])
@ -177,8 +212,6 @@ static size_t ZSTD_insertBtAndFindBestMatch (
if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
DEBUGLOG(8, "index %u has equal value at length %u as src : cannot determine > or <",
matchIndex, (U32)matchLength);
break; /* drop, to guarantee consistency (miss a little bit of compression) */ break; /* drop, to guarantee consistency (miss a little bit of compression) */
} }
} }

View File

@ -264,15 +264,11 @@ static U32 ZSTD_insertBtAndGetAllMatches (
size_t bestLength = minMatchLen-1; size_t bestLength = minMatchLen-1;
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 8) #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 8)
static const BYTE* g_start = NULL; g_debuglog_enable = (current == 5202593);
if (g_start==NULL) g_start = base; /* note : index only works for compression within a single segment */ //g_debuglog_enable = (current == 8845622);
{ U32 const pos = (U32)(ip - g_start); //if (current == 12193408) g_debuglog_enable = 1;
g_debuglog_enable = (pos==3673728);
}
if (current == 8793162) g_debuglog_enable = 1;
#endif #endif
/* check repCode */ /* check repCode */
#if 0 #if 0
if (!extDict) /*static*/ { if (!extDict) /*static*/ {
@ -293,7 +289,7 @@ static U32 ZSTD_insertBtAndGetAllMatches (
matches[mnum].len = (U32)repLen; matches[mnum].len = (U32)repLen;
mnum++; mnum++;
if ( (repLen > ZSTD_OPT_NUM) if ( (repLen > ZSTD_OPT_NUM)
|| (ip+repLen == iLimit) ) { /* best possible */ | (ip+repLen == iLimit) ) { /* best possible */
return mnum; return mnum;
} } } } } } } }
} else { /* extDict */ } else { /* extDict */
@ -320,7 +316,7 @@ static U32 ZSTD_insertBtAndGetAllMatches (
matches[mnum].len = (U32)repLen; matches[mnum].len = (U32)repLen;
mnum++; mnum++;
if ( (repLen > ZSTD_OPT_NUM) if ( (repLen > ZSTD_OPT_NUM)
|| (ip+repLen == iLimit) ) { /* best possible */ | (ip+repLen == iLimit) ) { /* best possible */
return mnum; return mnum;
} } } } } } } }
} }
@ -352,10 +348,10 @@ static U32 ZSTD_insertBtAndGetAllMatches (
assert(mnum==0); /* no prior solution */ assert(mnum==0); /* no prior solution */
matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE; matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE;
matches[0].len = (U32)mlen; matches[0].len = (U32)mlen;
mnum=1; mnum = 1;
if ( (mlen > ZSTD_OPT_NUM) if ( (mlen > ZSTD_OPT_NUM)
|| (ip+mlen == iLimit) ) { /* best possible */ | (ip+mlen == iLimit) ) { /* best possible */
return mnum; return 1;
} } } } } } } }
#endif #endif
@ -373,6 +369,13 @@ static U32 ZSTD_insertBtAndGetAllMatches (
#endif #endif
if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
match = base + matchIndex; match = base + matchIndex;
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
{ size_t const controlSize = ZSTD_count(ip, match, iLimit);
if (controlSize < matchLength) {
DEBUGLOG(2, "Warning !! => matchIndex %u while searching %u within prefix is smaller than minimum expectation (%u<%u) !",
matchIndex, current, (U32)controlSize, (U32)matchLength);
} }
#endif
if (match[matchLength] == ip[matchLength]) { if (match[matchLength] == ip[matchLength]) {
matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1; matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1;
} }
@ -388,9 +391,16 @@ static U32 ZSTD_insertBtAndGetAllMatches (
#endif #endif
} else { } else {
match = dictBase + matchIndex; match = dictBase + matchIndex;
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
{ size_t const controlSize = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
if (controlSize < matchLength) {
DEBUGLOG(2, "Warning !! => matchIndex %u while searching %u into _extDict is smaller than minimum expectation (%u<%u) !",
matchIndex, current, (U32)controlSize, (U32)matchLength);
} }
#endif
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
if (matchIndex+matchLength >= dictLimit) if (matchIndex+matchLength >= dictLimit)
match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ match = base + matchIndex; /* prepare for match[matchLength] */
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=8) #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=8)
if (matchIndex + 8 < dictLimit) if (matchIndex + 8 < dictLimit)
{ int i; { int i;
@ -431,7 +441,7 @@ static U32 ZSTD_insertBtAndGetAllMatches (
matchIndex, match[matchLength], ip[matchLength], (U32)matchLength); matchIndex, match[matchLength], ip[matchLength], (U32)matchLength);
{ int i; { int i;
RAWLOG(8, "index %u: ", matchIndex); RAWLOG(8, "index %u: ", matchIndex);
for (i=0; i<27; i++) RAWLOG(7," %02X ", match[i]); for (i=0; i<18; i++) RAWLOG(7," %02X ", match[i]);
RAWLOG(8, " \n"); RAWLOG(8, " \n");
} }
#endif #endif
@ -445,7 +455,7 @@ static U32 ZSTD_insertBtAndGetAllMatches (
{ int i; { int i;
const BYTE* const match2 = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; const BYTE* const match2 = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
RAWLOG(8, "index %u: ", matchIndex); RAWLOG(8, "index %u: ", matchIndex);
for (i=0; i<27; i++) RAWLOG(7," %02X ", match2[i]); for (i=0; i<18; i++) RAWLOG(7," %02X ", match2[i]);
RAWLOG(8, " \n"); RAWLOG(8, " \n");
} }
#endif #endif
@ -455,7 +465,7 @@ static U32 ZSTD_insertBtAndGetAllMatches (
matchIndex, match[matchLength], ip[matchLength], (U32)matchLength); matchIndex, match[matchLength], ip[matchLength], (U32)matchLength);
{ int i; { int i;
RAWLOG(8, "index %u: ", matchIndex); RAWLOG(8, "index %u: ", matchIndex);
for (i=0; i<27; i++) RAWLOG(7," %02X ", match[i]); for (i=0; i<18; i++) RAWLOG(7," %02X ", match[i]);
RAWLOG(8, " \n"); RAWLOG(8, " \n");
} }
#endif #endif
@ -469,7 +479,7 @@ static U32 ZSTD_insertBtAndGetAllMatches (
{ int i; { int i;
const BYTE* const match2 = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; const BYTE* const match2 = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
RAWLOG(8, "index %u: ", matchIndex); RAWLOG(8, "index %u: ", matchIndex);
for (i=0; i<27; i++) RAWLOG(7," %02X ", match2[i]); for (i=0; i<18; i++) RAWLOG(7," %02X ", match2[i]);
RAWLOG(8, " \n"); RAWLOG(8, " \n");
} }
#endif #endif