From 2108decb41c953a2edcc3a8cb7becceb3981bd7a Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 1 Jun 2018 15:18:32 -0700 Subject: [PATCH] Fixed a nasty corruption bug recently introduce into the new dictionary mode. The bug could be reproduced with this command : ./zstreamtest -v --opaqueapi --no-big-tests -s4092 -t639 error was in function ZSTD_count_2segments() : the beginning of the 2nd segment corresponds to prefixStart and not the beginning of the current block (istart == src). This would result in comparing the wrong byte. --- lib/compress/zstd_compress.c | 3 ++ lib/compress/zstd_compress_internal.h | 5 +++ lib/compress/zstd_double_fast.c | 10 +++--- lib/compress/zstd_fast.c | 14 ++++---- lib/decompress/zstd_decompress.c | 50 +++++++++++++++------------ tests/zstreamtest.c | 16 ++++++--- 6 files changed, 58 insertions(+), 40 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 2f66acab..7a30d552 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -581,6 +581,7 @@ size_t ZSTD_CCtxParam_getParameter( size_t ZSTD_CCtx_setParametersUsingCCtxParams( ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) { + DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams"); if (cctx->streamStage != zcss_init) return ERROR(stage_wrong); if (cctx->cdict) return ERROR(stage_wrong); @@ -1226,6 +1227,8 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, && ZSTD_equivalentCParams(cctx->appliedParams.cParams, cdict->cParams); + DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)", (U32)pledgedSrcSize); + { unsigned const windowLog = params.cParams.windowLog; assert(windowLog != 0); diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 87206c1e..fd072f30 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -435,6 +435,11 @@ ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd); size_t const matchLength = ZSTD_count(ip, match, vEnd); if (match + matchLength != mEnd) return matchLength; + DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength); + DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match); + DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip); + DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart); + DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd)); return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd); } diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index fbd35404..8db622a6 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -126,7 +126,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, istart) + 4; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; ip++; ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); goto _match_stored; @@ -183,7 +183,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( continue; _search_next_long: - + { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); U32 const matchIndexL3 = hashLong[hl3]; @@ -213,10 +213,10 @@ _search_next_long: } } } - + /* if no long +1 match, explore the short match we found */ if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { - mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, istart) + 4; + mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4; offset = (U32)(current - matchIndexS); while (((ip>anchor) & (match>dictLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ } else { @@ -257,7 +257,7 @@ _match_stored: if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, istart) + 4; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index 3bac2bdd..a2fdec61 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -54,7 +54,7 @@ size_t ZSTD_compressBlock_fast_generic( const BYTE* ip = istart; const BYTE* anchor = istart; const U32 prefixLowestIndex = ms->window.dictLimit; - const BYTE* const prefixLowest = base + prefixLowestIndex; + const BYTE* const prefixStart = base + prefixLowestIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - HASH_READ_SIZE; U32 offset_1=rep[0], offset_2=rep[1]; @@ -74,7 +74,7 @@ size_t ZSTD_compressBlock_fast_generic( const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? prefixLowestIndex - (U32)(dictEnd - dictBase) : 0; - const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest); + const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictLowest); assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); @@ -86,7 +86,7 @@ size_t ZSTD_compressBlock_fast_generic( /* init */ ip += (dictAndPrefixLength == 0); if (dictMode == ZSTD_noDict) { - U32 const maxRep = (U32)(ip - prefixLowest); + U32 const maxRep = (U32)(ip - prefixStart); if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; } @@ -115,7 +115,7 @@ size_t ZSTD_compressBlock_fast_generic( && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, istart) + 4; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; ip++; ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); } else if ( dictMode == ZSTD_noDict @@ -136,7 +136,7 @@ size_t ZSTD_compressBlock_fast_generic( } else { /* found a dict match */ U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta); - mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, istart) + 4; + mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; while (((ip>anchor) & (dictMatch>dictLowest)) && (ip[-1] == dictMatch[-1])) { ip--; dictMatch--; mLength++; @@ -154,7 +154,7 @@ size_t ZSTD_compressBlock_fast_generic( /* found a regular match */ U32 const offset = (U32)(ip-match); mLength = ZSTD_count(ip+4, match+4, iend) + 4; - while (((ip>anchor) & (match>prefixLowest)) + while (((ip>anchor) & (match>prefixStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; @@ -181,7 +181,7 @@ size_t ZSTD_compressBlock_fast_generic( if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, istart) + 4; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 32a21308..030b7483 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -115,8 +115,8 @@ struct ZSTD_DCtx_s const HUF_DTable* HUFptr; ZSTD_entropyDTables_t entropy; const void* previousDstEnd; /* detect continuity */ - const void* base; /* start of current segment */ - const void* vBase; /* virtual start of previous segment if it was just before current one */ + const void* prefixStart; /* start of current segment */ + const void* virtualStart; /* virtual start of previous segment if it was just before current one */ const void* dictEnd; /* end of previous segment */ size_t expected; ZSTD_frameHeader fParams; @@ -1076,7 +1076,7 @@ HINT_INLINE size_t ZSTD_execSequence(BYTE* op, BYTE* const oend, seq_t sequence, const BYTE** litPtr, const BYTE* const litLimit, - const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) { BYTE* const oLitEnd = op + sequence.litLength; size_t const sequenceLength = sequence.litLength + sequence.matchLength; @@ -1088,7 +1088,7 @@ size_t ZSTD_execSequence(BYTE* op, /* check */ if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */ - if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd); + if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); /* copy Literals */ ZSTD_copy8(op, *litPtr); @@ -1098,21 +1098,25 @@ size_t ZSTD_execSequence(BYTE* op, *litPtr = iLitEnd; /* update for next sequence */ /* copy Match */ - if (sequence.offset > (size_t)(oLitEnd - base)) { + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { /* offset beyond prefix -> go into extDict */ - if (sequence.offset > (size_t)(oLitEnd - vBase)) + if (sequence.offset > (size_t)(oLitEnd - virtualStart)) return ERROR(corruption_detected); - match = dictEnd + (match - base); + match = dictEnd + (match - prefixStart); if (match + sequence.matchLength <= dictEnd) { memmove(oLitEnd, match, sequence.matchLength); return sequenceLength; } /* span extDict & currentPrefixSegment */ + DEBUGLOG(2, "ZSTD_execSequence: found a 2-segments match") { size_t const length1 = dictEnd - match; + DEBUGLOG(2, "first part (extDict) is %zu bytes long", length1); memmove(oLitEnd, match, length1); op = oLitEnd + length1; sequence.matchLength -= length1; - match = base; + DEBUGLOG(2, "second part (prefix) is %zu bytes long", sequence.matchLength); + match = prefixStart; + DEBUGLOG(2, "first byte of 2nd part : %02X", *prefixStart); if (op > oend_w || sequence.matchLength < MINMATCH) { U32 i; for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i]; @@ -1355,10 +1359,10 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, BYTE* op = ostart; const BYTE* litPtr = dctx->litPtr; const BYTE* const litEnd = litPtr + dctx->litSize; - const BYTE* const base = (const BYTE*) (dctx->base); - const BYTE* const vBase = (const BYTE*) (dctx->vBase); + const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); + const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); - DEBUGLOG(5, "ZSTD_decompressSequences"); + DEBUGLOG(5, "ZSTD_decompressSequences_body"); /* Regen sequences */ if (nbSeq) { @@ -1373,14 +1377,14 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { nbSeq--; { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); - size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd); + size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); if (ZSTD_isError(oneSeqSize)) return oneSeqSize; op += oneSeqSize; } } /* check if reached exact end */ - DEBUGLOG(5, "ZSTD_decompressSequences: after decode loop, remaining nbSeq : %i", nbSeq); + DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); if (nbSeq) return ERROR(corruption_detected); /* save reps for next block */ { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } @@ -1499,8 +1503,8 @@ ZSTD_decompressSequencesLong_body( BYTE* op = ostart; const BYTE* litPtr = dctx->litPtr; const BYTE* const litEnd = litPtr + dctx->litSize; - const BYTE* const prefixStart = (const BYTE*) (dctx->base); - const BYTE* const dictStart = (const BYTE*) (dctx->vBase); + const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); + const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); /* Regen sequences */ @@ -1702,8 +1706,8 @@ static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) { if (dst != dctx->previousDstEnd) { /* not contiguous */ dctx->dictEnd = dctx->previousDstEnd; - dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base)); - dctx->base = dst; + dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dst; dctx->previousDstEnd = dst; } } @@ -2171,8 +2175,8 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { dctx->dictEnd = dctx->previousDstEnd; - dctx->vBase = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base)); - dctx->base = dict; + dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dict; dctx->previousDstEnd = (const char*)dict + dictSize; return 0; } @@ -2276,8 +2280,8 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) dctx->stage = ZSTDds_getFrameHeaderSize; dctx->decodedSize = 0; dctx->previousDstEnd = NULL; - dctx->base = NULL; - dctx->vBase = NULL; + dctx->prefixStart = NULL; + dctx->virtualStart = NULL; dctx->dictEnd = NULL; dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ dctx->litEntropy = dctx->fseEntropy = 0; @@ -2327,8 +2331,8 @@ size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dstDCtx, const ZSTD_DDict* ddi CHECK_F( ZSTD_decompressBegin(dstDCtx) ); if (ddict) { /* support begin on NULL */ dstDCtx->dictID = ddict->dictID; - dstDCtx->base = ddict->dictContent; - dstDCtx->vBase = ddict->dictContent; + dstDCtx->prefixStart = ddict->dictContent; + dstDCtx->virtualStart = ddict->dictContent; dstDCtx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; dstDCtx->previousDstEnd = dstDCtx->dictEnd; if (ddict->entropyPresent) { diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index ffed955a..425fbab3 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -1671,10 +1671,13 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double /* compression init */ CHECK_Z( ZSTD_CCtx_loadDictionary(zc, NULL, 0) ); /* cancel previous dict /*/ if ((FUZ_rand(&lseed)&1) /* at beginning, to keep same nb of rand */ - && oldTestLog /* at least one test happened */ && resetAllowed) { + && oldTestLog /* at least one test happened */ + && resetAllowed) { + /* just set a compression level */ maxTestSize = FUZ_randomLength(&lseed, oldTestLog+2); if (maxTestSize >= srcBufferSize) maxTestSize = srcBufferSize-1; { int const compressionLevel = (FUZ_rand(&lseed) % 5) + 1; + DISPLAYLEVEL(5, "t%u : compression level : %i \n", testNb, compressionLevel); CHECK_Z (setCCtxParameter(zc, cctxParams, ZSTD_p_compressionLevel, compressionLevel, useOpaqueAPI) ); } } else { @@ -1698,6 +1701,8 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double { U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? ZSTD_CONTENTSIZE_UNKNOWN : maxTestSize; ZSTD_compressionParameters cParams = ZSTD_getCParams(cLevel, pledgedSrcSize, dictSize); static const U32 windowLogMax = 24; + if (dictSize) + DISPLAYLEVEL(5, "t%u: with dictionary of size : %zu \n", testNb, dictSize); /* mess with compression parameters */ cParams.windowLog += (FUZ_rand(&lseed) & 3) - 1; @@ -1707,7 +1712,7 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double cParams.searchLog += (FUZ_rand(&lseed) & 3) - 1; cParams.searchLength += (FUZ_rand(&lseed) & 3) - 1; cParams.targetLength = (U32)((cParams.targetLength + 1 ) * (0.5 + ((double)(FUZ_rand(&lseed) & 127) / 128))); - cParams = ZSTD_adjustCParams(cParams, 0, 0); + cParams = ZSTD_adjustCParams(cParams, pledgedSrcSize, dictSize); if (FUZ_rand(&lseed) & 1) { DISPLAYLEVEL(5, "t%u: windowLog : %u \n", testNb, cParams.windowLog); @@ -1766,7 +1771,7 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double /* Apply parameters */ if (useOpaqueAPI) { - DISPLAYLEVEL(6," t%u: applying CCtxParams \n", testNb); + DISPLAYLEVEL(5, "t%u: applying CCtxParams \n", testNb); CHECK_Z (ZSTD_CCtx_setParametersUsingCCtxParams(zc, cctxParams) ); } @@ -1832,7 +1837,7 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double } } crcOrig = XXH64_digest(&xxhState); cSize = outBuff.pos; - DISPLAYLEVEL(5, "Frame completed : %u bytes \n", (U32)cSize); + DISPLAYLEVEL(5, "Frame completed : %zu bytes \n", cSize); } CHECK(badParameters(zc, savedParams), "CCtx params are wrong"); @@ -1842,7 +1847,8 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest, double DISPLAYLEVEL(5, "resetting DCtx (dict:%08X) \n", (U32)(size_t)dict); CHECK_Z( ZSTD_resetDStream(zd) ); } else { - DISPLAYLEVEL(5, "using dict of size %u \n", (U32)dictSize); + if (dictSize) + DISPLAYLEVEL(5, "using dictionary of size %zu \n", dictSize); CHECK_Z( ZSTD_initDStream_usingDict(zd, dict, dictSize) ); } { size_t decompressionResult = 1;