From d59cf02df0938c3147f4290f48cf3e533129406e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 14 May 2018 15:32:28 -0700 Subject: [PATCH 1/4] decompress: changed error code when input is too large ZSTD_decompress() can decompress multiple frames sent as a single input. But the input size must be the exact sum of all compressed frames, no more. In the case of a mistake on srcSize, being larger than required, ZSTD_decompress() will try to decompress a new frame after current one, and fail. As a consequence, it will issue an error code, ERROR(prefix_unknown). While the error is technically correct (the decoder could not recognise the header of _next_ frame), it's confusing, as users will believe that the first header of the first frame is wrong, which is not the case (it's correct). It makes it more difficult to understand that the error is in the source size, which is too large. This patch changes the error code provided in such a scenario. If (at least) a first frame was successfully decoded, and then following bytes are garbage values, the decoder assumes the provided input size is wrong (too large), and issue the error code ERROR(srcSize_wrong). --- lib/decompress/zstd_decompress.c | 27 +++++++++++++++++++-------- tests/fuzzer.c | 6 ++++++ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 103dcc64..32a21308 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1882,6 +1882,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) { void* const dststart = dst; + int moreThan1Frame = 0; assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */ if (ddict) { @@ -1890,7 +1891,6 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, } while (srcSize >= ZSTD_frameHeaderSize_prefix) { - U32 magicNumber; #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) if (ZSTD_isLegacy(src, srcSize)) { @@ -1912,10 +1912,9 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, } #endif - magicNumber = MEM_readLE32(src); - DEBUGLOG(4, "reading magic number %08X (expecting %08X)", - (U32)magicNumber, (U32)ZSTD_MAGICNUMBER); - if (magicNumber != ZSTD_MAGICNUMBER) { + { U32 const magicNumber = MEM_readLE32(src); + DEBUGLOG(4, "reading magic number %08X (expecting %08X)", + (U32)magicNumber, (U32)ZSTD_MAGICNUMBER); if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { size_t skippableSize; if (srcSize < ZSTD_skippableHeaderSize) @@ -1927,9 +1926,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, src = (const BYTE *)src + skippableSize; srcSize -= skippableSize; continue; - } - return ERROR(prefix_unknown); - } + } } if (ddict) { /* we were called from ZSTD_decompress_usingDDict */ @@ -1943,11 +1940,25 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, &src, &srcSize); + if ( (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) + && (moreThan1Frame==1) ) { + /* at least one frame successfully completed, + * but following bytes are garbage : + * it's more likely to be a srcSize error, + * specifying more bytes than compressed size of frame(s). + * This error message replaces ERROR(prefix_unknown), + * which would be confusing, as the first header is actually correct. + * Note that one could be unlucky, it might be a corruption error instead, + * happening right at the place where we expect zstd magic bytes. + * But this is _much_ less likely than a srcSize field error. */ + return ERROR(srcSize_wrong); + } if (ZSTD_isError(res)) return res; /* no need to bound check, ZSTD_decompressFrame already has */ dst = (BYTE*)dst + res; dstCapacity -= res; } + moreThan1Frame = 1; } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ if (srcSize) return ERROR(srcSize_wrong); /* input not entirely consumed */ diff --git a/tests/fuzzer.c b/tests/fuzzer.c index f9ea209f..b0c425e1 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -375,6 +375,12 @@ static int basicUnitTests(U32 seed, double compressibility) if (ZSTD_getErrorCode(r) != ZSTD_error_srcSize_wrong) goto _output_error; } DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : decompress too large input : ", testNb++); + { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, compressedBufferSize); + if (!ZSTD_isError(r)) goto _output_error; + if (ZSTD_getErrorCode(r) != ZSTD_error_srcSize_wrong) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3d : check CCtx size after compressing empty input : ", testNb++); { ZSTD_CCtx* cctx = ZSTD_createCCtx(); size_t const r = ZSTD_compressCCtx(cctx, compressedBuffer, compressedBufferSize, NULL, 0, 19); From 30d9c84b1ab9a2b100cfe667558e975d70070736 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 15 May 2018 09:46:20 -0700 Subject: [PATCH 2/4] Fix failing Travis tests --- lib/compress/zstd_opt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 76144f73..3a48187c 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -53,7 +53,7 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr, for (lit=0; lit<=MaxLit; lit++) { U32 const scaleLog = 11; /* scale to 2K */ U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->hufCTable, lit); - assert(bitCost < scaleLog); + assert(bitCost <= scaleLog); optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; optPtr->litSum += optPtr->litFreq[lit]; } } From 16bb8f1f9e8ec952458be9681a125d29627cdde9 Mon Sep 17 00:00:00 2001 From: fbrosson Date: Fri, 18 May 2018 17:05:36 +0000 Subject: [PATCH 3/4] Drop colon in asm snippet to make old versions of gcc happy. --- lib/common/cpu.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/common/cpu.h b/lib/common/cpu.h index 4eb48e39..88e0ebf4 100644 --- a/lib/common/cpu.h +++ b/lib/common/cpu.h @@ -72,8 +72,7 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { "cpuid\n\t" "popl %%ebx\n\t" : "=a"(f1a), "=c"(f1c), "=d"(f1d) - : "a"(1) - :); + : "a"(1)); } if (n >= 7) { __asm__( From 291824f49dbe50a4812c204994ae288c4c43979a Mon Sep 17 00:00:00 2001 From: fbrosson Date: Fri, 18 May 2018 18:40:11 +0000 Subject: [PATCH 4/4] __builtin_prefetch did probably not exist before gcc 3.1. --- lib/common/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/common/compiler.h b/lib/common/compiler.h index b588e110..366ed2b4 100644 --- a/lib/common/compiler.h +++ b/lib/common/compiler.h @@ -92,7 +92,7 @@ #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ # include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ # define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0) -#elif defined(__GNUC__) +#elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) # define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0) #else # define PREFETCH(ptr) /* disabled */