From d59cf02df0938c3147f4290f48cf3e533129406e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 14 May 2018 15:32:28 -0700 Subject: [PATCH] decompress: changed error code when input is too large ZSTD_decompress() can decompress multiple frames sent as a single input. But the input size must be the exact sum of all compressed frames, no more. In the case of a mistake on srcSize, being larger than required, ZSTD_decompress() will try to decompress a new frame after current one, and fail. As a consequence, it will issue an error code, ERROR(prefix_unknown). While the error is technically correct (the decoder could not recognise the header of _next_ frame), it's confusing, as users will believe that the first header of the first frame is wrong, which is not the case (it's correct). It makes it more difficult to understand that the error is in the source size, which is too large. This patch changes the error code provided in such a scenario. If (at least) a first frame was successfully decoded, and then following bytes are garbage values, the decoder assumes the provided input size is wrong (too large), and issue the error code ERROR(srcSize_wrong). --- lib/decompress/zstd_decompress.c | 27 +++++++++++++++++++-------- tests/fuzzer.c | 6 ++++++ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 103dcc64..32a21308 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1882,6 +1882,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) { void* const dststart = dst; + int moreThan1Frame = 0; assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */ if (ddict) { @@ -1890,7 +1891,6 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, } while (srcSize >= ZSTD_frameHeaderSize_prefix) { - U32 magicNumber; #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) if (ZSTD_isLegacy(src, srcSize)) { @@ -1912,10 +1912,9 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, } #endif - magicNumber = MEM_readLE32(src); - DEBUGLOG(4, "reading magic number %08X (expecting %08X)", - (U32)magicNumber, (U32)ZSTD_MAGICNUMBER); - if (magicNumber != ZSTD_MAGICNUMBER) { + { U32 const magicNumber = MEM_readLE32(src); + DEBUGLOG(4, "reading magic number %08X (expecting %08X)", + (U32)magicNumber, (U32)ZSTD_MAGICNUMBER); if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { size_t skippableSize; if (srcSize < ZSTD_skippableHeaderSize) @@ -1927,9 +1926,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, src = (const BYTE *)src + skippableSize; srcSize -= skippableSize; continue; - } - return ERROR(prefix_unknown); - } + } } if (ddict) { /* we were called from ZSTD_decompress_usingDDict */ @@ -1943,11 +1940,25 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, &src, &srcSize); + if ( (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) + && (moreThan1Frame==1) ) { + /* at least one frame successfully completed, + * but following bytes are garbage : + * it's more likely to be a srcSize error, + * specifying more bytes than compressed size of frame(s). + * This error message replaces ERROR(prefix_unknown), + * which would be confusing, as the first header is actually correct. + * Note that one could be unlucky, it might be a corruption error instead, + * happening right at the place where we expect zstd magic bytes. + * But this is _much_ less likely than a srcSize field error. */ + return ERROR(srcSize_wrong); + } if (ZSTD_isError(res)) return res; /* no need to bound check, ZSTD_decompressFrame already has */ dst = (BYTE*)dst + res; dstCapacity -= res; } + moreThan1Frame = 1; } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ if (srcSize) return ERROR(srcSize_wrong); /* input not entirely consumed */ diff --git a/tests/fuzzer.c b/tests/fuzzer.c index f9ea209f..b0c425e1 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -375,6 +375,12 @@ static int basicUnitTests(U32 seed, double compressibility) if (ZSTD_getErrorCode(r) != ZSTD_error_srcSize_wrong) goto _output_error; } DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : decompress too large input : ", testNb++); + { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, compressedBufferSize); + if (!ZSTD_isError(r)) goto _output_error; + if (ZSTD_getErrorCode(r) != ZSTD_error_srcSize_wrong) goto _output_error; } + DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3d : check CCtx size after compressing empty input : ", testNb++); { ZSTD_CCtx* cctx = ZSTD_createCCtx(); size_t const r = ZSTD_compressCCtx(cctx, compressedBuffer, compressedBufferSize, NULL, 0, 19);