From 308047eb5dbb1659961835fce90db4883ed386a6 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Thu, 3 Aug 2017 14:05:01 -0700 Subject: [PATCH] Fix compression failure on incompressible data If the destination buffer is the minimum allowed size in `ZSTD_compressSequences()` (2^17), then if the block isn't compressible compression might fail with `dstSize_tooSmall`, when it should instead emit a raw uncompressed block. Additionally, `ZSTD_compressLiterals()` implicitly called `ZSTD_noCompressLiterals()` if Huffman compression failed. Make that explicit. --- lib/compress/zstd_compress.c | 52 +++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index a73763d2..a70a6668 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -942,7 +942,7 @@ static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t * entropy, else { entropy->hufCTable_repeatMode = HUF_repeat_check; } /* now have a table to reuse */ } - if ((cLitSize==0) | (cLitSize >= srcSize - minGain)) { + if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { entropy->hufCTable_repeatMode = HUF_repeat_none; return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); } @@ -1156,11 +1156,10 @@ MEM_STATIC size_t ZSTD_encodeSequences(void* dst, size_t dstCapacity, } } -MEM_STATIC size_t ZSTD_compressSequences (seqStore_t* seqStorePtr, +MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, ZSTD_entropyCTables_t* entropy, ZSTD_compressionParameters const* cParams, - void* dst, size_t dstCapacity, - size_t srcSize) + void* dst, size_t dstCapacity) { const int longOffsets = cParams->windowLog > STREAM_ACCUMULATOR_MIN; U32 count[MaxSeq+1]; @@ -1195,7 +1194,7 @@ MEM_STATIC size_t ZSTD_compressSequences (seqStore_t* seqStorePtr, if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq; else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, 
op+=2; else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; - if (nbSeq==0) goto _check_compressibility; + if (nbSeq==0) return op - ostart; /* seqHead : flags for FSE encoding type */ seqHead = op++; @@ -1244,23 +1243,40 @@ MEM_STATIC size_t ZSTD_compressSequences (seqStore_t* seqStorePtr, op += streamSize; } + return op - ostart; +} - /* check compressibility */ -_check_compressibility: - { size_t const minGain = ZSTD_minGain(srcSize); - size_t const maxCSize = srcSize - minGain; - if ((size_t)(op-ostart) >= maxCSize) { - entropy->hufCTable_repeatMode = HUF_repeat_none; - entropy->offcode_repeatMode = FSE_repeat_none; - entropy->matchlength_repeatMode = FSE_repeat_none; - entropy->litlength_repeatMode = FSE_repeat_none; - return 0; - } } +MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr, + ZSTD_entropyCTables_t* entropy, + ZSTD_compressionParameters const* cParams, + void* dst, size_t dstCapacity, + size_t srcSize) +{ + size_t const cSize = ZSTD_compressSequences_internal(seqStorePtr, entropy, cParams, + dst, dstCapacity); + size_t const minGain = ZSTD_minGain(srcSize); + size_t const maxCSize = srcSize - minGain; + /* If the srcSize <= dstCapacity, then there is enough space to write a + * raw uncompressed block. Since we ran out of space, the block must not + * be compressible, so fall back to a raw uncompressed block. 
+ */ + int const uncompressibleError = cSize == ERROR(dstSize_tooSmall) && srcSize <= dstCapacity; + + if (ZSTD_isError(cSize) && !uncompressibleError) + return cSize; + /* Check compressibility */ + if (cSize >= maxCSize || uncompressibleError) { + entropy->hufCTable_repeatMode = HUF_repeat_none; + entropy->offcode_repeatMode = FSE_repeat_none; + entropy->matchlength_repeatMode = FSE_repeat_none; + entropy->litlength_repeatMode = FSE_repeat_none; + return 0; + } + assert(!ZSTD_isError(cSize)); /* confirm repcodes */ { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->rep[i] = seqStorePtr->repToConfirm[i]; } - - return op - ostart; + return cSize; }