From 43f21a600ec431aa615b09868f1f586b949607fb Mon Sep 17 00:00:00 2001 From: udayanbapat <104396882+udayanbapat@users.noreply.github.com> Date: Thu, 14 Jul 2022 11:54:34 -0700 Subject: [PATCH] Intial commit to address 3090. Added support to decompress empty block. (#3118) * Intial commit to address 3090. Added support to decompress empty block * Update zstd_decompress_block.c Addressed review comments for the case of 'set_basic' * Update lib/decompress/zstd_decompress_block.c Co-authored-by: Nick Terrell * Update lib/decompress/zstd_decompress_block.c Co-authored-by: Nick Terrell Co-authored-by: Nick Terrell --- lib/common/zstd_internal.h | 2 +- lib/compress/zstd_compress.c | 12 +++++++++--- lib/decompress/zstd_decompress_block.c | 6 ++++-- tests/golden-decompression/empty-block.zst | Bin 0 -> 11 bytes tests/playTests.sh | 5 ++++- 5 files changed, 18 insertions(+), 7 deletions(-) create mode 100644 tests/golden-decompression/empty-block.zst diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index e76b8e19..03823245 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -93,7 +93,7 @@ typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; #define ZSTD_FRAMECHECKSUMSIZE 4 #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ -#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ +#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */) /* for a non-null block */ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index b1f9bba8..59d441b2 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2863,7 +2863,9 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) assert(srcSize <= ZSTD_BLOCKSIZE_MAX); /* Assert that we have correctly flushed the ctx params into the ms's copy */ ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); - if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { + /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding + * additional 1. We need to revisit and change this logic to be more consistent */ + if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) { if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) { ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize); } else { @@ -4000,7 +4002,9 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx, ZSTD_matchState_t* const ms = &cctx->blockState.matchState; U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); - RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE, + /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding + * additional 1. We need to revisit and change this logic to be more consistent */ + RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE + 1, dstSize_tooSmall, "not enough space to store compressed block"); if (remaining < blockSize) blockSize = remaining; @@ -6215,7 +6219,9 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, blockSize -= additionalByteAdjustment; /* If blocks are too small, emit as a nocompress block */ - if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { + /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding + * additional 1. We need to revisit and change this logic to be more consistent */ + if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) { cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); DEBUGLOG(5, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize); diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 466e83b6..6df4c384 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -135,7 +135,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, ZSTD_FALLTHROUGH; case set_compressed: - RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); + RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3"); { size_t lhSize, litSize, litCSize; U32 singleStream=0; U32 const lhlCode = (istart[0] >> 2) & 3; @@ -238,6 +238,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, break; case 3: lhSize = 3; + RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3"); litSize = MEM_readLE24(istart) >> 4; break; } @@ -280,12 +281,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, break; case 1: lhSize = 2; + RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3"); litSize = MEM_readLE16(istart) >> 4; break; case 3: lhSize = 3; + RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4"); litSize = MEM_readLE24(istart) >> 4; - RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); break; } RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); diff --git a/tests/golden-decompression/empty-block.zst b/tests/golden-decompression/empty-block.zst new file mode 100644 index 0000000000000000000000000000000000000000..2a3782aff0d1a6bab62dcdc1af29b448322a84bd GIT binary patch literal 11 QcmdPcs{dDkL6iXq028eOKL7v# literal 0 HcmV?d00001 diff --git a/tests/playTests.sh b/tests/playTests.sh index 873c3c67..50aef5df 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -418,8 +418,11 @@ println "\n===> decompression only tests " dd bs=1048576 count=1 if=/dev/zero of=tmp zstd -d -o tmp1 "$TESTDIR/golden-decompression/rle-first-block.zst" $DIFF -s tmp1 tmp -rm -f tmp* +touch tmp_empty +zstd -d -o tmp2 "$TESTDIR/golden-decompression/empty-block.zst" +$DIFF -s tmp2 tmp_empty +rm -f tmp* println "\n===> compress multiple files" println hello > tmp1