diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index 6d8e74fe..d7058e30 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -1557,7 +1557,7 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long

Block level API


 
 

Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes). - User will have to take in charge required information to regenerate data, such as compressed and content sizes. + But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. A few rules to respect : - Compressing and decompressing require a context structure @@ -1568,12 +1568,14 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long + copyCCtx() and copyDCtx() can be used too - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB + If input is larger than a block size, it's necessary to split input data into multiple blocks - + For inputs larger than a single block, really consider using regular ZSTD_compress() instead. - Frame metadata is not that costly, and quickly becomes negligible as source size grows larger. - - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero. - In which case, nothing is produced into `dst` ! - + User must test for such outcome and deal directly with uncompressed data - + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!! + + For inputs larger than a single block, consider using regular ZSTD_compress() instead. + Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. + - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! + ===> In which case, nothing is produced into `dst` ! + + User __must__ test for such outcome and deal directly with uncompressed data + + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. + Doing so would mess up with statistics history, leading to potential data corruption. + + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! + In case of multiple successive blocks, should some of them be uncompressed, decoder must be informed of their existence in order to follow proper history. Use ZSTD_insertBlock() for such a case. diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index 81b16eac..8c96fd68 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -56,9 +56,9 @@ extern "C" { /** * Return the specified error if the condition evaluates to true. * - * In debug modes, prints additional information. In order to do that - * (particularly, printing the conditional that failed), this can't just wrap - * RETURN_ERROR(). + * In debug modes, prints additional information. + * In order to do that (particularly, printing the conditional that failed), + * this can't just wrap RETURN_ERROR(). */ #define RETURN_ERROR_IF(cond, err, ...) \ if (cond) { \ diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 1200d828..cd73db13 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1955,7 +1955,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstCapacity; BYTE* op = ostart; - size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); BYTE* seqHead; BYTE* lastNCount = NULL; @@ -1964,7 +1964,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, /* Compress literals */ { const BYTE* const literals = seqStorePtr->litStart; - size_t const litSize = seqStorePtr->lit - literals; + size_t const litSize = (size_t)(seqStorePtr->lit - literals); size_t const cSize = ZSTD_compressLiterals( &prevEntropy->huf, &nextEntropy->huf, cctxParams->cParams.strategy, @@ -1991,7 +1991,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, if (nbSeq==0) { /* Copy the old tables over as if we repeated them */ memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); - return op - ostart; + return (size_t)(op - ostart); } /* seqHead : flags for FSE encoding type */ @@ -2012,7 +2012,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, ZSTD_defaultAllowed, strategy); assert(set_basic < set_compressed && set_rle < set_compressed); assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, + { size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable), workspace, wkspSize); @@ -2035,7 +2035,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, OF_defaultNorm, OF_defaultNormLog, defaultPolicy, strategy); assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, + { size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable), workspace, wkspSize); @@ -2056,7 +2056,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, ML_defaultNorm, ML_defaultNormLog, ZSTD_defaultAllowed, strategy); assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, + { size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable), workspace, wkspSize); @@ -2070,7 +2070,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); { size_t const bitstreamSize = ZSTD_encodeSequences( - op, oend - op, + op, (size_t)(oend - op), CTable_MatchLength, mlCodeTable, CTable_OffsetBits, ofCodeTable, CTable_LitLength, llCodeTable, @@ -2097,7 +2097,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, } DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart)); - return op - ostart; + return (size_t)(op - ostart); } MEM_STATIC size_t @@ -2539,8 +2539,9 @@ size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - size_t const blockSizeMax = ZSTD_getBlockSize(cctx); - RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong); + DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize); + { size_t const blockSizeMax = ZSTD_getBlockSize(cctx); + RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong); } return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); } @@ -2565,7 +2566,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, if (srcSize <= HASH_READ_SIZE) return 0; while (iend - ip > HASH_READ_SIZE) { - size_t const remaining = iend - ip; + size_t const remaining = (size_t)(iend - ip); size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX); const BYTE* const ichunk = ip + chunk; diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index cf99a137..751060b2 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -574,9 +574,10 @@ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) } /** ZSTD_insertBlock() : - insert `src` block into `dctx` history. Useful to track uncompressed blocks. */ + * insert `src` block into `dctx` history. Useful to track uncompressed blocks. */ size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) { + DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize); ZSTD_checkContinuity(dctx, blockStart); dctx->previousDstEnd = (const char*)blockStart + blockSize; return blockSize; diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 24f4859c..cbcfc084 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -79,6 +79,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ { + DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected); { const BYTE* const istart = (const BYTE*) src; @@ -87,6 +88,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, switch(litEncType) { case set_repeat: + DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block"); RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted); /* fall-through */ @@ -116,7 +118,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, /* 2 - 2 - 18 - 18 */ lhSize = 5; litSize = (lhc >> 4) & 0x3FFFF; - litCSize = (lhc >> 22) + (istart[4] << 10); + litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); break; } RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected); @@ -391,7 +393,8 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt, symbolNext[s] = 1; } else { if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0; - symbolNext[s] = normalizedCounter[s]; + assert(normalizedCounter[s]>=0); + symbolNext[s] = (U16)normalizedCounter[s]; } } } memcpy(dt, &DTableH, sizeof(DTableH)); } diff --git a/lib/zstd.h b/lib/zstd.h index 4a1f8161..923517c0 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1909,7 +1909,7 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); /*! Block functions produce and decode raw zstd blocks, without frame metadata. Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes). - User will have to take in charge required information to regenerate data, such as compressed and content sizes. + But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. A few rules to respect : - Compressing and decompressing require a context structure @@ -1920,12 +1920,14 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); + copyCCtx() and copyDCtx() can be used too - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB + If input is larger than a block size, it's necessary to split input data into multiple blocks - + For inputs larger than a single block, really consider using regular ZSTD_compress() instead. - Frame metadata is not that costly, and quickly becomes negligible as source size grows larger. - - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero. - In which case, nothing is produced into `dst` ! - + User must test for such outcome and deal directly with uncompressed data - + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!! + + For inputs larger than a single block, consider using regular ZSTD_compress() instead. + Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. + - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! + ===> In which case, nothing is produced into `dst` ! + + User __must__ test for such outcome and deal directly with uncompressed data + + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. + Doing so would mess up with statistics history, leading to potential data corruption. + + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! + In case of multiple successive blocks, should some of them be uncompressed, decoder must be informed of their existence in order to follow proper history. Use ZSTD_insertBlock() for such a case. diff --git a/tests/fuzzer.c b/tests/fuzzer.c index a663111d..2de7c009 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -1907,8 +1907,10 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "test%3i : Dictionary Block compression test : ", testNb++); CHECK( ZSTD_compressBegin_usingDict(cctx, CNBuffer, dictSize, 5) ); CHECK_VAR(cSize, ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize, blockSize)); - CHECK( ZSTD_compressBlock(cctx, (char*)compressedBuffer+cSize, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize+blockSize, blockSize) ); /* just to ensure cctx history consistency */ - memcpy((char*)compressedBuffer+cSize, (char*)CNBuffer+dictSize+blockSize, blockSize); /* fake non-compressed block (without header) */ + RDG_genBuffer((char*)CNBuffer+dictSize+blockSize, blockSize, 0.0, 0.0, seed); /* create a non-compressible second block */ + { CHECK_NEWV(r, ZSTD_compressBlock(cctx, (char*)compressedBuffer+cSize, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize+blockSize, blockSize) ); /* for cctx history consistency */ + assert(r == 0); /* non-compressible block */ } + memcpy((char*)compressedBuffer+cSize, (char*)CNBuffer+dictSize+blockSize, blockSize); /* send non-compressed block (without header) */ CHECK_VAR(cSize2, ZSTD_compressBlock(cctx, (char*)compressedBuffer+cSize+blockSize, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize+2*blockSize, blockSize)); DISPLAYLEVEL(3, "OK \n"); @@ -1927,6 +1929,7 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(1, "ZSTD_decompressBlock() with _usingDict() and after insertBlock() fails : %u, instead of %u expected \n", (unsigned)r, (unsigned)blockSize); goto _output_error; } } + assert(memcpy((char*)CNBuffer+dictSize, decodedBuffer, blockSize*3)); /* ensure regenerated content is identical to origin */ DISPLAYLEVEL(3, "OK \n"); DISPLAYLEVEL(3, "test%3i : Block compression with CDict : ", testNb++);