From c991cc18287ec2d9362d1a222e0c6e00d44704c5 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 28 Jul 2016 00:55:43 +0200 Subject: [PATCH] new frame end, 32-bits checksums --- lib/common/zstd_internal.h | 2 +- lib/compress/zbuff_compress.c | 6 +- lib/compress/zstd_compress.c | 62 +++++++++++++------ lib/decompress/zstd_decompress.c | 103 +++++++++++++++++++++---------- programs/fuzzer.c | 4 +- zstd_compression_format.md | 94 +++++++++++++--------------- 6 files changed, 160 insertions(+), 111 deletions(-) diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index a68a92cc..f801959e 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -88,7 +88,7 @@ static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; #define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; -typedef enum { bt_raw, bt_rle, bt_compressed, bt_end } blockType_e; +typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ diff --git a/lib/compress/zbuff_compress.c b/lib/compress/zbuff_compress.c index 9b842f64..31c85909 100644 --- a/lib/compress/zbuff_compress.c +++ b/lib/compress/zbuff_compress.c @@ -95,6 +95,7 @@ struct ZBUFF_CCtx_s { size_t outBuffContentSize; size_t outBuffFlushedSize; ZBUFF_cStage stage; + U32 checksum; ZSTD_customMem customMem; }; /* typedef'd tp ZBUFF_CCtx within "zstd_buffered.h" */ @@ -164,6 +165,7 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, zbc->inBuffTarget = zbc->blockSize; zbc->outBuffContentSize = zbc->outBuffFlushedSize = 0; zbc->stage = ZBUFFcs_load; + zbc->checksum = params.fParams.checksumFlag > 0; return 0; /* ready to go */ } @@ -300,11 +302,11 @@ size_t ZBUFF_compressEnd(ZBUFF_CCtx* 
zbc, void* dst, size_t* dstCapacityPtr) op += outSize; if (remainingToFlush) { *dstCapacityPtr = op-ostart; - return remainingToFlush + ZBUFF_endFrameSize; + return remainingToFlush + ZBUFF_endFrameSize + (zbc->checksum * 4); } /* create epilogue */ zbc->stage = ZBUFFcs_final; - zbc->outBuffContentSize = ZSTD_compressEnd(zbc->zc, zbc->outBuff, zbc->outBuffSize); /* epilogue into outBuff */ + zbc->outBuffContentSize = ZSTD_compressEnd(zbc->zc, zbc->outBuff, zbc->outBuffSize); /* epilogue into outBuff */ } /* flush epilogue */ diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index e7d8c2da..dc4cb92f 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2227,9 +2227,17 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCa } +/*! ZSTD_compress_generic() : +* Compress a chunk of data into one or multiple blocks. +* All blocks will be terminated, all input will be consumed. +* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. 
+* Frame is supposed already started (header already produced) +* @return : compressed size, or an error code +*/ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, - const void* src, size_t srcSize) + const void* src, size_t srcSize, + U32 lastFrameChunk) { size_t blockSize = cctx->blockSize; size_t remaining = srcSize; @@ -2244,6 +2252,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, XXH64_update(&cctx->xxhState, src, srcSize); while (remaining) { + U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); size_t cSize; ZSTD_statsResetFreqs(stats); /* debug only */ @@ -2261,12 +2270,15 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, if (ZSTD_isError(cSize)) return cSize; if (cSize == 0) { /* block is not compressible */ - cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize); - if (ZSTD_isError(cSize)) return cSize; + U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(blockSize << 3); + if (blockSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall); + MEM_writeLE32(op, cBlockHeader24); /* no pb, 4th byte will be overwritten */ + memcpy(op + ZSTD_blockHeaderSize, ip, blockSize); + cSize = ZSTD_blockHeaderSize+blockSize; } else { - U32 const cBlockHeader24 = (U32)bt_compressed + (U32)(cSize << 2); + U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); MEM_writeLE24(op, cBlockHeader24); - cSize += 3; + cSize += ZSTD_blockHeaderSize; } remaining -= blockSize; @@ -2275,6 +2287,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, op += cSize; } + if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; /* only when a block flagged "last" was really emitted; empty input leaves it to ZSTD_compressEnd() */ ZSTD_statsPrint(stats, cctx->params.cParams.searchLength); /* debug only */ return op-ostart; } @@ -2322,7 +2335,7 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize, - U32 frame) + U32 frame, U32 
lastFrameChunk) { const BYTE* const ip = (const BYTE*) src; size_t fhSize = 0; @@ -2372,7 +2385,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc, zc->nextSrc = ip + srcSize; { size_t const cSize = frame ? - ZSTD_compress_generic (zc, dst, dstCapacity, src, srcSize) : + ZSTD_compress_generic (zc, dst, dstCapacity, src, srcSize, lastFrameChunk) : ZSTD_compressBlock_internal (zc, dst, dstCapacity, src, srcSize); if (ZSTD_isError(cSize)) return cSize; return cSize + fhSize; @@ -2384,7 +2397,7 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - return ZSTD_compressContinue_internal(zc, dst, dstCapacity, src, srcSize, 1); + return ZSTD_compressContinue_internal(zc, dst, dstCapacity, src, srcSize, 1, 0); } @@ -2398,7 +2411,7 @@ size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const size_t const blockSizeMax = ZSTD_getBlockSizeMax(cctx); if (srcSize > blockSizeMax) return ERROR(srcSize_wrong); ZSTD_LOG_BLOCK("%p: ZSTD_compressBlock searchLength=%d\n", cctx->base, cctx->params.cParams.searchLength); - return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0); + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0, 0); } @@ -2572,13 +2585,14 @@ size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel) * @return : nb of bytes written into dst (or an error code) */ size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) { - BYTE* op = (BYTE*)dst; + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; size_t fhSize = 0; - if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong); /*< not even init ! */ + if (cctx->stage == ZSTDcs_created) return ERROR(stage_wrong); /*< not even init ! 
*/ /* special case : empty frame */ - if (cctx->stage==ZSTDcs_init) { + if (cctx->stage == ZSTDcs_init) { fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, 0, 0); if (ZSTD_isError(fhSize)) return fhSize; dstCapacity -= fhSize; @@ -2586,16 +2600,24 @@ size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) cctx->stage = ZSTDcs_ongoing; } - /* frame epilogue */ - if (dstCapacity < ZSTD_blockHeaderSize) return ERROR(dstSize_tooSmall); - { U32 const checksum = cctx->params.fParams.checksumFlag ? - (U32)(XXH64_digest(&cctx->xxhState) >> 11) : - 0; - MEM_writeLE24(op, (U32)bt_end + (checksum << 2)); + if (cctx->stage != ZSTDcs_ending) { + /* write one last empty block, make it the "last" block */ + U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; + if (dstCapacity<4) return ERROR(dstSize_tooSmall); + MEM_writeLE32(op, cBlockHeader24); + op += ZSTD_blockHeaderSize; + dstCapacity -= ZSTD_blockHeaderSize; + } + + if (cctx->params.fParams.checksumFlag) { + U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); + if (dstCapacity<4) return ERROR(dstSize_tooSmall); + MEM_writeLE32(op, checksum); + op += 4; } cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ - return ZSTD_blockHeaderSize+fhSize; + return op-ostart; } @@ -2635,7 +2657,7 @@ static size_t ZSTD_compress_internal (ZSTD_CCtx* ctx, if(ZSTD_isError(errorCode)) return errorCode; } /* body (compression) */ - { size_t const oSize = ZSTD_compressContinue (ctx, op, dstCapacity, src, srcSize); + { size_t const oSize = ZSTD_compressContinue_internal(ctx, op, dstCapacity, src, srcSize, 1, 1); if(ZSTD_isError(oSize)) return oSize; op += oSize; dstCapacity -= oSize; } diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 2940dd68..60f7568d 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -105,6 +105,7 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } 
***************************************************************/ typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, + ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; struct ZSTD_DCtx_s @@ -131,6 +132,7 @@ struct ZSTD_DCtx_s ZSTD_customMem customMem; size_t litBufSize; size_t litSize; + size_t rleSize; BYTE litBuffer[ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH]; BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; }; /* typedef'd to ZSTD_DCtx within "zstd_static.h" */ @@ -318,6 +320,7 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t sr typedef struct { blockType_e blockType; + U32 lastBlock; U32 origSize; } blockProperties_t; @@ -327,11 +330,12 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bp { if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); { U32 const cBlockHeader = MEM_readLE24(src); - U32 const cSize = cBlockHeader >> 2; - bpPtr->blockType = (blockType_e)(cBlockHeader & 3); + U32 const cSize = cBlockHeader >> 3; + bpPtr->lastBlock = cBlockHeader & 1; + bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); bpPtr->origSize = cSize; /* only useful for RLE */ - if (bpPtr->blockType == bt_end) return 0; if (bpPtr->blockType == bt_rle) return 1; + if (bpPtr->blockType == bt_reserved) return ERROR(corruption_detected); return cSize; } } @@ -345,6 +349,14 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, } +static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, size_t regenSize) +{ + if (srcSize != 1) return ERROR(srcSize_wrong); + if (regenSize > dstCapacity) return ERROR(dstSize_tooSmall); + memset(dst, *(const BYTE*)src, regenSize); + return regenSize; +} + /*! 
ZSTD_decodeLiteralsBlock() : @return : nb of bytes read from src (< srcSize ) */ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, @@ -889,29 +901,29 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, case bt_rle : decodedSize = ZSTD_generateNxBytes(op, oend-op, *ip, blockProperties.origSize); break; - case bt_end : - /* end of frame */ - if (remainingSize) return ERROR(srcSize_wrong); - if (dctx->fParams.checksumFlag) { - U64 const h64 = XXH64_digest(&dctx->xxhState); - U32 const h32 = (U32)(h64>>11) & ((1<<22)-1); - U32 const check32 = MEM_readLE24(src) >> 2; - if (check32 != h32) return ERROR(checksum_wrong); - } - decodedSize = 0; - break; + case bt_reserved : default: - return ERROR(GENERIC); /* impossible */ + return ERROR(corruption_detected); } - if (blockProperties.blockType == bt_end) break; /* bt_end */ if (ZSTD_isError(decodedSize)) return decodedSize; if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, op, decodedSize); op += decodedSize; ip += cBlockSize; remainingSize -= cBlockSize; + if (blockProperties.lastBlock) break; } + if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ + U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); + U32 checkRead; + if (remainingSize<4) return ERROR(checksum_wrong); + checkRead = MEM_readLE32(ip); + if (checkRead != checkCalc) return ERROR(checksum_wrong); + remainingSize -= 4; + } + + if (remainingSize) return ERROR(srcSize_wrong); return op-ostart; } @@ -1022,22 +1034,29 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c { blockProperties_t bp; size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); if (ZSTD_isError(cBlockSize)) return cBlockSize; - if (bp.blockType == bt_end) { + dctx->expected = cBlockSize; + dctx->bType = bp.blockType; + dctx->rleSize = bp.origSize; + if (cBlockSize) { + dctx->stage = bp.lastBlock ? 
ZSTDds_decompressLastBlock : ZSTDds_decompressBlock; + return 0; + } + /* empty block */ + if (bp.lastBlock) { if (dctx->fParams.checksumFlag) { - U64 const h64 = XXH64_digest(&dctx->xxhState); - U32 const h32 = (U32)(h64>>11) & ((1<<22)-1); - U32 const check32 = MEM_readLE24(src) >> 2; - if (check32 != h32) return ERROR(checksum_wrong); + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + dctx->expected = 0; /* end of frame */ + dctx->stage = ZSTDds_getFrameHeaderSize; } - dctx->expected = 0; - dctx->stage = ZSTDds_getFrameHeaderSize; } else { - dctx->expected = cBlockSize; - dctx->bType = bp.blockType; - dctx->stage = ZSTDds_decompressBlock; + dctx->expected = ZSTD_blockHeaderSize; /* go directly to next header */ + dctx->stage = ZSTDds_decodeBlockHeader; } return 0; } + case ZSTDds_decompressLastBlock: case ZSTDds_decompressBlock: { size_t rSize; switch(dctx->bType) @@ -1049,21 +1068,37 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); break; case bt_rle : - return ERROR(GENERIC); /* not yet handled */ - break; - case bt_end : /* should never happen (filtered at phase 1) */ - rSize = 0; + rSize = ZSTD_setRleBlock(dst, dstCapacity, src, srcSize, dctx->rleSize); break; + case bt_reserved : /* should never happen */ default: - return ERROR(GENERIC); /* impossible */ + return ERROR(corruption_detected); } - dctx->stage = ZSTDds_decodeBlockHeader; - dctx->expected = ZSTD_blockHeaderSize; - dctx->previousDstEnd = (char*)dst + rSize; if (ZSTD_isError(rSize)) return rSize; if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize); + + if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ + dctx->expected = 0; /* default : frame ends here */ + dctx->stage = ZSTDds_getFrameHeaderSize; + if (dctx->fParams.checksumFlag) { /* another round : the 4-byte frame checksum follows the last block */ + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } + } else { + dctx->stage = 
ZSTDds_decodeBlockHeader; + dctx->expected = ZSTD_blockHeaderSize; + dctx->previousDstEnd = (char*)dst + rSize; + } return rSize; } + case ZSTDds_checkChecksum: + { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); + U32 const check32 = MEM_readLE32(src); /* srcSize == 4, guaranteed by dctx->expected */ + if (check32 != h32) return ERROR(checksum_wrong); + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + } case ZSTDds_decodeSkippableHeader: { memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_min, src, dctx->expected); dctx->expected = MEM_readLE32(dctx->headerBuffer + 4); diff --git a/programs/fuzzer.c b/programs/fuzzer.c index 3778f12b..33cfbcb2 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -145,8 +145,8 @@ static int basicUnitTests(U32 seed, double compressibility) DISPLAYLEVEL(4, "OK \n"); DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, (U32)CNBuffSize); - CHECKPLUS( r , ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize), - if (r != CNBuffSize) goto _output_error); + { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize); + if (r != CNBuffSize) goto _output_error; } DISPLAYLEVEL(4, "OK \n"); DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++); diff --git a/zstd_compression_format.md b/zstd_compression_format.md index efbf13cd..79f9620e 100644 --- a/zstd_compression_format.md +++ b/zstd_compression_format.md @@ -100,9 +100,9 @@ General Structure of Zstandard Frame format ------------------------------------------- The structure of a single Zstandard frame is following: -| `Magic_Number` | `Frame_Header` |`Data_Block`| [More data blocks] |`End_Marker`| -|:--------------:|:--------------:|:----------:| ------------------ |:----------:| -| 4 bytes | 2-14 bytes | n bytes | | 3 bytes | +| `Magic_Number` | `Frame_Header` |`Data_Block`| [More data blocks] | [`Content_Checksum`] | +|:--------------:|:--------------:|:----------:| ------------------ 
|:--------------------:| +| 4 bytes | 2-14 bytes | n bytes | | 0-4 bytes | __`Magic_Number`__ @@ -118,27 +118,13 @@ __`Data_Block`__ Detailed in [next chapter](#the-structure-of-data_block). That’s where compressed data is stored. -__`End_Marker`__ +__`Content_Checksum`__ -The flow of blocks ends when the last block header brings an _end signal_. -This last block header may optionally host a `Content_Checksum`. - -##### __`Content_Checksum`__ - -`Content_Checksum` allow to verify that frame content has been regenerated correctly. +An optional 32-bit checksum, only present if `Content_Checksum_flag` is set. The content checksum is the result of [xxh64() hash function](https://www.xxHash.com) digesting the original (decoded) data as input, and a seed of zero. -Bits from 11 to 32 (included) are extracted to form a 22 bits checksum -stored within `End_Marker`. -``` -mask22bits = (1<<22)-1; -contentChecksum = (XXH64(content, size, 0) >> 11) & mask22bits; -``` -`Content_Checksum` is only present when its associated flag -is set in the frame descriptor. -Its usage is optional. - +The low 4 bytes of the checksum are stored in little endian format. The structure of `Frame_Header` @@ -172,23 +158,25 @@ __`Frame_Content_Size_flag`__ This is a 2-bits flag (`= Frame_Header_Descriptor >> 6`), specifying if decompressed data size is provided within the header. -The `Value` can be converted to `Field_Size` that is number of bytes used by `Frame_Content_Size` according to the following table: +The `Flag_Value` can be converted into `Field_Size`, +which is the number of bytes used by `Frame_Content_Size` +according to the following table: -| `Value` | 0 | 1 | 2 | 3 | +|`Flag_Value`| 0 | 1 | 2 | 3 | | ---------- | --- | --- | --- | --- | |`Field_Size`| 0-1 | 2 | 4 | 8 | -The meaning of `Value` equal to `0` depends on `Single_Segment_flag` : -it either means `0` (size not provided) _if_ the `Window_Descriptor` byte is present, -or `1` (frame content size <= 255 bytes) otherwise. 
+When `Flag_Value` is `0`, `Field_Size` depends on `Single_Segment_flag` : +if `Single_Segment_flag` is set, `Field_Size` is 1. +Otherwise, `Field_Size` is 0 (content size not provided). __`Single_Segment_flag`__ If this flag is set, -data shall be regenerated within a single continuous memory segment. +data must be regenerated within a single continuous memory segment. -In this case, `Window_Descriptor` byte __is not present__, -but `Frame_Content_Size_flag` field necessarily is. +In this case, `Frame_Content_Size` is necessarily present, +but `Window_Descriptor` byte is skipped. As a consequence, the decoder must allocate a memory segment of size equal or bigger than `Frame_Content_Size`. @@ -205,7 +193,7 @@ depending on local limitations. __`Unused_bit`__ The value of this bit should be set to zero. -A decoder compliant with this specification version should not interpret it. +A decoder compliant with this specification version shall not interpret it. It might be used in a future version, to signal a property which is not mandatory to properly decode the frame. @@ -215,13 +203,12 @@ This bit is reserved for some future feature. Its value _must be zero_. A decoder compliant with this specification version must ensure it is not set. This bit may be used in a future revision, -to signal a feature that must be interpreted in order to decode the frame. +to signal a feature that must be interpreted to decode the frame correctly. __`Content_Checksum_flag`__ -If this flag is set, a content checksum will be present within `End_Marker`. -The checksum is a 22 bits value extracted from the XXH64() of data, -and stored within `End_Marker`. See [`Content_Checksum`](#content_checksum) . +If this flag is set, a 32-bits `Content_Checksum` will be present at frame's end. +See `Content_Checksum` paragraph. __`Dictionary_ID_flag`__ @@ -236,10 +223,10 @@ It also specifies the size of this field. 
### `Window_Descriptor` Provides guarantees on maximum back-reference distance -that will be present within compressed data. -This information is useful for decoders to allocate enough memory. +that will be used within compressed data. +This information is important for decoders to allocate enough memory. -The `Window_Descriptor` byte is optional. It should be absent if `Single_Segment_flag` is set. +The `Window_Descriptor` byte is optional. It is absent when `Single_Segment_flag` is set. In this case, the maximum back-reference distance is the content size itself, which can be any value from 1 to 2^64-1 bytes (16 EB). @@ -265,8 +252,8 @@ a decoder can refuse a compressed frame which requests a memory size beyond decoder's authorized range. For improved interoperability, -decoders are recommended to be compatible with window sizes of 8 MB. -Encoders are recommended to not request more than 8 MB. +decoders are recommended to be compatible with window sizes of 8 MB, +and encoders are recommended to not request more than 8 MB. It's merely a recommendation though, decoders are free to support larger or lower limits, depending on local limitations. @@ -313,30 +300,34 @@ When `Field_Size` is 1, 4 or 8 bytes, the value is read directly. When `Field_Size` is 2, _the offset of 256 is added_. It's allowed to represent a small size (for example `18`) using any compatible variant. -In order to preserve decoder from unreasonable memory requirement, -a decoder can refuse a compressed frame -which requests a memory size beyond decoder's authorized range. 
- The structure of `Data_Block` ----------------------------- The structure of `Data_Block` is following: -| `Block_Type` | `Block_Size` | `Block_Content` | -|:------------:|:------------:|:---------------:| -| 2 bits | 22 bits | n bytes | +| `Last_Block` | `Block_Type` | `Block_Size` | `Block_Content` | +|:------------:|:------------:|:------------:|:---------------:| +| 1 bit | 2 bits | 21 bits | n bytes | + +The block header uses 3 bytes. + +__`Last_Block`__ + +The lowest bit signals if this block is the last one. +The frame ends right after this block. +It may be followed by an optional `Content_Checksum`. __`Block_Type` and `Block_Size`__ -The block header uses 3-bytes, format is __little-endian__. -The 2 highest bits represent the `Block_Type`, -while the remaining 22 bits represent the (compressed) `Block_Size`. +The next 2 bits represent the `Block_Type`, +while the remaining 21 bits represent the `Block_Size`. +Format is __little-endian__. There are 4 block types : | Value | 0 | 1 | 2 | 3 | | ------------ | ----------- | ----------- | ------------------ | --------- | -| `Block_Type` | `Raw_Block` | `RLE_Block` | `Compressed_Block` | `EndMark` | +| `Block_Type` | `Raw_Block` | `RLE_Block` | `Compressed_Block` | `Reserved`| - `Raw_Block` - this is an uncompressed block. `Block_Size` is the number of bytes to read and copy. @@ -348,9 +339,8 @@ There are 4 block types : `Block_Size` is the compressed size. Decompressed size is unknown, but its maximum possible value is guaranteed (see below) -- `EndMark` - this is not a block. It signals the end of the frame. - The rest of the field may be optionally filled by a checksum - (see [`Content_Checksum`](#content_checksum)). +- `Reserved` - this is not a block. + This value cannot be used with the current version of this specification. Block sizes must respect a few rules : - In compressed mode, compressed size if always strictly `< decompressed size`.