new frame end, 32-bits checksums

This commit is contained in:
Yann Collet 2016-07-28 00:55:43 +02:00
parent d4180cad9c
commit c991cc1828
6 changed files with 160 additions and 111 deletions

View File

@ -88,7 +88,7 @@ static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ #define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
typedef enum { bt_raw, bt_rle, bt_compressed, bt_end } blockType_e; typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */

View File

@ -95,6 +95,7 @@ struct ZBUFF_CCtx_s {
size_t outBuffContentSize; size_t outBuffContentSize;
size_t outBuffFlushedSize; size_t outBuffFlushedSize;
ZBUFF_cStage stage; ZBUFF_cStage stage;
U32 checksum;
ZSTD_customMem customMem; ZSTD_customMem customMem;
}; /* typedef'd tp ZBUFF_CCtx within "zstd_buffered.h" */ }; /* typedef'd tp ZBUFF_CCtx within "zstd_buffered.h" */
@ -164,6 +165,7 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
zbc->inBuffTarget = zbc->blockSize; zbc->inBuffTarget = zbc->blockSize;
zbc->outBuffContentSize = zbc->outBuffFlushedSize = 0; zbc->outBuffContentSize = zbc->outBuffFlushedSize = 0;
zbc->stage = ZBUFFcs_load; zbc->stage = ZBUFFcs_load;
zbc->checksum = params.fParams.checksumFlag > 0;
return 0; /* ready to go */ return 0; /* ready to go */
} }
@ -300,11 +302,11 @@ size_t ZBUFF_compressEnd(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr)
op += outSize; op += outSize;
if (remainingToFlush) { if (remainingToFlush) {
*dstCapacityPtr = op-ostart; *dstCapacityPtr = op-ostart;
return remainingToFlush + ZBUFF_endFrameSize; return remainingToFlush + ZBUFF_endFrameSize + (zbc->checksum * 4);
} }
/* create epilogue */ /* create epilogue */
zbc->stage = ZBUFFcs_final; zbc->stage = ZBUFFcs_final;
zbc->outBuffContentSize = ZSTD_compressEnd(zbc->zc, zbc->outBuff, zbc->outBuffSize); /* epilogue into outBuff */ zbc->outBuffContentSize = ZSTD_compressEnd(zbc->zc, zbc->outBuff, zbc->outBuffSize); /* epilogue into outBuff */
} }
/* flush epilogue */ /* flush epilogue */

View File

@ -2227,9 +2227,17 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCa
} }
/*! ZSTD_compress_generic() :
* Compress a chunk of data into one or multiple blocks.
* All blocks will be terminated, all input will be consumed.
* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
* Frame is supposed already started (header already produced)
* @return : compressed size, or an error code
*/
static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity, void* dst, size_t dstCapacity,
const void* src, size_t srcSize) const void* src, size_t srcSize,
U32 lastFrameChunk)
{ {
size_t blockSize = cctx->blockSize; size_t blockSize = cctx->blockSize;
size_t remaining = srcSize; size_t remaining = srcSize;
@ -2244,6 +2252,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
XXH64_update(&cctx->xxhState, src, srcSize); XXH64_update(&cctx->xxhState, src, srcSize);
while (remaining) { while (remaining) {
U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
size_t cSize; size_t cSize;
ZSTD_statsResetFreqs(stats); /* debug only */ ZSTD_statsResetFreqs(stats); /* debug only */
@ -2261,12 +2270,15 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
if (ZSTD_isError(cSize)) return cSize; if (ZSTD_isError(cSize)) return cSize;
if (cSize == 0) { /* block is not compressible */ if (cSize == 0) { /* block is not compressible */
cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize); U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(blockSize << 3);
if (ZSTD_isError(cSize)) return cSize; if (blockSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
MEM_writeLE32(op, cBlockHeader24); /* no pb, 4th byte will be overwritten */
memcpy(op + ZSTD_blockHeaderSize, ip, blockSize);
cSize = ZSTD_blockHeaderSize+blockSize;
} else { } else {
U32 const cBlockHeader24 = (U32)bt_compressed + (U32)(cSize << 2); U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
MEM_writeLE24(op, cBlockHeader24); MEM_writeLE24(op, cBlockHeader24);
cSize += 3; cSize += ZSTD_blockHeaderSize;
} }
remaining -= blockSize; remaining -= blockSize;
@ -2275,6 +2287,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
op += cSize; op += cSize;
} }
if (lastFrameChunk) cctx->stage = ZSTDcs_ending;
ZSTD_statsPrint(stats, cctx->params.cParams.searchLength); /* debug only */ ZSTD_statsPrint(stats, cctx->params.cParams.searchLength); /* debug only */
return op-ostart; return op-ostart;
} }
@ -2322,7 +2335,7 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc, static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc,
void* dst, size_t dstCapacity, void* dst, size_t dstCapacity,
const void* src, size_t srcSize, const void* src, size_t srcSize,
U32 frame) U32 frame, U32 lastFrameChunk)
{ {
const BYTE* const ip = (const BYTE*) src; const BYTE* const ip = (const BYTE*) src;
size_t fhSize = 0; size_t fhSize = 0;
@ -2372,7 +2385,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc,
zc->nextSrc = ip + srcSize; zc->nextSrc = ip + srcSize;
{ size_t const cSize = frame ? { size_t const cSize = frame ?
ZSTD_compress_generic (zc, dst, dstCapacity, src, srcSize) : ZSTD_compress_generic (zc, dst, dstCapacity, src, srcSize, lastFrameChunk) :
ZSTD_compressBlock_internal (zc, dst, dstCapacity, src, srcSize); ZSTD_compressBlock_internal (zc, dst, dstCapacity, src, srcSize);
if (ZSTD_isError(cSize)) return cSize; if (ZSTD_isError(cSize)) return cSize;
return cSize + fhSize; return cSize + fhSize;
@ -2384,7 +2397,7 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* zc,
void* dst, size_t dstCapacity, void* dst, size_t dstCapacity,
const void* src, size_t srcSize) const void* src, size_t srcSize)
{ {
return ZSTD_compressContinue_internal(zc, dst, dstCapacity, src, srcSize, 1); return ZSTD_compressContinue_internal(zc, dst, dstCapacity, src, srcSize, 1, 0);
} }
@ -2398,7 +2411,7 @@ size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const
size_t const blockSizeMax = ZSTD_getBlockSizeMax(cctx); size_t const blockSizeMax = ZSTD_getBlockSizeMax(cctx);
if (srcSize > blockSizeMax) return ERROR(srcSize_wrong); if (srcSize > blockSizeMax) return ERROR(srcSize_wrong);
ZSTD_LOG_BLOCK("%p: ZSTD_compressBlock searchLength=%d\n", cctx->base, cctx->params.cParams.searchLength); ZSTD_LOG_BLOCK("%p: ZSTD_compressBlock searchLength=%d\n", cctx->base, cctx->params.cParams.searchLength);
return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0); return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0, 0);
} }
@ -2572,13 +2585,14 @@ size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel)
* @return : nb of bytes written into dst (or an error code) */ * @return : nb of bytes written into dst (or an error code) */
size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
{ {
BYTE* op = (BYTE*)dst; BYTE* const ostart = (BYTE*)dst;
BYTE* op = ostart;
size_t fhSize = 0; size_t fhSize = 0;
if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong); /*< not even init ! */ if (cctx->stage == ZSTDcs_created) return ERROR(stage_wrong); /*< not even init ! */
/* special case : empty frame */ /* special case : empty frame */
if (cctx->stage==ZSTDcs_init) { if (cctx->stage == ZSTDcs_init) {
fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, 0, 0); fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, 0, 0);
if (ZSTD_isError(fhSize)) return fhSize; if (ZSTD_isError(fhSize)) return fhSize;
dstCapacity -= fhSize; dstCapacity -= fhSize;
@ -2586,16 +2600,24 @@ size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
cctx->stage = ZSTDcs_ongoing; cctx->stage = ZSTDcs_ongoing;
} }
/* frame epilogue */ if (cctx->stage != ZSTDcs_ending) {
if (dstCapacity < ZSTD_blockHeaderSize) return ERROR(dstSize_tooSmall); /* write one last empty block, make it the "last" block */
{ U32 const checksum = cctx->params.fParams.checksumFlag ? U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
(U32)(XXH64_digest(&cctx->xxhState) >> 11) : if (dstCapacity<4) return ERROR(dstSize_tooSmall);
0; MEM_writeLE32(op, cBlockHeader24);
MEM_writeLE24(op, (U32)bt_end + (checksum << 2)); op += ZSTD_blockHeaderSize;
dstCapacity -= ZSTD_blockHeaderSize;
}
if (cctx->params.fParams.checksumFlag) {
U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
if (dstCapacity<4) return ERROR(dstSize_tooSmall);
MEM_writeLE32(op, checksum);
op += 4;
} }
cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ cctx->stage = ZSTDcs_created; /* return to "created but no init" status */
return ZSTD_blockHeaderSize+fhSize; return op-ostart;
} }
@ -2635,7 +2657,7 @@ static size_t ZSTD_compress_internal (ZSTD_CCtx* ctx,
if(ZSTD_isError(errorCode)) return errorCode; } if(ZSTD_isError(errorCode)) return errorCode; }
/* body (compression) */ /* body (compression) */
{ size_t const oSize = ZSTD_compressContinue (ctx, op, dstCapacity, src, srcSize); { size_t const oSize = ZSTD_compressContinue_internal(ctx, op, dstCapacity, src, srcSize, 1, 1);
if(ZSTD_isError(oSize)) return oSize; if(ZSTD_isError(oSize)) return oSize;
op += oSize; op += oSize;
dstCapacity -= oSize; } dstCapacity -= oSize; }

View File

@ -105,6 +105,7 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
***************************************************************/ ***************************************************************/
typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;
struct ZSTD_DCtx_s struct ZSTD_DCtx_s
@ -131,6 +132,7 @@ struct ZSTD_DCtx_s
ZSTD_customMem customMem; ZSTD_customMem customMem;
size_t litBufSize; size_t litBufSize;
size_t litSize; size_t litSize;
size_t rleSize;
BYTE litBuffer[ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH]; BYTE litBuffer[ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH];
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
}; /* typedef'd to ZSTD_DCtx within "zstd_static.h" */ }; /* typedef'd to ZSTD_DCtx within "zstd_static.h" */
@ -318,6 +320,7 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t sr
typedef struct typedef struct
{ {
blockType_e blockType; blockType_e blockType;
U32 lastBlock;
U32 origSize; U32 origSize;
} blockProperties_t; } blockProperties_t;
@ -327,11 +330,12 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bp
{ {
if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
{ U32 const cBlockHeader = MEM_readLE24(src); { U32 const cBlockHeader = MEM_readLE24(src);
U32 const cSize = cBlockHeader >> 2; U32 const cSize = cBlockHeader >> 3;
bpPtr->blockType = (blockType_e)(cBlockHeader & 3); bpPtr->lastBlock = cBlockHeader & 1;
bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
bpPtr->origSize = cSize; /* only useful for RLE */ bpPtr->origSize = cSize; /* only useful for RLE */
if (bpPtr->blockType == bt_end) return 0;
if (bpPtr->blockType == bt_rle) return 1; if (bpPtr->blockType == bt_rle) return 1;
if (bpPtr->blockType == bt_reserved) return ERROR(corruption_detected);
return cSize; return cSize;
} }
} }
@ -345,6 +349,14 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src,
} }
static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, size_t regenSize)
{
if (srcSize != 1) return ERROR(srcSize_wrong);
if (regenSize > dstCapacity) return ERROR(dstSize_tooSmall);
memset(dst, *(const BYTE*)src, regenSize);
return regenSize;
}
/*! ZSTD_decodeLiteralsBlock() : /*! ZSTD_decodeLiteralsBlock() :
@return : nb of bytes read from src (< srcSize ) */ @return : nb of bytes read from src (< srcSize ) */
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
@ -889,29 +901,29 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
case bt_rle : case bt_rle :
decodedSize = ZSTD_generateNxBytes(op, oend-op, *ip, blockProperties.origSize); decodedSize = ZSTD_generateNxBytes(op, oend-op, *ip, blockProperties.origSize);
break; break;
case bt_end : case bt_reserved :
/* end of frame */
if (remainingSize) return ERROR(srcSize_wrong);
if (dctx->fParams.checksumFlag) {
U64 const h64 = XXH64_digest(&dctx->xxhState);
U32 const h32 = (U32)(h64>>11) & ((1<<22)-1);
U32 const check32 = MEM_readLE24(src) >> 2;
if (check32 != h32) return ERROR(checksum_wrong);
}
decodedSize = 0;
break;
default: default:
return ERROR(GENERIC); /* impossible */ return ERROR(corruption_detected);
} }
if (blockProperties.blockType == bt_end) break; /* bt_end */
if (ZSTD_isError(decodedSize)) return decodedSize; if (ZSTD_isError(decodedSize)) return decodedSize;
if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, op, decodedSize); if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, op, decodedSize);
op += decodedSize; op += decodedSize;
ip += cBlockSize; ip += cBlockSize;
remainingSize -= cBlockSize; remainingSize -= cBlockSize;
if (blockProperties.lastBlock) break;
} }
if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState);
U32 checkRead;
if (remainingSize<4) return ERROR(checksum_wrong);
checkRead = MEM_readLE32(ip);
if (checkRead != checkCalc) return ERROR(checksum_wrong);
remainingSize -= 4;
}
if (remainingSize) return ERROR(srcSize_wrong);
return op-ostart; return op-ostart;
} }
@ -1022,22 +1034,29 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
{ blockProperties_t bp; { blockProperties_t bp;
size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
if (ZSTD_isError(cBlockSize)) return cBlockSize; if (ZSTD_isError(cBlockSize)) return cBlockSize;
if (bp.blockType == bt_end) { dctx->expected = cBlockSize;
dctx->bType = bp.blockType;
dctx->rleSize = bp.origSize;
if (cBlockSize) {
dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock;
return 0;
}
/* empty block */
if (bp.lastBlock) {
if (dctx->fParams.checksumFlag) { if (dctx->fParams.checksumFlag) {
U64 const h64 = XXH64_digest(&dctx->xxhState); dctx->expected = 4;
U32 const h32 = (U32)(h64>>11) & ((1<<22)-1); dctx->stage = ZSTDds_checkChecksum;
U32 const check32 = MEM_readLE24(src) >> 2; } else {
if (check32 != h32) return ERROR(checksum_wrong); dctx->expected = 0; /* end of frame */
dctx->stage = ZSTDds_getFrameHeaderSize;
} }
dctx->expected = 0;
dctx->stage = ZSTDds_getFrameHeaderSize;
} else { } else {
dctx->expected = cBlockSize; dctx->expected = 3; /* go directly to next header */
dctx->bType = bp.blockType; dctx->stage = ZSTDds_decodeBlockHeader;
dctx->stage = ZSTDds_decompressBlock;
} }
return 0; return 0;
} }
case ZSTDds_decompressLastBlock:
case ZSTDds_decompressBlock: case ZSTDds_decompressBlock:
{ size_t rSize; { size_t rSize;
switch(dctx->bType) switch(dctx->bType)
@ -1049,21 +1068,37 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize);
break; break;
case bt_rle : case bt_rle :
return ERROR(GENERIC); /* not yet handled */ rSize = ZSTD_setRleBlock(dst, dstCapacity, src, srcSize, dctx->rleSize);
break;
case bt_end : /* should never happen (filtered at phase 1) */
rSize = 0;
break; break;
case bt_reserved : /* should never happen */
default: default:
return ERROR(GENERIC); /* impossible */ return ERROR(corruption_detected);
} }
dctx->stage = ZSTDds_decodeBlockHeader;
dctx->expected = ZSTD_blockHeaderSize;
dctx->previousDstEnd = (char*)dst + rSize;
if (ZSTD_isError(rSize)) return rSize; if (ZSTD_isError(rSize)) return rSize;
if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize); if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);
if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */
if (dctx->fParams.checksumFlag) { /* another round for frame checksum */
dctx->expected = 0;
dctx->stage = ZSTDds_checkChecksum;
}
dctx->expected = 0; /* ends here */
dctx->stage = ZSTDds_getFrameHeaderSize;
} else {
dctx->stage = ZSTDds_decodeBlockHeader;
dctx->expected = ZSTD_blockHeaderSize;
dctx->previousDstEnd = (char*)dst + rSize;
}
return rSize; return rSize;
} }
case ZSTDds_checkChecksum:
{ U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
U32 const check32 = MEM_readLE32(src); /* srcSize == 4, guaranteed by dctx->expected */
if (check32 != h32) return ERROR(checksum_wrong);
dctx->expected = 0;
dctx->stage = ZSTDds_getFrameHeaderSize;
return 0;
}
case ZSTDds_decodeSkippableHeader: case ZSTDds_decodeSkippableHeader:
{ memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_min, src, dctx->expected); { memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_min, src, dctx->expected);
dctx->expected = MEM_readLE32(dctx->headerBuffer + 4); dctx->expected = MEM_readLE32(dctx->headerBuffer + 4);

View File

@ -145,8 +145,8 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(4, "OK \n"); DISPLAYLEVEL(4, "OK \n");
DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, (U32)CNBuffSize); DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, (U32)CNBuffSize);
CHECKPLUS( r , ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize), { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize);
if (r != CNBuffSize) goto _output_error); if (r != CNBuffSize) goto _output_error; }
DISPLAYLEVEL(4, "OK \n"); DISPLAYLEVEL(4, "OK \n");
DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++); DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++);

View File

@ -100,9 +100,9 @@ General Structure of Zstandard Frame format
------------------------------------------- -------------------------------------------
The structure of a single Zstandard frame is following: The structure of a single Zstandard frame is following:
| `Magic_Number` | `Frame_Header` |`Data_Block`| [More data blocks] |`End_Marker`| | `Magic_Number` | `Frame_Header` |`Data_Block`| [More data blocks] | [`Content_Checksum`] |
|:--------------:|:--------------:|:----------:| ------------------ |:----------:| |:--------------:|:--------------:|:----------:| ------------------ |:--------------------:|
| 4 bytes | 2-14 bytes | n bytes | | 3 bytes | | 4 bytes | 2-14 bytes | n bytes | | 0-4 bytes |
__`Magic_Number`__ __`Magic_Number`__
@ -118,27 +118,13 @@ __`Data_Block`__
Detailed in [next chapter](#the-structure-of-data_block). Detailed in [next chapter](#the-structure-of-data_block).
That's where compressed data is stored. That's where compressed data is stored.
__`End_Marker`__ __`Content_Checksum`__
The flow of blocks ends when the last block header brings an _end signal_. An optional 32-bit checksum, only present if `Content_Checksum_flag` is set.
This last block header may optionally host a `Content_Checksum`.
##### __`Content_Checksum`__
`Content_Checksum` allow to verify that frame content has been regenerated correctly.
The content checksum is the result The content checksum is the result
of [xxh64() hash function](https://www.xxHash.com) of [xxh64() hash function](https://www.xxHash.com)
digesting the original (decoded) data as input, and a seed of zero. digesting the original (decoded) data as input, and a seed of zero.
Bits from 11 to 32 (included) are extracted to form a 22 bits checksum The low 4 bytes of the checksum are stored in little endian format.
stored within `End_Marker`.
```
mask22bits = (1<<22)-1;
contentChecksum = (XXH64(content, size, 0) >> 11) & mask22bits;
```
`Content_Checksum` is only present when its associated flag
is set in the frame descriptor.
Its usage is optional.
The structure of `Frame_Header` The structure of `Frame_Header`
@ -172,23 +158,25 @@ __`Frame_Content_Size_flag`__
This is a 2-bits flag (`= Frame_Header_Descriptor >> 6`), This is a 2-bits flag (`= Frame_Header_Descriptor >> 6`),
specifying if decompressed data size is provided within the header. specifying if decompressed data size is provided within the header.
The `Value` can be converted to `Field_Size` that is number of bytes used by `Frame_Content_Size` according to the following table: The `Flag_Value` can be converted into `Field_Size`,
which is the number of bytes used by `Frame_Content_Size`
according to the following table:
| `Value` | 0 | 1 | 2 | 3 | |`Flag_Value`| 0 | 1 | 2 | 3 |
| ---------- | --- | --- | --- | --- | | ---------- | --- | --- | --- | --- |
|`Field_Size`| 0-1 | 2 | 4 | 8 | |`Field_Size`| 0-1 | 2 | 4 | 8 |
The meaning of `Value` equal to `0` depends on `Single_Segment_flag` : When `Flag_Value` is `0`, `Field_Size` depends on `Single_Segment_flag` :
it either means `0` (size not provided) _if_ the `Window_Descriptor` byte is present, if `Single_Segment_flag` is set, `Field_Size` is 1.
or `1` (frame content size <= 255 bytes) otherwise. Otherwise, `Field_Size` is 0 (content size not provided).
__`Single_Segment_flag`__ __`Single_Segment_flag`__
If this flag is set, If this flag is set,
data shall be regenerated within a single continuous memory segment. data must be regenerated within a single continuous memory segment.
In this case, `Window_Descriptor` byte __is not present__, In this case, `Frame_Content_Size` is necessarily present,
but `Frame_Content_Size_flag` field necessarily is. but `Window_Descriptor` byte is skipped.
As a consequence, the decoder must allocate a memory segment As a consequence, the decoder must allocate a memory segment
of size equal or bigger than `Frame_Content_Size`. of size equal or bigger than `Frame_Content_Size`.
@ -205,7 +193,7 @@ depending on local limitations.
__`Unused_bit`__ __`Unused_bit`__
The value of this bit should be set to zero. The value of this bit should be set to zero.
A decoder compliant with this specification version should not interpret it. A decoder compliant with this specification version shall not interpret it.
It might be used in a future version, It might be used in a future version,
to signal a property which is not mandatory to properly decode the frame. to signal a property which is not mandatory to properly decode the frame.
@ -215,13 +203,12 @@ This bit is reserved for some future feature.
Its value _must be zero_. Its value _must be zero_.
A decoder compliant with this specification version must ensure it is not set. A decoder compliant with this specification version must ensure it is not set.
This bit may be used in a future revision, This bit may be used in a future revision,
to signal a feature that must be interpreted in order to decode the frame. to signal a feature that must be interpreted to decode the frame correctly.
__`Content_Checksum_flag`__ __`Content_Checksum_flag`__
If this flag is set, a content checksum will be present within `End_Marker`. If this flag is set, a 32-bits `Content_Checksum` will be present at frame's end.
The checksum is a 22 bits value extracted from the XXH64() of data, See `Content_Checksum` paragraph.
and stored within `End_Marker`. See [`Content_Checksum`](#content_checksum) .
__`Dictionary_ID_flag`__ __`Dictionary_ID_flag`__
@ -236,10 +223,10 @@ It also specifies the size of this field.
### `Window_Descriptor` ### `Window_Descriptor`
Provides guarantees on maximum back-reference distance Provides guarantees on maximum back-reference distance
that will be present within compressed data. that will be used within compressed data.
This information is useful for decoders to allocate enough memory. This information is important for decoders to allocate enough memory.
The `Window_Descriptor` byte is optional. It should be absent if `Single_Segment_flag` is set. The `Window_Descriptor` byte is optional. It is absent when `Single_Segment_flag` is set.
In this case, the maximum back-reference distance is the content size itself, In this case, the maximum back-reference distance is the content size itself,
which can be any value from 1 to 2^64-1 bytes (16 EB). which can be any value from 1 to 2^64-1 bytes (16 EB).
@ -265,8 +252,8 @@ a decoder can refuse a compressed frame
which requests a memory size beyond decoder's authorized range. which requests a memory size beyond decoder's authorized range.
For improved interoperability, For improved interoperability,
decoders are recommended to be compatible with window sizes of 8 MB. decoders are recommended to be compatible with window sizes of 8 MB,
Encoders are recommended to not request more than 8 MB. and encoders are recommended to not request more than 8 MB.
It's merely a recommendation though, It's merely a recommendation though,
decoders are free to support larger or lower limits, decoders are free to support larger or lower limits,
depending on local limitations. depending on local limitations.
@ -313,30 +300,34 @@ When `Field_Size` is 1, 4 or 8 bytes, the value is read directly.
When `Field_Size` is 2, _the offset of 256 is added_. When `Field_Size` is 2, _the offset of 256 is added_.
It's allowed to represent a small size (for example `18`) using any compatible variant. It's allowed to represent a small size (for example `18`) using any compatible variant.
In order to preserve decoder from unreasonable memory requirement,
a decoder can refuse a compressed frame
which requests a memory size beyond decoder's authorized range.
The structure of `Data_Block` The structure of `Data_Block`
----------------------------- -----------------------------
The structure of `Data_Block` is following: The structure of `Data_Block` is following:
| `Block_Type` | `Block_Size` | `Block_Content` | | `Last_Block` | `Block_Type` | `Block_Size` | `Block_Content` |
|:------------:|:------------:|:---------------:| |:------------:|:------------:|:------------:|:---------------:|
| 2 bits | 22 bits | n bytes | | 1 bit | 2 bits | 21 bits | n bytes |
The block header uses 3 bytes.
__`Last_Block`__
The lowest bit signals if this block is the last one.
Frame ends right after this block.
It may be followed by an optional `Content_Checksum` .
__`Block_Type` and `Block_Size`__ __`Block_Type` and `Block_Size`__
The block header uses 3-bytes, format is __little-endian__. The next 2 bits represent the `Block_Type`,
The 2 highest bits represent the `Block_Type`, while the remaining 21 bits represent the `Block_Size`.
while the remaining 22 bits represent the (compressed) `Block_Size`. Format is __little-endian__.
There are 4 block types : There are 4 block types :
| Value | 0 | 1 | 2 | 3 | | Value | 0 | 1 | 2 | 3 |
| ------------ | ----------- | ----------- | ------------------ | --------- | | ------------ | ----------- | ----------- | ------------------ | --------- |
| `Block_Type` | `Raw_Block` | `RLE_Block` | `Compressed_Block` | `EndMark` | | `Block_Type` | `Raw_Block` | `RLE_Block` | `Compressed_Block` | `Reserved`|
- `Raw_Block` - this is an uncompressed block. - `Raw_Block` - this is an uncompressed block.
`Block_Size` is the number of bytes to read and copy. `Block_Size` is the number of bytes to read and copy.
@ -348,9 +339,8 @@ There are 4 block types :
`Block_Size` is the compressed size. `Block_Size` is the compressed size.
Decompressed size is unknown, Decompressed size is unknown,
but its maximum possible value is guaranteed (see below) but its maximum possible value is guaranteed (see below)
- `EndMark` - this is not a block. It signals the end of the frame. - `Reserved` - this is not a block.
The rest of the field may be optionally filled by a checksum This value cannot be used with current version of this specification.
(see [`Content_Checksum`](#content_checksum)).
Block sizes must respect a few rules : Block sizes must respect a few rules :
- In compressed mode, compressed size is always strictly `< decompressed size`.