added : frame content checksum

dev
Yann Collet 2016-05-31 18:13:56 +02:00
parent e3f4e6cbda
commit f2a3b6e7b4
7 changed files with 91 additions and 66 deletions

4
.gitignore vendored
View File

@ -32,6 +32,4 @@ projects/VS2015
_codelite/
_zstdbench/
zlib_wrapper/
# CMake
contrib/cmake/
.clang_complete

View File

@ -86,8 +86,9 @@ typedef struct {
} ZSTD_compressionParameters;
typedef struct {
U32 contentSizeFlag; /* 1: content size will be in frame header (if known). */
U32 noDictIDFlag; /* 1: no dict ID will be saved into frame header (if dictionary compression) */
U32 contentSizeFlag; /* 1: content size will be in frame header (if known). */
U32 checksumFlag; /* 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */
U32 noDictIDFlag; /* 1: no dict ID will be saved into frame header (if dictionary compression) */
} ZSTD_frameParameters;
typedef struct {
@ -196,6 +197,7 @@ typedef struct {
U64 frameContentSize;
U32 windowLog;
U32 dictID;
U32 checksumFlag;
} ZSTD_frameParams;
#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */

View File

@ -51,9 +51,11 @@
/*-*************************************
* Dependencies
***************************************/
#include <stdlib.h> /* malloc */
#include <string.h> /* memset */
#include <stdlib.h> /* malloc */
#include <string.h> /* memset */
#include "mem.h"
#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
#include "xxhash.h" /* XXH_reset, update, digest */
#include "fse_static.h"
#include "huf_static.h"
#include "zstd_internal.h"
@ -104,6 +106,7 @@ struct ZSTD_CCtx_s
void* workSpace;
size_t workSpaceSize;
size_t blockSize;
XXH64_state_t xxhState;
ZSTD_allocFunction customAlloc;
ZSTD_freeFunction customFree;
@ -266,6 +269,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
} }
if (reset) memset(zc->workSpace, 0, tableSpace ); /* reset only tables */
XXH64_reset(&zc->xxhState, 0);
zc->hashTable3 = (U32*)(zc->workSpace);
zc->hashTable = zc->hashTable3 + h3Size;
zc->chainTable = zc->hashTable + hSize;
@ -1938,7 +1942,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
/* catch up */
if (offset) {
U32 matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
@ -2043,19 +2047,22 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCa
static size_t ZSTD_compress_generic (ZSTD_CCtx* zc,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize)
static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize)
{
size_t blockSize = zc->blockSize;
size_t blockSize = cctx->blockSize;
size_t remaining = srcSize;
const BYTE* ip = (const BYTE*)src;
BYTE* const ostart = (BYTE*)dst;
BYTE* op = ostart;
const U32 maxDist = 1 << zc->params.cParams.windowLog;
ZSTD_stats_t* stats = &zc->seqStore.stats;
const U32 maxDist = 1 << cctx->params.cParams.windowLog;
ZSTD_stats_t* stats = &cctx->seqStore.stats;
ZSTD_statsInit(stats);
if (cctx->params.fParams.checksumFlag)
XXH64_update(&cctx->xxhState, src, srcSize);
while (remaining) {
size_t cSize;
ZSTD_statsResetFreqs(stats);
@ -2063,14 +2070,14 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* zc,
if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */
if (remaining < blockSize) blockSize = remaining;
if ((U32)(ip+blockSize - zc->base) > zc->loadedDictEnd + maxDist) {
if ((U32)(ip+blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) {
/* enforce maxDist */
U32 const newLowLimit = (U32)(ip+blockSize - zc->base) - maxDist;
if (zc->lowLimit < newLowLimit) zc->lowLimit = newLowLimit;
if (zc->dictLimit < zc->lowLimit) zc->dictLimit = zc->lowLimit;
U32 const newLowLimit = (U32)(ip+blockSize - cctx->base) - maxDist;
if (cctx->lowLimit < newLowLimit) cctx->lowLimit = newLowLimit;
if (cctx->dictLimit < cctx->lowLimit) cctx->dictLimit = cctx->lowLimit;
}
cSize = ZSTD_compressBlock_internal(zc, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize);
cSize = ZSTD_compressBlock_internal(cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize);
if (ZSTD_isError(cSize)) return cSize;
if (cSize == 0) { /* block is not compressible */
@ -2090,7 +2097,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* zc,
op += cSize;
}
ZSTD_statsPrint(stats, zc->params.cParams.searchLength);
ZSTD_statsPrint(stats, cctx->params.cParams.searchLength);
return op-ostart;
}
@ -2104,7 +2111,7 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
BYTE const fAllocByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) /* windowLog : 4 KB - 128 MB */
| (fcsId << 6) );
U32 const dictIDSizeCode = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */
BYTE const fCheckByte = (BYTE)(dictIDSizeCode&3);
BYTE const fCheckByte = (BYTE)((dictIDSizeCode&3) + (params.fParams.checksumFlag<<4));
size_t pos;
if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall);
@ -2261,40 +2268,45 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t
static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t dictSize)
{
/* note : magic number already checked */
size_t offcodeHeaderSize, matchlengthHeaderSize, litlengthHeaderSize, errorCode;
short offcodeNCount[MaxOff+1];
unsigned offcodeMaxValue = MaxOff, offcodeLog = OffFSELog;
short matchlengthNCount[MaxML+1];
unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
short litlengthNCount[MaxLL+1];
unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
size_t const dictSizeStart = dictSize;
size_t const hufHeaderSize = HUF_readCTable(zc->hufTable, 255, dict, dictSize);
if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
zc->flagStaticTables = 1;
dict = (const char*)dict + hufHeaderSize;
dictSize -= hufHeaderSize;
{ size_t const hufHeaderSize = HUF_readCTable(zc->hufTable, 255, dict, dictSize);
if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
zc->flagStaticTables = 1;
dict = (const char*)dict + hufHeaderSize;
dictSize -= hufHeaderSize;
}
offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize);
if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
errorCode = FSE_buildCTable(zc->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog);
if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted);
dict = (const char*)dict + offcodeHeaderSize;
dictSize -= offcodeHeaderSize;
{ short offcodeNCount[MaxOff+1];
unsigned offcodeMaxValue = MaxOff, offcodeLog = OffFSELog;
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize);
if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
{ size_t const errorCode = FSE_buildCTable(zc->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog);
if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
dict = (const char*)dict + offcodeHeaderSize;
dictSize -= offcodeHeaderSize;
}
matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize);
if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
errorCode = FSE_buildCTable(zc->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted);
dict = (const char*)dict + matchlengthHeaderSize;
dictSize -= matchlengthHeaderSize;
{ short matchlengthNCount[MaxML+1];
unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize);
if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
{ size_t const errorCode = FSE_buildCTable(zc->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
dict = (const char*)dict + matchlengthHeaderSize;
dictSize -= matchlengthHeaderSize;
}
litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize);
if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
errorCode = FSE_buildCTable(zc->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog);
if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted);
{ short litlengthNCount[MaxLL+1];
unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize);
if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
{ size_t const errorCode = FSE_buildCTable(zc->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog);
if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
dictSize -= litlengthHeaderSize;
}
return hufHeaderSize + offcodeHeaderSize + matchlengthHeaderSize + litlengthHeaderSize;
return (dictSizeStart-dictSize);
}
/** ZSTD_compress_insertDictionary() :
@ -2366,30 +2378,34 @@ size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel)
/*! ZSTD_compressEnd() :
* Write frame epilogue.
* @return : nb of bytes written into dst (or an error code) */
size_t ZSTD_compressEnd(ZSTD_CCtx* zc, void* dst, size_t dstCapacity)
size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
{
BYTE* op = (BYTE*)dst;
size_t fhSize = 0;
/* not even init ! */
if (zc->stage==0) return ERROR(stage_wrong);
if (cctx->stage==0) return ERROR(stage_wrong);
/* special case : empty frame */
if (zc->stage==1) {
fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, zc->params, 0, 0);
if (cctx->stage==1) {
fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, 0, 0);
if (ZSTD_isError(fhSize)) return fhSize;
dstCapacity -= fhSize;
op += fhSize;
zc->stage = 2;
cctx->stage = 2;
}
/* frame epilogue */
if (dstCapacity < 3) return ERROR(dstSize_tooSmall);
op[0] = (BYTE)(bt_end << 6);
op[1] = 0;
op[2] = 0;
{ U32 const checksum = cctx->params.fParams.checksumFlag ?
(U32)((XXH64_digest(&cctx->xxhState) >> 11) & ((1<<22)-1)) :
0;
op[0] = (BYTE)((bt_end<<6) + (checksum>>16));
op[1] = (BYTE)(checksum>>8);
op[2] = (BYTE)checksum;
}
zc->stage = 0; /* return to "created by not init" status */
cctx->stage = 0; /* return to "created but not init" status */
return 3+fhSize;
}

View File

@ -57,6 +57,8 @@
#include <string.h> /* memcpy, memmove */
#include <stdio.h> /* debug only : printf */
#include "mem.h" /* low level memory routines */
#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
#include "xxhash.h" /* XXH64_* */
#include "zstd_internal.h"
#include "fse_static.h"
#include "huf_static.h"
@ -116,6 +118,7 @@ struct ZSTD_DCtx_s
size_t expected;
size_t headerSize;
ZSTD_frameParams fParams;
XXH64_state_t xxhState;
ZSTD_allocFunction customAlloc;
ZSTD_freeFunction customFree;
blockType_t bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
@ -339,6 +342,8 @@ size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t
U32 const dictIDSizeCode = checkByte&3;
fparamsPtr->windowLog = (allocByte & 0xF) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
if ((allocByte & 0x30) != 0) return ERROR(frameParameter_unsupported); /* reserved bits */
if ((checkByte & 0xEC) != 0) return ERROR(frameParameter_unsupported); /* reserved bits */
fparamsPtr->checksumFlag = checkByte & 0x10;
switch(dictIDSizeCode) /* fcsId */
{
default: /* impossible */
@ -367,6 +372,7 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t sr
size_t const result = ZSTD_getFrameParams(&(dctx->fParams), src, srcSize);
if ((MEM_32bits()) && (dctx->fParams.windowLog > 25)) return ERROR(frameParameter_unsupportedBy32bits);
if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) return ERROR(dictionary_wrong);
if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0);
return result;
}
@ -1021,6 +1027,9 @@ size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx)
return dctx->expected;
}
/** ZSTD_decompressContinue() :
* @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity)
* or an error code, which can be tested using ZSTD_isError() */
size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
/* Sanity check */

View File

@ -1,3 +0,0 @@
-I../lib/common
-I../lib/legacy
-I./legacy

1
programs/.gitignore vendored
View File

@ -40,6 +40,7 @@ grillResults.txt
_*
tmp*
*.zst
result
# fuzzer
afl

View File

@ -216,7 +216,7 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(4, "test%3i : check content size on duplicated context : ", testNb++);
{ size_t const testSize = CNBuffSize / 3;
{ ZSTD_compressionParameters const cPar = ZSTD_getCParams(2, testSize, dictSize);
ZSTD_frameParameters const fPar = { 1 , 0 };
ZSTD_frameParameters const fPar = { 1 , 0 , 0 };
ZSTD_parameters p;
p.cParams = cPar; p.fParams = fPar;
CHECK( ZSTD_compressBegin_advanced(ctxOrig, CNBuffer, dictSize, p, testSize-1) );
@ -276,7 +276,7 @@ static int basicUnitTests(U32 seed, double compressibility)
DISPLAYLEVEL(4, "OK \n");
DISPLAYLEVEL(4, "test%3i : compress without dictID : ", testNb++);
{ ZSTD_frameParameters const fParams = { 0, 1 /*NoDictID*/ };
{ ZSTD_frameParameters const fParams = { 0 /*contentSize*/, 0 /*checksum*/, 1 /*NoDictID*/ };
ZSTD_compressionParameters const cParams = ZSTD_getCParams(3, CNBuffSize, dictSize);
ZSTD_parameters p;
p.cParams = cParams; p.fParams = fParams;
@ -639,12 +639,14 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
dictSize = FUZ_randomLength(&lseed, maxSampleLog); /* needed also for decompression */
dict = srcBuffer + (FUZ_rand(&lseed) % (srcBufferSize - dictSize));
if (FUZ_rand(&lseed) & 15) {
if (FUZ_rand(&lseed) & 0xF) {
size_t const errorCode = ZSTD_compressBegin_usingDict(refCtx, dict, dictSize, cLevel);
CHECK (ZSTD_isError(errorCode), "ZSTD_compressBegin_usingDict error : %s", ZSTD_getErrorName(errorCode));
} else {
ZSTD_compressionParameters const cPar = ZSTD_getCParams(cLevel, 0, dictSize);
ZSTD_frameParameters const fpar = { FUZ_rand(&lseed)&1, FUZ_rand(&lseed)&1 }; /* note : since dictionary is fake, dictIDflag has no impact */
ZSTD_frameParameters const fpar = { FUZ_rand(&lseed)&1 /* contentSizeFlag */,
!(FUZ_rand(&lseed)&3) /* contentChecksumFlag*/,
0 /*NodictID*/ }; /* note : since dictionary is fake, dictIDflag has no impact */
ZSTD_parameters p;
size_t errorCode;
p.cParams = cPar; p.fParams = fpar;