added ZSTD_refDDict()
DDict no longer depends on DCtx duplication.
This commit is contained in:
parent
d73eebc00f
commit
bd7fa21deb
@ -179,6 +179,8 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
|
|||||||
memcpy(dstDCtx, srcDCtx, sizeof(ZSTD_DCtx) - workSpaceSize); /* no need to copy workspace */
|
memcpy(dstDCtx, srcDCtx, sizeof(ZSTD_DCtx) - workSpaceSize); /* no need to copy workspace */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
/* deprecated */
|
||||||
static void ZSTD_refDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
|
static void ZSTD_refDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
|
||||||
{
|
{
|
||||||
ZSTD_decompressBegin(dstDCtx); /* init */
|
ZSTD_decompressBegin(dstDCtx); /* init */
|
||||||
@ -199,6 +201,9 @@ static void ZSTD_refDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
|
|||||||
dstDCtx->entropy.rep[2] = srcDCtx->entropy.rep[2];
|
dstDCtx->entropy.rep[2] = srcDCtx->entropy.rep[2];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static void ZSTD_refDDict(ZSTD_DCtx* dstDCtx, const ZSTD_DDict* ddict);
|
||||||
|
|
||||||
|
|
||||||
/*-*************************************************************
|
/*-*************************************************************
|
||||||
@ -1575,7 +1580,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
|
|||||||
void* dst, size_t dstCapacity,
|
void* dst, size_t dstCapacity,
|
||||||
const void* src, size_t srcSize,
|
const void* src, size_t srcSize,
|
||||||
const void *dict, size_t dictSize,
|
const void *dict, size_t dictSize,
|
||||||
const ZSTD_DCtx* refContext)
|
const ZSTD_DDict* ddict)
|
||||||
{
|
{
|
||||||
void* const dststart = dst;
|
void* const dststart = dst;
|
||||||
while (srcSize >= ZSTD_frameHeaderSize_prefix) {
|
while (srcSize >= ZSTD_frameHeaderSize_prefix) {
|
||||||
@ -1619,9 +1624,9 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (refContext) {
|
if (ddict) {
|
||||||
/* we were called from ZSTD_decompress_usingDDict */
|
/* we were called from ZSTD_decompress_usingDDict */
|
||||||
ZSTD_refDCtx(dctx, refContext);
|
ZSTD_refDDict(dctx, ddict);
|
||||||
} else {
|
} else {
|
||||||
/* this will initialize correctly with no dict if dict == NULL, so
|
/* this will initialize correctly with no dict if dict == NULL, so
|
||||||
* use this in all cases but ddict */
|
* use this in all cases but ddict */
|
||||||
@ -1881,9 +1886,10 @@ static size_t ZSTD_loadEntropy(ZSTD_entropyTables_t* entropy, const void* const
|
|||||||
|
|
||||||
if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
|
if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
|
||||||
{ int i;
|
{ int i;
|
||||||
|
size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
|
||||||
for (i=0; i<3; i++) {
|
for (i=0; i<3; i++) {
|
||||||
U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
|
U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
|
||||||
if (rep==0 || rep >= dictSize) return ERROR(dictionary_corrupted);
|
if (rep==0 || rep >= dictContentSize) return ERROR(dictionary_corrupted);
|
||||||
entropy->rep[i] = rep;
|
entropy->rep[i] = rep;
|
||||||
} }
|
} }
|
||||||
|
|
||||||
@ -1926,8 +1932,51 @@ struct ZSTD_DDict_s {
|
|||||||
const void* dictContent;
|
const void* dictContent;
|
||||||
size_t dictSize;
|
size_t dictSize;
|
||||||
ZSTD_DCtx* refContext;
|
ZSTD_DCtx* refContext;
|
||||||
|
U32 dictID;
|
||||||
|
U32 entropyPresent;
|
||||||
}; /* typedef'd to ZSTD_DDict within "zstd.h" */
|
}; /* typedef'd to ZSTD_DDict within "zstd.h" */
|
||||||
|
|
||||||
|
/*! ZSTD_refDDict() :
 *  Re-initializes `dstDCtx` with ZSTD_decompressBegin(), then, when `ddict` is not NULL,
 *  makes the context reference the dictionary described by `ddict` :
 *  - dictID is copied,
 *  - base, vBase, dictEnd and previousDstEnd are set from the dictionary content boundaries,
 *  - when `ddict->entropyPresent` is set, the table pointers (LL, ML, OF, Huffman) are aimed at
 *    the tables stored in `ddict->refContext->entropy`, and the 3 repcodes are copied from there.
 *  Tables and content are only pointed to, never duplicated.
 *  `ddict` may be NULL : the context is then simply re-initialized. */
static void ZSTD_refDDict(ZSTD_DCtx* dstDCtx, const ZSTD_DDict* ddict)
{
    ZSTD_decompressBegin(dstDCtx);   /* init */
    if (ddict == NULL) return;   /* support refDDict on NULL : nothing more to reference */

    dstDCtx->dictID = ddict->dictID;

    /* describe the dictionary content region to the context */
    dstDCtx->base = ddict->dictContent;
    dstDCtx->vBase = ddict->dictContent;
    dstDCtx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
    dstDCtx->previousDstEnd = dstDCtx->dictEnd;

    if (!ddict->entropyPresent) {
        /* no entropy tables were loaded for this ddict */
        dstDCtx->litEntropy = 0;
        dstDCtx->fseEntropy = 0;
        return;
    }

    {   const ZSTD_DCtx* const ref = ddict->refContext;   /* holder of the pre-digested tables */
        int r;
        dstDCtx->litEntropy = 1;
        dstDCtx->fseEntropy = 1;
        dstDCtx->LLTptr = ref->entropy.LLTable;
        dstDCtx->MLTptr = ref->entropy.MLTable;
        dstDCtx->OFTptr = ref->entropy.OFTable;
        dstDCtx->HUFptr = ref->entropy.hufTable;
        for (r = 0; r < 3; r++) {
            dstDCtx->entropy.rep[r] = ref->entropy.rep[r];
        }
    }
}
|
||||||
|
|
||||||
|
/*! ZSTD_loadEntropy_inDDict() :
 *  Inspects the dictionary referenced by `ddict` ;
 *  `ddict->dictContent`, `ddict->dictSize` and `ddict->refContext` must already be set.
 *  - fewer than 8 bytes, or first 4 bytes != ZSTD_DICT_MAGIC :
 *    pure content mode, nothing to load ; dictID = 0 and entropyPresent = 0.
 *  - otherwise : dictID is read (little-endian) from bytes 4-7, entropy tables are loaded
 *    into `ddict->refContext->entropy` via ZSTD_loadEntropy(), and entropyPresent is set to 1.
 * @return : 0 on success,
 *           or the error produced through CHECK_E (dictionary_corrupted) when ZSTD_loadEntropy() fails ;
 *           in that case entropyPresent remains 0. */
static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict)
{
    /* Always give both fields a defined value :
     * ZSTD_refDDict() copies ddict->dictID unconditionally,
     * so it must not be left unset on the early-return paths below. */
    ddict->dictID = 0;
    ddict->entropyPresent = 0;

    if (ddict->dictSize < 8) return 0;   /* too small to hold magic number + dictID */
    {   U32 const magic = MEM_readLE32(ddict->dictContent);
        if (magic != ZSTD_DICT_MAGIC) return 0;   /* pure content mode */
    }
    ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + 4);

    /* load entropy tables */
    CHECK_E( ZSTD_loadEntropy(&ddict->refContext->entropy, ddict->dictContent, ddict->dictSize), dictionary_corrupted );
    ddict->entropyPresent = 1;
    return 0;
}
|
||||||
|
|
||||||
ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, unsigned byReference, ZSTD_customMem customMem)
|
ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, unsigned byReference, ZSTD_customMem customMem)
|
||||||
{
|
{
|
||||||
if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
|
if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
|
||||||
@ -1953,22 +2002,22 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, unsigne
|
|||||||
ddict->dictBuffer = internalBuffer;
|
ddict->dictBuffer = internalBuffer;
|
||||||
ddict->dictContent = internalBuffer;
|
ddict->dictContent = internalBuffer;
|
||||||
}
|
}
|
||||||
|
ddict->dictSize = dictSize;
|
||||||
/* parse dictionary content */
|
/* parse dictionary content */
|
||||||
{ //size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, ddict->dictContent, dictSize);
|
{ size_t const errorCode = ZSTD_loadEntropy_inDDict(ddict);
|
||||||
size_t const errorCode = ZSTD_decompress_insertDictionary(dctx, ddict->dictContent, dictSize);
|
|
||||||
if (ZSTD_isError(errorCode)) {
|
if (ZSTD_isError(errorCode)) {
|
||||||
ZSTD_freeDDict(ddict);
|
ZSTD_freeDDict(ddict);
|
||||||
return NULL;
|
return NULL;
|
||||||
} }
|
} }
|
||||||
|
|
||||||
ddict->dictSize = dictSize;
|
|
||||||
return ddict;
|
return ddict;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*! ZSTD_createDDict() :
|
/*! ZSTD_createDDict() :
|
||||||
* Create a digested dictionary, ready to start decompression without startup delay.
|
* Create a digested dictionary, to start decompression without startup delay.
|
||||||
* `dict` can be released after `ZSTD_DDict` creation */
|
* `dict` content is copied inside DDict.
|
||||||
|
* Consequently, `dict` can be released after `ZSTD_DDict` creation */
|
||||||
ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
|
ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
|
||||||
{
|
{
|
||||||
ZSTD_customMem const allocator = { NULL, NULL, NULL };
|
ZSTD_customMem const allocator = { NULL, NULL, NULL };
|
||||||
@ -1977,9 +2026,9 @@ ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
|
|||||||
|
|
||||||
|
|
||||||
/*! ZSTD_createDDict_byReference() :
|
/*! ZSTD_createDDict_byReference() :
|
||||||
* Create a digested dictionary, ready to start decompression operation without startup delay.
|
* Create a digested dictionary, to start decompression without startup delay.
|
||||||
* Dictionary content is simply referenced, and therefore stays in dictBuffer.
|
* Dictionary content is simply referenced, it will be accessed during decompression.
|
||||||
* It is important that dictBuffer outlives DDict, it must remain read accessible throughout the lifetime of DDict */
|
* Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
|
||||||
ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
|
ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
|
||||||
{
|
{
|
||||||
ZSTD_customMem const allocator = { NULL, NULL, NULL };
|
ZSTD_customMem const allocator = { NULL, NULL, NULL };
|
||||||
@ -2055,7 +2104,7 @@ size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
|
|||||||
/* pass content and size in case legacy frames are encountered */
|
/* pass content and size in case legacy frames are encountered */
|
||||||
return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize,
|
return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize,
|
||||||
ddict->dictContent, ddict->dictSize,
|
ddict->dictContent, ddict->dictSize,
|
||||||
ddict->refContext);
|
ddict);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -2256,9 +2305,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
|||||||
} }
|
} }
|
||||||
|
|
||||||
/* Consume header */
|
/* Consume header */
|
||||||
{ const ZSTD_DCtx* refContext = zds->ddict ? zds->ddict->refContext : NULL;
|
ZSTD_refDDict(zds->dctx, zds->ddict);
|
||||||
ZSTD_refDCtx(zds->dctx, refContext);
|
|
||||||
}
|
|
||||||
{ size_t const h1Size = ZSTD_nextSrcSizeToDecompress(zds->dctx); /* == ZSTD_frameHeaderSize_prefix */
|
{ size_t const h1Size = ZSTD_nextSrcSizeToDecompress(zds->dctx); /* == ZSTD_frameHeaderSize_prefix */
|
||||||
CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer, h1Size));
|
CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer, h1Size));
|
||||||
{ size_t const h2Size = ZSTD_nextSrcSizeToDecompress(zds->dctx);
|
{ size_t const h2Size = ZSTD_nextSrcSizeToDecompress(zds->dctx);
|
||||||
|
@ -257,7 +257,7 @@ static int basicUnitTests(U32 seed, double compressibility)
|
|||||||
DISPLAYLEVEL(4, "OK \n");
|
DISPLAYLEVEL(4, "OK \n");
|
||||||
|
|
||||||
DISPLAYLEVEL(4, "test%3i : decompress with DDict : ", testNb++);
|
DISPLAYLEVEL(4, "test%3i : decompress with DDict : ", testNb++);
|
||||||
{ ZSTD_DDict* const ddict = ZSTD_createDDict(CNBuffer, dictSize);
|
{ ZSTD_DDict* const ddict = ZSTD_createDDict_byReference(CNBuffer, dictSize);
|
||||||
size_t const r = ZSTD_decompress_usingDDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, ddict);
|
size_t const r = ZSTD_decompress_usingDDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, ddict);
|
||||||
if (r != CNBuffSize - dictSize) goto _output_error;
|
if (r != CNBuffSize - dictSize) goto _output_error;
|
||||||
DISPLAYLEVEL(4, "OK (size of DDict : %u) \n", (U32)ZSTD_sizeof_DDict(ddict));
|
DISPLAYLEVEL(4, "OK (size of DDict : %u) \n", (U32)ZSTD_sizeof_DDict(ddict));
|
||||||
|
@ -209,18 +209,19 @@ $ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
|
|||||||
|
|
||||||
$ECHO "\n**** dictionary tests **** "
|
$ECHO "\n**** dictionary tests **** "
|
||||||
|
|
||||||
TESTFILE=../programs/zstdcli.c
|
$ECHO "- test with raw dict (content only) "
|
||||||
./datagen > tmpDict
|
./datagen > tmpDict
|
||||||
./datagen -g1M | $MD5SUM > tmp1
|
./datagen -g1M | $MD5SUM > tmp1
|
||||||
./datagen -g1M | $ZSTD -D tmpDict | $ZSTD -D tmpDict -dvq | $MD5SUM > tmp2
|
./datagen -g1M | $ZSTD -D tmpDict | $ZSTD -D tmpDict -dvq | $MD5SUM > tmp2
|
||||||
$DIFF -q tmp1 tmp2
|
$DIFF -q tmp1 tmp2
|
||||||
$ECHO "- Create first dictionary"
|
$ECHO "- Create first dictionary "
|
||||||
|
TESTFILE=../programs/zstdcli.c
|
||||||
$ZSTD --train *.c ../programs/*.c -o tmpDict
|
$ZSTD --train *.c ../programs/*.c -o tmpDict
|
||||||
cp $TESTFILE tmp
|
cp $TESTFILE tmp
|
||||||
$ZSTD -f tmp -D tmpDict
|
$ZSTD -f tmp -D tmpDict
|
||||||
$ZSTD -d tmp.zst -D tmpDict -fo result
|
$ZSTD -d tmp.zst -D tmpDict -fo result
|
||||||
$DIFF $TESTFILE result
|
$DIFF $TESTFILE result
|
||||||
$ECHO "- Create second (different) dictionary"
|
$ECHO "- Create second (different) dictionary "
|
||||||
$ZSTD --train *.c ../programs/*.c ../programs/*.h -o tmpDictC
|
$ZSTD --train *.c ../programs/*.c ../programs/*.h -o tmpDictC
|
||||||
$ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!"
|
$ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!"
|
||||||
$ECHO "- Create dictionary with short dictID"
|
$ECHO "- Create dictionary with short dictID"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user