Merge pull request #573 from facebook/ddict

Improved DDict memory usage
dev
Yann Collet 2017-02-27 20:54:42 -08:00 committed by GitHub
commit a81c2e7e44
6 changed files with 151 additions and 92 deletions

View File

@ -59,7 +59,6 @@
****************************************************************/
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memcpy, memset */
#include <stdio.h> /* printf (debug) */
#include "bitstream.h"
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"

View File

@ -459,7 +459,7 @@ size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize)
void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr;
HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compilation fails here, assertion is false */
HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
/* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */

View File

@ -87,22 +87,26 @@ typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;
typedef struct {
FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
FSE_DTable OFTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
U32 rep[ZSTD_REP_NUM];
} ZSTD_entropyTables_t;
struct ZSTD_DCtx_s
{
const FSE_DTable* LLTptr;
const FSE_DTable* MLTptr;
const FSE_DTable* OFTptr;
const HUF_DTable* HUFptr;
FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
FSE_DTable OFTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
const void* previousDstEnd;
const void* base;
const void* vBase;
const void* dictEnd;
ZSTD_entropyTables_t entropy;
const void* previousDstEnd; /* detect continuity */
const void* base; /* start of current segment */
const void* vBase; /* virtual start of previous segment if it was just before current one */
const void* dictEnd; /* end of previous segment */
size_t expected;
U32 rep[ZSTD_REP_NUM];
ZSTD_frameParams fParams;
blockType_e bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
ZSTD_dStage stage;
@ -131,15 +135,15 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
dctx->base = NULL;
dctx->vBase = NULL;
dctx->dictEnd = NULL;
dctx->hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
dctx->litEntropy = dctx->fseEntropy = 0;
dctx->dictID = 0;
MEM_STATIC_ASSERT(sizeof(dctx->rep) == sizeof(repStartValue));
memcpy(dctx->rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */
dctx->LLTptr = dctx->LLTable;
dctx->MLTptr = dctx->MLTable;
dctx->OFTptr = dctx->OFTable;
dctx->HUFptr = dctx->hufTable;
MEM_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */
dctx->LLTptr = dctx->entropy.LLTable;
dctx->MLTptr = dctx->entropy.MLTable;
dctx->OFTptr = dctx->entropy.OFTable;
dctx->HUFptr = dctx->entropy.hufTable;
return 0;
}
@ -175,6 +179,8 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
memcpy(dstDCtx, srcDCtx, sizeof(ZSTD_DCtx) - workSpaceSize); /* no need to copy workspace */
}
#if 0
/* deprecated */
static void ZSTD_refDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
{
ZSTD_decompressBegin(dstDCtx); /* init */
@ -186,15 +192,18 @@ static void ZSTD_refDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
dstDCtx->dictID = srcDCtx->dictID;
dstDCtx->litEntropy = srcDCtx->litEntropy;
dstDCtx->fseEntropy = srcDCtx->fseEntropy;
dstDCtx->LLTptr = srcDCtx->LLTable;
dstDCtx->MLTptr = srcDCtx->MLTable;
dstDCtx->OFTptr = srcDCtx->OFTable;
dstDCtx->HUFptr = srcDCtx->hufTable;
dstDCtx->rep[0] = srcDCtx->rep[0];
dstDCtx->rep[1] = srcDCtx->rep[1];
dstDCtx->rep[2] = srcDCtx->rep[2];
dstDCtx->LLTptr = srcDCtx->entropy.LLTable;
dstDCtx->MLTptr = srcDCtx->entropy.MLTable;
dstDCtx->OFTptr = srcDCtx->entropy.OFTable;
dstDCtx->HUFptr = srcDCtx->entropy.hufTable;
dstDCtx->entropy.rep[0] = srcDCtx->entropy.rep[0];
dstDCtx->entropy.rep[1] = srcDCtx->entropy.rep[1];
dstDCtx->entropy.rep[2] = srcDCtx->entropy.rep[2];
}
}
#endif
static void ZSTD_refDDict(ZSTD_DCtx* dstDCtx, const ZSTD_DDict* ddict);
/*-*************************************************************
@ -506,14 +515,14 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
HUF_decompress1X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) :
HUF_decompress4X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) ) :
( singleStream ?
HUF_decompress1X2_DCtx(dctx->hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize) :
HUF_decompress4X_hufOnly (dctx->hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize)) ))
HUF_decompress1X2_DCtx(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize) :
HUF_decompress4X_hufOnly (dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize)) ))
return ERROR(corruption_detected);
dctx->litPtr = dctx->litBuffer;
dctx->litSize = litSize;
dctx->litEntropy = 1;
if (litEncType==set_compressed) dctx->HUFptr = dctx->hufTable;
if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
return litCSize + lhSize;
}
@ -830,19 +839,19 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
ip++;
/* Build DTables */
{ size_t const llhSize = ZSTD_buildSeqTable(dctx->LLTable, &dctx->LLTptr,
{ size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
LLtype, MaxLL, LLFSELog,
ip, iend-ip, LL_defaultDTable, dctx->fseEntropy);
if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
ip += llhSize;
}
{ size_t const ofhSize = ZSTD_buildSeqTable(dctx->OFTable, &dctx->OFTptr,
{ size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
OFtype, MaxOff, OffFSELog,
ip, iend-ip, OF_defaultDTable, dctx->fseEntropy);
if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
ip += ofhSize;
}
{ size_t const mlhSize = ZSTD_buildSeqTable(dctx->MLTable, &dctx->MLTptr,
{ size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
MLtype, MaxML, MLFSELog,
ip, iend-ip, ML_defaultDTable, dctx->fseEntropy);
if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
@ -999,9 +1008,9 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState)
FORCE_INLINE
size_t ZSTD_execSequence(BYTE* op,
BYTE* const oend, seq_t sequence,
const BYTE** litPtr, const BYTE* const litLimit,
const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
BYTE* const oend, seq_t sequence,
const BYTE** litPtr, const BYTE* const litLimit,
const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
{
BYTE* const oLitEnd = op + sequence.litLength;
size_t const sequenceLength = sequence.litLength + sequence.matchLength;
@ -1104,7 +1113,7 @@ static size_t ZSTD_decompressSequences(
if (nbSeq) {
seqState_t seqState;
dctx->fseEntropy = 1;
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->rep[i]; }
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
@ -1121,7 +1130,7 @@ static size_t ZSTD_decompressSequences(
/* check if reached exact end */
if (nbSeq) return ERROR(corruption_detected);
/* save reps for next block */
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->rep[i] = (U32)(seqState.prevOffset[i]); }
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
}
/* last literal segment */
@ -1330,7 +1339,7 @@ static size_t ZSTD_decompressSequencesLong(
seqState_t seqState;
int seqNb;
dctx->fseEntropy = 1;
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->rep[i]; }
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
seqState.base = base;
seqState.pos = (size_t)(op-base);
seqState.gotoDict = (iPtrDiff)(dictEnd - base);
@ -1365,7 +1374,7 @@ static size_t ZSTD_decompressSequencesLong(
}
/* save reps for next block */
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->rep[i] = (U32)(seqState.prevOffset[i]); }
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
}
/* last literal segment */
@ -1571,7 +1580,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const void *dict, size_t dictSize,
const ZSTD_DCtx* refContext)
const ZSTD_DDict* ddict)
{
void* const dststart = dst;
while (srcSize >= ZSTD_frameHeaderSize_prefix) {
@ -1579,8 +1588,8 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
if (ZSTD_isLegacy(src, srcSize)) {
size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
size_t decodedSize;
size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
if (ZSTD_isError(frameSize)) return frameSize;
decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
@ -1615,9 +1624,9 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
}
}
if (refContext) {
if (ddict) {
/* we were called from ZSTD_decompress_usingDDict */
ZSTD_refDCtx(dctx, refContext);
ZSTD_refDDict(dctx, ddict);
} else {
/* this will initialize correctly with no dict if dict == NULL, so
* use this in all cases but ddict */
@ -1625,8 +1634,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
}
ZSTD_checkContinuity(dctx, dst);
{
const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
{ const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
&src, &srcSize);
if (ZSTD_isError(res)) return res;
/* don't need to bounds check this, ZSTD_decompressFrame will have
@ -1636,9 +1644,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
}
}
if (srcSize) {
return ERROR(srcSize_wrong);
}
if (srcSize) return ERROR(srcSize_wrong); /* input not entirely consumed */
return (BYTE*)dst - (BYTE*)dststart;
}
@ -1835,22 +1841,29 @@ static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dict
return 0;
}
static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* const dict, size_t const dictSize)
/* ZSTD_loadEntropy() :
* dict : must point at beginning of a valid zstd dictionary
* @return : size of entropy tables read */
static size_t ZSTD_loadEntropy(ZSTD_entropyTables_t* entropy, const void* const dict, size_t const dictSize)
{
const BYTE* dictPtr = (const BYTE*)dict;
const BYTE* const dictEnd = dictPtr + dictSize;
{ size_t const hSize = HUF_readDTableX4(dctx->hufTable, dict, dictSize);
if (dictSize <= 8) return ERROR(dictionary_corrupted);
dictPtr += 8; /* skip header = magic + dictID */
{ size_t const hSize = HUF_readDTableX4(entropy->hufTable, dictPtr, dictEnd-dictPtr);
if (HUF_isError(hSize)) return ERROR(dictionary_corrupted);
dictPtr += hSize;
}
{ short offcodeNCount[MaxOff+1];
U32 offcodeMaxValue=MaxOff, offcodeLog;
U32 offcodeMaxValue = MaxOff, offcodeLog;
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
CHECK_E(FSE_buildDTable(dctx->OFTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted);
CHECK_E(FSE_buildDTable(entropy->OFTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted);
dictPtr += offcodeHeaderSize;
}
@ -1859,7 +1872,7 @@ static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* const dict, size_t c
size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
CHECK_E(FSE_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted);
CHECK_E(FSE_buildDTable(entropy->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted);
dictPtr += matchlengthHeaderSize;
}
@ -1868,17 +1881,19 @@ static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* const dict, size_t c
size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
CHECK_E(FSE_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted);
CHECK_E(FSE_buildDTable(entropy->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted);
dictPtr += litlengthHeaderSize;
}
if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] == 0 || dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] == 0 || dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] == 0 || dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
dictPtr += 12;
{ int i;
size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
for (i=0; i<3; i++) {
U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
if (rep==0 || rep >= dictContentSize) return ERROR(dictionary_corrupted);
entropy->rep[i] = rep;
} }
dctx->litEntropy = dctx->fseEntropy = 1;
return dictPtr - (const BYTE*)dict;
}
@ -1892,13 +1907,12 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict
dctx->dictID = MEM_readLE32((const char*)dict + 4);
/* load entropy tables */
dict = (const char*)dict + 8;
dictSize -= 8;
{ size_t const eSize = ZSTD_loadEntropy(dctx, dict, dictSize);
{ size_t const eSize = ZSTD_loadEntropy(&dctx->entropy, dict, dictSize);
if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted);
dict = (const char*)dict + eSize;
dictSize -= eSize;
}
dctx->litEntropy = dctx->fseEntropy = 1;
/* reference dictionary content */
return ZSTD_refDictContent(dctx, dict, dictSize);
@ -1918,50 +1932,90 @@ struct ZSTD_DDict_s {
void* dictBuffer;
const void* dictContent;
size_t dictSize;
ZSTD_DCtx* refContext;
ZSTD_entropyTables_t entropy;
U32 dictID;
U32 entropyPresent;
ZSTD_customMem cMem;
}; /* typedef'd to ZSTD_DDict within "zstd.h" */
static void ZSTD_refDDict(ZSTD_DCtx* dstDCtx, const ZSTD_DDict* ddict)
{
ZSTD_decompressBegin(dstDCtx); /* init */
if (ddict) { /* support refDDict on NULL */
dstDCtx->dictID = ddict->dictID;
dstDCtx->base = ddict->dictContent;
dstDCtx->vBase = ddict->dictContent;
dstDCtx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
dstDCtx->previousDstEnd = dstDCtx->dictEnd;
if (ddict->entropyPresent) {
dstDCtx->litEntropy = 1;
dstDCtx->fseEntropy = 1;
dstDCtx->LLTptr = ddict->entropy.LLTable;
dstDCtx->MLTptr = ddict->entropy.MLTable;
dstDCtx->OFTptr = ddict->entropy.OFTable;
dstDCtx->HUFptr = ddict->entropy.hufTable;
dstDCtx->entropy.rep[0] = ddict->entropy.rep[0];
dstDCtx->entropy.rep[1] = ddict->entropy.rep[1];
dstDCtx->entropy.rep[2] = ddict->entropy.rep[2];
} else {
dstDCtx->litEntropy = 0;
dstDCtx->fseEntropy = 0;
}
}
}
static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict)
{
ddict->entropyPresent = 0;
if (ddict->dictSize < 8) return 0;
{ U32 const magic = MEM_readLE32(ddict->dictContent);
if (magic != ZSTD_DICT_MAGIC) return 0; /* pure content mode */
}
ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + 4);
/* load entropy tables */
CHECK_E( ZSTD_loadEntropy(&ddict->entropy, ddict->dictContent, ddict->dictSize), dictionary_corrupted );
ddict->entropyPresent = 1;
return 0;
}
ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, unsigned byReference, ZSTD_customMem customMem)
{
if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
if (!customMem.customAlloc || !customMem.customFree) return NULL;
{ ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
ZSTD_DCtx* const dctx = ZSTD_createDCtx_advanced(customMem);
if (!ddict || !dctx) {
ZSTD_free(ddict, customMem);
ZSTD_free(dctx, customMem);
return NULL;
}
if (!ddict) return NULL;
ddict->cMem = customMem;
if ((byReference) || (!dict) || (!dictSize)) {
ddict->dictBuffer = NULL;
ddict->dictContent = dict;
} else {
void* const internalBuffer = ZSTD_malloc(dictSize, customMem);
if (!internalBuffer) { ZSTD_free(dctx, customMem); ZSTD_free(ddict, customMem); return NULL; }
if (!internalBuffer) { ZSTD_freeDDict(ddict); return NULL; }
memcpy(internalBuffer, dict, dictSize);
ddict->dictBuffer = internalBuffer;
ddict->dictContent = internalBuffer;
}
{ size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, ddict->dictContent, dictSize);
ddict->dictSize = dictSize;
ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
/* parse dictionary content */
{ size_t const errorCode = ZSTD_loadEntropy_inDDict(ddict);
if (ZSTD_isError(errorCode)) {
ZSTD_free(ddict->dictBuffer, customMem);
ZSTD_free(ddict, customMem);
ZSTD_free(dctx, customMem);
ZSTD_freeDDict(ddict);
return NULL;
} }
ddict->dictSize = dictSize;
ddict->refContext = dctx;
return ddict;
}
}
/*! ZSTD_createDDict() :
* Create a digested dictionary, ready to start decompression without startup delay.
* `dict` can be released after `ZSTD_DDict` creation */
* Create a digested dictionary, to start decompression without startup delay.
* `dict` content is copied inside DDict.
* Consequently, `dict` can be released after `ZSTD_DDict` creation */
ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
{
ZSTD_customMem const allocator = { NULL, NULL, NULL };
@ -1970,9 +2024,9 @@ ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
/*! ZSTD_createDDict_byReference() :
* Create a digested dictionary, ready to start decompression operation without startup delay.
* Dictionary content is simply referenced, and therefore stays in dictBuffer.
* It is important that dictBuffer outlives DDict, it must remain read accessible throughout the lifetime of DDict */
* Create a digested dictionary, to start decompression without startup delay.
* Dictionary content is simply referenced, it will be accessed during decompression.
* Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
{
ZSTD_customMem const allocator = { NULL, NULL, NULL };
@ -1983,8 +2037,7 @@ ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize
size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
{
if (ddict==NULL) return 0; /* support free on NULL */
{ ZSTD_customMem const cMem = ddict->refContext->customMem;
ZSTD_freeDCtx(ddict->refContext);
{ ZSTD_customMem const cMem = ddict->cMem;
ZSTD_free(ddict->dictBuffer, cMem);
ZSTD_free(ddict, cMem);
return 0;
@ -1994,7 +2047,7 @@ size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
{
if (ddict==NULL) return 0; /* support sizeof on NULL */
return sizeof(*ddict) + ZSTD_sizeof_DCtx(ddict->refContext) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
}
/*! ZSTD_getDictID_fromDict() :
@ -2048,7 +2101,7 @@ size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
/* pass content and size in case legacy frames are encountered */
return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize,
ddict->dictContent, ddict->dictSize,
ddict->refContext);
ddict);
}
@ -2249,9 +2302,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
} }
/* Consume header */
{ const ZSTD_DCtx* refContext = zds->ddict ? zds->ddict->refContext : NULL;
ZSTD_refDCtx(zds->dctx, refContext);
}
ZSTD_refDDict(zds->dctx, zds->ddict);
{ size_t const h1Size = ZSTD_nextSrcSizeToDecompress(zds->dctx); /* == ZSTD_frameHeaderSize_prefix */
CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer, h1Size));
{ size_t const h2Size = ZSTD_nextSrcSizeToDecompress(zds->dctx);

View File

@ -909,7 +909,7 @@ static int FIO_decompressSrcFile(dRess_t ress, const char* dstFileName, const ch
}
srcFile = FIO_openSrcFile(srcFileName);
if (srcFile==0) return 1;
if (srcFile==NULL) return 1;
/* for each frame */
for ( ; ; ) {

View File

@ -256,6 +256,14 @@ static int basicUnitTests(U32 seed, double compressibility)
if (r != CNBuffSize - dictSize) goto _output_error);
DISPLAYLEVEL(4, "OK \n");
DISPLAYLEVEL(4, "test%3i : decompress with DDict : ", testNb++);
{ ZSTD_DDict* const ddict = ZSTD_createDDict_byReference(CNBuffer, dictSize);
size_t const r = ZSTD_decompress_usingDDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, ddict);
if (r != CNBuffSize - dictSize) goto _output_error;
DISPLAYLEVEL(4, "OK (size of DDict : %u) \n", (U32)ZSTD_sizeof_DDict(ddict));
ZSTD_freeDDict(ddict);
}
DISPLAYLEVEL(4, "test%3i : check content size on duplicated context : ", testNb++);
{ size_t const testSize = CNBuffSize / 3;
{ ZSTD_parameters p = ZSTD_getParams(2, testSize, dictSize);

View File

@ -211,18 +211,19 @@ $ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
$ECHO "\n**** dictionary tests **** "
TESTFILE=../programs/zstdcli.c
$ECHO "- test with raw dict (content only) "
./datagen > tmpDict
./datagen -g1M | $MD5SUM > tmp1
./datagen -g1M | $ZSTD -D tmpDict | $ZSTD -D tmpDict -dvq | $MD5SUM > tmp2
$DIFF -q tmp1 tmp2
$ECHO "- Create first dictionary"
$ECHO "- Create first dictionary "
TESTFILE=../programs/zstdcli.c
$ZSTD --train *.c ../programs/*.c -o tmpDict
cp $TESTFILE tmp
$ZSTD -f tmp -D tmpDict
$ZSTD -d tmp.zst -D tmpDict -fo result
$DIFF $TESTFILE result
$ECHO "- Create second (different) dictionary"
$ECHO "- Create second (different) dictionary "
$ZSTD --train *.c ../programs/*.c ../programs/*.h -o tmpDictC
$ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!"
$ECHO "- Create dictionary with short dictID"