Add unit test, improve documentation

dev
senhuang42 2020-12-28 16:43:04 -05:00
parent ea52fc3606
commit 22b7bff2bc
3 changed files with 134 additions and 54 deletions

View File

@ -73,9 +73,9 @@
/***************************
* Multiple DDicts Hashset *
***************************/
/*************************************
* Multiple DDicts Hashset internals *
*************************************/
#define DDICT_HASHSET_MAX_LOAD_FACTOR 0.75
#define DDICT_HASHSET_TABLE_BASE_SIZE 64
@ -85,7 +85,7 @@
* Returns an index between [0, hashSet->ddictPtrTableSize]
*/
static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) {
U32 hash = XXH32(&dictID, sizeof(U32), 0);
U32 hash = XXH64(&dictID, sizeof(U32), 0);
/* DDict ptr table size is a multiple of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */
return hash & (hashSet->ddictPtrTableSize - 1);
}
@ -94,8 +94,8 @@ static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 d
* Returns 0 if successful (ddict may or may not have actually been added), or a zstd error code if something went wrong
*/
static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, U32 dictID) {
RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!");
size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!");
DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
while (hashSet->ddictPtrTable[idx] != NULL) {
/* Replace existing ddict if inserting ddict with same dictID */
@ -125,7 +125,7 @@ static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMe
const ZSTD_DDict** newTable = (const ZSTD_DDict**)ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem);
const ZSTD_DDict** oldTable = hashSet->ddictPtrTable;
size_t oldTableSize = hashSet->ddictPtrTableSize;
size_t i = 0;
size_t i;
DEBUGLOG(4, "Expanding DDict hash table! Old size: %zu new size: %zu", oldTableSize, newTableSize);
RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!");
@ -152,7 +152,6 @@ static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet,
size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]);
if (currDictID == dictID || currDictID == 0) {
/* currDictID == 0 implies a NULL ddict entry */
DEBUGLOG(4, "Dict ID not found");
break;
} else {
if (idx == hashSet->ddictPtrTableSize - 1) {
@ -172,6 +171,7 @@ static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet,
*/
static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) {
ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem);
DEBUGLOG(4, "Allocating new hash set");
ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem);
ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE;
ret->ddictPtrCount = 0;
@ -182,10 +182,16 @@ static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) {
}
/* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself.
* Note: The ZSTD_DDict* within the table are NOT freed.
*/
static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
if (hashSet->ddictPtrTable) ZSTD_customFree(hashSet->ddictPtrTable, customMem);
ZSTD_customFree(hashSet, customMem);
DEBUGLOG(4, "Freeing ddict hash set");
if (hashSet && hashSet->ddictPtrTable) {
ZSTD_customFree(hashSet->ddictPtrTable, customMem);
}
if (hashSet) {
ZSTD_customFree(hashSet, customMem);
}
}
/* Public function: Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set.
@ -229,6 +235,7 @@ static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx)
dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
dctx->outBufferMode = ZSTD_bm_buffered;
dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
}
static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
@ -248,10 +255,8 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
dctx->noForwardProgress = 0;
dctx->oversizedDuration = 0;
dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
ZSTD_DCtx_resetParameters(dctx);
dctx->validateChecksum = 1;
dctx->refMultipleDDicts = 0;
dctx->ddictSet = NULL;
ZSTD_DCtx_resetParameters(dctx);
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
dctx->dictContentEndForFuzzing = NULL;
#endif
@ -324,14 +329,19 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
ZSTD_memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */
}
/* Public function: Given a dctx with a digested frame params, re-selects the correct DDict based on
* the requested dict ID from the frame. ZSTD_d_refMultipleDDicts must be enabled for this function
* to be called.
/* Given a dctx with a digested frame params, re-selects the correct ZSTD_DDict based on
* the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then
* accordingly sets the ddict to be used to decompress the frame.
*
* If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is.
*
* ZSTD_d_refMultipleDDicts must be enabled for this function to be called.
*/
static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) {
assert(dctx->refMultipleDDicts && dctx->ddictSet);
DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame");
{ const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID);
if (dctx->ddict) {
const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID);
if (frameDDict) {
DEBUGLOG(4, "DDict found!");
ZSTD_clearDict(dctx);
@ -602,7 +612,7 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he
RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small");
/* Reference DDict requested by frame if dctx references multiple ddicts */
if (dctx->refMultipleDDicts && dctx->ddictSet) {
if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) {
ZSTD_DCtx_selectFrameDDict(dctx);
}
@ -727,6 +737,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
return bound;
}
/*-*************************************************************
* Frame decoding
***************************************************************/
@ -1549,7 +1560,13 @@ size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
if (ddict) {
dctx->ddict = ddict;
dctx->dictUses = ZSTD_use_indefinitely;
if (dctx->refMultipleDDicts && dctx->ddictSet) {
if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) {
if (dctx->ddictSet == NULL) {
dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem);
if (!dctx->ddictSet) {
RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!");
}
}
assert(!dctx->staticSize); /* Impossible: ddictSet cannot have been allocated if static dctx */
FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict,
ZSTD_getDictID_fromDDict(ddict), dctx->customMem), "");
@ -1600,8 +1617,8 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
bounds.upperBound = (int)ZSTD_d_ignoreChecksum;
return bounds;
case ZSTD_d_refMultipleDDicts:
bounds.lowerBound = (int)ZSTD_d_refSingleDict;
bounds.upperBound = (int)ZSTD_d_refMultipleDicts;
bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict;
bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts;
return bounds;
default:;
}
@ -1671,23 +1688,10 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value
return 0;
case ZSTD_d_refMultipleDDicts:
CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value);
DEBUGLOG(4, "Referencing multiple ddicts param enabled");
if (dctx->staticSize != 0) {
RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!");
}
dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value;
if (dctx->refMultipleDDicts == ZSTD_d_refMultipleDicts && dctx->ddictSet == NULL) {
DEBUGLOG(4, "Allocating new hash set");
dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem);
if (!dctx->ddictSet) {
RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!");
}
} else {
if (dctx->ddictSet) {
ZSTD_freeDDictHashSet(dctx->ddictSet, dctx->customMem);
dctx->ddictSet = NULL;
}
}
return 0;
default:;
}

View File

@ -1001,6 +1001,13 @@ ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, s
/*! ZSTD_DCtx_refDDict() :
* Reference a prepared dictionary, to be used to decompress next frames.
* The dictionary remains active for decompression of future frames using same DCtx.
*
* If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function
* will store the DDict references in a table, and the DDict used for decompression
* will be determined at decompression time, as per the dict ID in the frame.
* The memory for the table is allocated on the first call to refDDict, and can be
* freed with ZSTD_freeDCtx().
*
* @result : 0, or an error code (which can be tested with ZSTD_isError()).
* Note 1 : Currently, only one dictionary can be managed.
* Referencing a new dictionary effectively "discards" any previous one.
@ -1209,8 +1216,8 @@ typedef enum {
typedef enum {
/* Note: this enum controls ZSTD_d_refMultipleDDicts */
ZSTD_d_refSingleDict = 0,
ZSTD_d_refMultipleDicts = 1,
ZSTD_rmd_refSingleDDict = 0,
ZSTD_rmd_refMultipleDDicts = 1
} ZSTD_refMultipleDDicts_e;
typedef enum {
@ -2014,13 +2021,20 @@ ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param
*
* If enabled and dctx is allocated on the heap, then additional memory will be allocated
* to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_refDDict()
* using a given ZSTD_DCtx, rather than overwriting the previous DCtx referenced, will
* store all references, and at decompression time, the appropriate dictID is selected
* from the set of DDicts based on the dictID in the frame.
* using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead
* store all references. At decompression time, the appropriate dictID is selected
* from the set of DDicts based on the dictID in the frame.
*
* WARNING: Enabling this parameter will trigger memory allocation for the hash table, and disabling
* this parameter will free the memory allocated for the hashtable. Memory is allocated as per
* ZSTD_DCtx::customMem.
* Usage is simply calling ZSTD_refDDict() on multiple dict buffers.
*
* Param has values of byte ZSTD_refMultipleDDicts_e
*
* WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory
* allocation for the hash table. ZSTD_freeDCtx() also frees this memory.
* Memory is allocated as per ZSTD_DCtx::customMem.
*
* Although this function allocates memory for the table, the user is still responsible for
* memory management of the underlying ZSTD_DDict* themselves.
*/
#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4

View File

@ -1770,6 +1770,19 @@ static int basicUnitTests(U32 const seed, double compressibility)
size_t dictSize;
U32 dictID;
size_t dictHeaderSize;
size_t dictBufferFixedSize = 144;
unsigned char const dictBufferFixed[144] = {0x37, 0xa4, 0x30, 0xec, 0x63, 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x1f,
0x0f, 0x00, 0x28, 0xe5, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x80, 0x0f, 0x9e, 0x0f, 0x00, 0x00, 0x24, 0x40, 0x80, 0x00, 0x01,
0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0xde, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0xbc, 0xe1, 0x4b, 0x92, 0x0e, 0xb4, 0x7b, 0x18,
0x86, 0x61, 0x18, 0xc6, 0x18, 0x63, 0x8c, 0x31, 0xc6, 0x18, 0x63, 0x8c,
0x31, 0x66, 0x66, 0x66, 0x66, 0xb6, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x04,
0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x73, 0x6f, 0x64, 0x61,
0x6c, 0x65, 0x73, 0x20, 0x74, 0x6f, 0x72, 0x74, 0x6f, 0x72, 0x20, 0x65,
0x6c, 0x65, 0x69, 0x66, 0x65, 0x6e, 0x64, 0x2e, 0x20, 0x41, 0x6c, 0x69};
if (dictBuffer==NULL || samplesSizes==NULL) {
free(dictBuffer);
@ -1865,19 +1878,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictHeaderSize);
DISPLAYLEVEL(3, "test%3i : check dict header size correctness : ", testNb++);
{ unsigned char const dictBufferFixed[144] = { 0x37, 0xa4, 0x30, 0xec, 0x63, 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x1f,
0x0f, 0x00, 0x28, 0xe5, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x80, 0x0f, 0x9e, 0x0f, 0x00, 0x00, 0x24, 0x40, 0x80, 0x00, 0x01,
0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0xde, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0xbc, 0xe1, 0x4b, 0x92, 0x0e, 0xb4, 0x7b, 0x18,
0x86, 0x61, 0x18, 0xc6, 0x18, 0x63, 0x8c, 0x31, 0xc6, 0x18, 0x63, 0x8c,
0x31, 0x66, 0x66, 0x66, 0x66, 0xb6, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x04,
0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x73, 0x6f, 0x64, 0x61,
0x6c, 0x65, 0x73, 0x20, 0x74, 0x6f, 0x72, 0x74, 0x6f, 0x72, 0x20, 0x65,
0x6c, 0x65, 0x69, 0x66, 0x65, 0x6e, 0x64, 0x2e, 0x20, 0x41, 0x6c, 0x69 };
dictHeaderSize = ZDICT_getDictHeaderSize(dictBufferFixed, 144);
{ dictHeaderSize = ZDICT_getDictHeaderSize(dictBufferFixed, dictBufferFixedSize);
if (dictHeaderSize != 115) goto _output_error;
}
DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictHeaderSize);
@ -2331,6 +2332,67 @@ static int basicUnitTests(U32 const seed, double compressibility)
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : ZSTD_decompressDCtx() with multiple ddicts : ", testNb++);
{
const size_t numDicts = 128;
const size_t numFrames = 4;
size_t i;
ZSTD_DCtx* dctx = ZSTD_createDCtx();
ZSTD_DDict** ddictTable = (ZSTD_DDict**)malloc(sizeof(ZSTD_DDict*)*numDicts);
ZSTD_CDict** cdictTable = (ZSTD_CDict**)malloc(sizeof(ZSTD_CDict*)*numDicts);
U32 dictIDSeed = seed;
/* Create new compressed buffer that will hold frames with differing dictIDs */
char* dictBufferMulti = (char*)malloc(sizeof(char) * dictBufferFixedSize); /* Modifiable copy of fixed full dict buffer */
ZSTD_memcpy(dictBufferMulti, dictBufferFixed, dictBufferFixedSize);
cSize = 0;
/* Create a bunch of DDicts with random dict IDs */
for (i = 0; i < numDicts; ++i) {
U32 currDictID = FUZ_rand(&dictIDSeed);
MEM_writeLE32(dictBufferMulti+ZSTD_FRAMEIDSIZE, currDictID);
ddictTable[i] = ZSTD_createDDict(dictBufferMulti, dictBufferFixedSize);
cdictTable[i] = ZSTD_createCDict(dictBufferMulti, dictBufferFixedSize, 3);
if (!ddictTable[i] || !cdictTable[i] || ZSTD_getDictID_fromCDict(cdictTable[i]) != ZSTD_getDictID_fromDDict(ddictTable[i])) {
goto _output_error;
}
}
/* Compress a few frames using random CDicts */
{
size_t off = 0;
/* only use the first half so we don't push against size limit of compressedBuffer */
size_t const segSize = (CNBuffSize / 2) / numFrames;
for (i = 0; i < numFrames; i++) {
size_t dictIdx = FUZ_rand(&dictIDSeed) % numDicts;
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
{ CHECK_NEWV(r, ZSTD_compress_usingCDict(cctx,
(BYTE*)compressedBuffer + off, CNBuffSize - off,
(BYTE*)CNBuffer + segSize * (size_t)i, segSize,
cdictTable[dictIdx]));
off += r;
}
}
cSize = off;
}
/* We should succeed to decompression even though different dicts were used on different frames */
ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
ZSTD_DCtx_setParameter(dctx, ZSTD_d_refMultipleDDicts, ZSTD_rmd_refMultipleDDicts);
/* Reference every single ddict we made */
for (i = 0; i < numDicts; ++i) {
CHECK_Z( ZSTD_DCtx_refDDict(dctx, ddictTable[i]));
}
CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
ZSTD_freeDCtx(dctx);
for (i = 0; i < numDicts; ++i) {
ZSTD_freeCDict(cdictTable[i]);
ZSTD_freeDDict(ddictTable[i]);
}
free(dictBufferMulti);
free(ddictTable);
free(cdictTable);
}
DISPLAYLEVEL(3, "OK \n");
ZSTD_freeCCtx(cctx);
free(dictBuffer);
free(samplesSizes);