Merge pull request #2446 from senhuang42/multiple_ddicts_v3
[RFC] Support references to multiple DDictsdev
commit
69085db61c
|
@ -576,7 +576,7 @@ class Freestanding(object):
|
|||
)
|
||||
if self._xxh64_prefix is not None:
|
||||
replacements.append(
|
||||
(re.compile(r"([^\w]|^)(?P<orig>XXH64)_"), self._xxh64_prefix)
|
||||
(re.compile(r"([^\w]|^)(?P<orig>XXH64)[\(_]"), self._xxh64_prefix)
|
||||
)
|
||||
for filepath in self._dst_lib_file_paths():
|
||||
file = FileLines(filepath)
|
||||
|
|
|
@ -62,6 +62,7 @@
|
|||
#include "../common/fse.h"
|
||||
#define HUF_STATIC_LINKING_ONLY
|
||||
#include "../common/huf.h"
|
||||
#include "../common/xxhash.h" /* XXH64_reset, XXH64_update, XXH64_digest, XXH64 */
|
||||
#include "../common/zstd_internal.h" /* blockProperties_t */
|
||||
#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
|
||||
#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
|
||||
|
@ -72,6 +73,144 @@
|
|||
#endif
|
||||
|
||||
|
||||
|
||||
/*************************************
|
||||
* Multiple DDicts Hashset internals *
|
||||
*************************************/
|
||||
|
||||
#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4
|
||||
#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float.
|
||||
* Currently, that means a 0.75 load factor.
|
||||
* So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded
|
||||
* the load factor of the ddict hash set.
|
||||
*/
|
||||
|
||||
#define DDICT_HASHSET_TABLE_BASE_SIZE 64
|
||||
#define DDICT_HASHSET_RESIZE_FACTOR 2
|
||||
|
||||
/* Hash function to determine starting position of dict insertion within the table
|
||||
* Returns an index between [0, hashSet->ddictPtrTableSize]
|
||||
*/
|
||||
static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) {
|
||||
const U64 hash = XXH64(&dictID, sizeof(U32), 0);
|
||||
/* DDict ptr table size is a multiple of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */
|
||||
return hash & (hashSet->ddictPtrTableSize - 1);
|
||||
}
|
||||
|
||||
/* Adds DDict to a hashset without resizing it.
|
||||
* If inserting a DDict with a dictID that already exists in the set, replaces the one in the set.
|
||||
* Returns 0 if successful, or a zstd error code if something went wrong.
|
||||
*/
|
||||
static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict) {
|
||||
const U32 dictID = ZSTD_getDictID_fromDDict(ddict);
|
||||
size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
|
||||
const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1;
|
||||
RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!");
|
||||
DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
|
||||
while (hashSet->ddictPtrTable[idx] != NULL) {
|
||||
/* Replace existing ddict if inserting ddict with same dictID */
|
||||
if (ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]) == dictID) {
|
||||
DEBUGLOG(4, "DictID already exists, replacing rather than adding");
|
||||
hashSet->ddictPtrTable[idx] = ddict;
|
||||
return 0;
|
||||
}
|
||||
idx &= idxRangeMask;
|
||||
idx++;
|
||||
}
|
||||
DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
|
||||
hashSet->ddictPtrTable[idx] = ddict;
|
||||
hashSet->ddictPtrCount++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Expands hash table by factor of DDICT_HASHSET_RESIZE_FACTOR and
|
||||
* rehashes all values, allocates new table, frees old table.
|
||||
* Returns 0 on success, otherwise a zstd error code.
|
||||
*/
|
||||
static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
|
||||
size_t newTableSize = hashSet->ddictPtrTableSize * DDICT_HASHSET_RESIZE_FACTOR;
|
||||
const ZSTD_DDict** newTable = (const ZSTD_DDict**)ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem);
|
||||
const ZSTD_DDict** oldTable = hashSet->ddictPtrTable;
|
||||
size_t oldTableSize = hashSet->ddictPtrTableSize;
|
||||
size_t i;
|
||||
|
||||
DEBUGLOG(4, "Expanding DDict hash table! Old size: %zu new size: %zu", oldTableSize, newTableSize);
|
||||
RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!");
|
||||
hashSet->ddictPtrTable = newTable;
|
||||
hashSet->ddictPtrTableSize = newTableSize;
|
||||
hashSet->ddictPtrCount = 0;
|
||||
for (i = 0; i < oldTableSize; ++i) {
|
||||
if (oldTable[i] != NULL) {
|
||||
FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, oldTable[i]), "");
|
||||
}
|
||||
}
|
||||
ZSTD_customFree((void*)oldTable, customMem);
|
||||
DEBUGLOG(4, "Finished re-hash");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Fetches a DDict with the given dictID
|
||||
* Returns the ZSTD_DDict* with the requested dictID. If it doesn't exist, then returns NULL.
|
||||
*/
|
||||
static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, U32 dictID) {
|
||||
size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
|
||||
const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1;
|
||||
DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
|
||||
for (;;) {
|
||||
size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]);
|
||||
if (currDictID == dictID || currDictID == 0) {
|
||||
/* currDictID == 0 implies a NULL ddict entry */
|
||||
break;
|
||||
} else {
|
||||
idx &= idxRangeMask; /* Goes to start of table when we reach the end */
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
|
||||
return hashSet->ddictPtrTable[idx];
|
||||
}
|
||||
|
||||
/* Allocates space for and returns a ddict hash set
|
||||
* The hash set's ZSTD_DDict* table has all values automatically set to NULL to begin with.
|
||||
* Returns NULL if allocation failed.
|
||||
*/
|
||||
static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) {
|
||||
ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem);
|
||||
DEBUGLOG(4, "Allocating new hash set");
|
||||
ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem);
|
||||
ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE;
|
||||
ret->ddictPtrCount = 0;
|
||||
if (!ret || !ret->ddictPtrTable) {
|
||||
return NULL;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself.
|
||||
* Note: The ZSTD_DDict* within the table are NOT freed.
|
||||
*/
|
||||
static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
|
||||
DEBUGLOG(4, "Freeing ddict hash set");
|
||||
if (hashSet && hashSet->ddictPtrTable) {
|
||||
ZSTD_customFree((void*)hashSet->ddictPtrTable, customMem);
|
||||
}
|
||||
if (hashSet) {
|
||||
ZSTD_customFree(hashSet, customMem);
|
||||
}
|
||||
}
|
||||
|
||||
/* Public function: Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set.
|
||||
* Returns 0 on success, or a ZSTD error.
|
||||
*/
|
||||
static size_t ZSTD_DDictHashSet_addDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, ZSTD_customMem customMem) {
|
||||
DEBUGLOG(4, "Adding dict ID: %u to hashset with - Count: %zu Tablesize: %zu", ZSTD_getDictID_fromDDict(ddict), hashSet->ddictPtrCount, hashSet->ddictPtrTableSize);
|
||||
if (hashSet->ddictPtrCount * DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT / hashSet->ddictPtrTableSize * DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT != 0) {
|
||||
FORWARD_IF_ERROR(ZSTD_DDictHashSet_expand(hashSet, customMem), "");
|
||||
}
|
||||
FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, ddict), "");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*-*************************************************************
|
||||
* Context management
|
||||
***************************************************************/
|
||||
|
@ -101,6 +240,7 @@ static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx)
|
|||
dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
|
||||
dctx->outBufferMode = ZSTD_bm_buffered;
|
||||
dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
|
||||
dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
|
||||
}
|
||||
|
||||
static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
|
||||
|
@ -120,8 +260,8 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
|
|||
dctx->noForwardProgress = 0;
|
||||
dctx->oversizedDuration = 0;
|
||||
dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
|
||||
dctx->ddictSet = NULL;
|
||||
ZSTD_DCtx_resetParameters(dctx);
|
||||
dctx->validateChecksum = 1;
|
||||
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
dctx->dictContentEndForFuzzing = NULL;
|
||||
#endif
|
||||
|
@ -178,6 +318,10 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
|
|||
if (dctx->legacyContext)
|
||||
ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion);
|
||||
#endif
|
||||
if (dctx->ddictSet) {
|
||||
ZSTD_freeDDictHashSet(dctx->ddictSet, cMem);
|
||||
dctx->ddictSet = NULL;
|
||||
}
|
||||
ZSTD_customFree(dctx, cMem);
|
||||
return 0;
|
||||
}
|
||||
|
@ -190,6 +334,29 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
|
|||
ZSTD_memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */
|
||||
}
|
||||
|
||||
/* Given a dctx with a digested frame params, re-selects the correct ZSTD_DDict based on
|
||||
* the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then
|
||||
* accordingly sets the ddict to be used to decompress the frame.
|
||||
*
|
||||
* If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is.
|
||||
*
|
||||
* ZSTD_d_refMultipleDDicts must be enabled for this function to be called.
|
||||
*/
|
||||
static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) {
|
||||
assert(dctx->refMultipleDDicts && dctx->ddictSet);
|
||||
DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame");
|
||||
if (dctx->ddict) {
|
||||
const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID);
|
||||
if (frameDDict) {
|
||||
DEBUGLOG(4, "DDict found!");
|
||||
ZSTD_clearDict(dctx);
|
||||
dctx->dictID = dctx->fParams.dictID;
|
||||
dctx->ddict = frameDDict;
|
||||
dctx->dictUses = ZSTD_use_indefinitely;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*-*************************************************************
|
||||
* Frame header decoding
|
||||
|
@ -441,12 +608,19 @@ unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize)
|
|||
|
||||
/** ZSTD_decodeFrameHeader() :
|
||||
* `headerSize` must be the size provided by ZSTD_frameHeaderSize().
|
||||
* If multiple DDict references are enabled, also will choose the correct DDict to use.
|
||||
* @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
|
||||
static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize)
|
||||
{
|
||||
size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format);
|
||||
if (ZSTD_isError(result)) return result; /* invalid header */
|
||||
RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small");
|
||||
|
||||
/* Reference DDict requested by frame if dctx references multiple ddicts */
|
||||
if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) {
|
||||
ZSTD_DCtx_selectFrameDDict(dctx);
|
||||
}
|
||||
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
/* Skip the dictID check in fuzzing mode, because it makes the search
|
||||
* harder.
|
||||
|
@ -1391,6 +1565,16 @@ size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
|
|||
if (ddict) {
|
||||
dctx->ddict = ddict;
|
||||
dctx->dictUses = ZSTD_use_indefinitely;
|
||||
if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) {
|
||||
if (dctx->ddictSet == NULL) {
|
||||
dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem);
|
||||
if (!dctx->ddictSet) {
|
||||
RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!");
|
||||
}
|
||||
}
|
||||
assert(!dctx->staticSize); /* Impossible: ddictSet cannot have been allocated if static dctx */
|
||||
FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict, dctx->customMem), "");
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -1436,6 +1620,10 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
|
|||
bounds.lowerBound = (int)ZSTD_d_validateChecksum;
|
||||
bounds.upperBound = (int)ZSTD_d_ignoreChecksum;
|
||||
return bounds;
|
||||
case ZSTD_d_refMultipleDDicts:
|
||||
bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict;
|
||||
bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts;
|
||||
return bounds;
|
||||
default:;
|
||||
}
|
||||
bounds.error = ERROR(parameter_unsupported);
|
||||
|
@ -1473,6 +1661,9 @@ size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value
|
|||
case ZSTD_d_forceIgnoreChecksum:
|
||||
*value = (int)dctx->forceIgnoreChecksum;
|
||||
return 0;
|
||||
case ZSTD_d_refMultipleDDicts:
|
||||
*value = (int)dctx->refMultipleDDicts;
|
||||
return 0;
|
||||
default:;
|
||||
}
|
||||
RETURN_ERROR(parameter_unsupported, "");
|
||||
|
@ -1499,6 +1690,13 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value
|
|||
CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value);
|
||||
dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value;
|
||||
return 0;
|
||||
case ZSTD_d_refMultipleDDicts:
|
||||
CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value);
|
||||
if (dctx->staticSize != 0) {
|
||||
RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!");
|
||||
}
|
||||
dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value;
|
||||
return 0;
|
||||
default:;
|
||||
}
|
||||
RETURN_ERROR(parameter_unsupported, "");
|
||||
|
@ -1680,6 +1878,9 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
|
|||
} }
|
||||
#endif
|
||||
{ size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format);
|
||||
if (zds->refMultipleDDicts && zds->ddictSet) {
|
||||
ZSTD_DCtx_selectFrameDDict(zds);
|
||||
}
|
||||
DEBUGLOG(5, "header size : %u", (U32)hSize);
|
||||
if (ZSTD_isError(hSize)) {
|
||||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
|
||||
|
|
|
@ -99,6 +99,13 @@ typedef enum {
|
|||
ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
|
||||
} ZSTD_dictUses_e;
|
||||
|
||||
/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
|
||||
typedef struct {
|
||||
const ZSTD_DDict** ddictPtrTable;
|
||||
size_t ddictPtrTableSize;
|
||||
size_t ddictPtrCount;
|
||||
} ZSTD_DDictHashSet;
|
||||
|
||||
struct ZSTD_DCtx_s
|
||||
{
|
||||
const ZSTD_seqSymbol* LLTptr;
|
||||
|
@ -136,6 +143,8 @@ struct ZSTD_DCtx_s
|
|||
U32 dictID;
|
||||
int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
|
||||
ZSTD_dictUses_e dictUses;
|
||||
ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
|
||||
ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
|
||||
|
||||
/* streaming */
|
||||
ZSTD_dStreamStage streamStage;
|
||||
|
|
41
lib/zstd.h
41
lib/zstd.h
|
@ -546,12 +546,14 @@ typedef enum {
|
|||
* ZSTD_d_format
|
||||
* ZSTD_d_stableOutBuffer
|
||||
* ZSTD_d_forceIgnoreChecksum
|
||||
* ZSTD_d_refMultipleDDicts
|
||||
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
|
||||
* note : never ever use experimentalParam? names directly
|
||||
*/
|
||||
ZSTD_d_experimentalParam1=1000,
|
||||
ZSTD_d_experimentalParam2=1001,
|
||||
ZSTD_d_experimentalParam3=1002
|
||||
ZSTD_d_experimentalParam3=1002,
|
||||
ZSTD_d_experimentalParam4=1003
|
||||
|
||||
} ZSTD_dParameter;
|
||||
|
||||
|
@ -999,6 +1001,13 @@ ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, s
|
|||
/*! ZSTD_DCtx_refDDict() :
|
||||
* Reference a prepared dictionary, to be used to decompress next frames.
|
||||
* The dictionary remains active for decompression of future frames using same DCtx.
|
||||
*
|
||||
* If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function
|
||||
* will store the DDict references in a table, and the DDict used for decompression
|
||||
* will be determined at decompression time, as per the dict ID in the frame.
|
||||
* The memory for the table is allocated on the first call to refDDict, and can be
|
||||
* freed with ZSTD_freeDCtx().
|
||||
*
|
||||
* @result : 0, or an error code (which can be tested with ZSTD_isError()).
|
||||
* Note 1 : Currently, only one dictionary can be managed.
|
||||
* Referencing a new dictionary effectively "discards" any previous one.
|
||||
|
@ -1205,6 +1214,12 @@ typedef enum {
|
|||
ZSTD_d_ignoreChecksum = 1
|
||||
} ZSTD_forceIgnoreChecksum_e;
|
||||
|
||||
/* Values accepted by the ZSTD_d_refMultipleDDicts decompression parameter. */
typedef enum {
    ZSTD_rmd_refSingleDDict = 0,    /* default value of the parameter */
    ZSTD_rmd_refMultipleDDicts = 1  /* references to several DDicts are stored */
} ZSTD_refMultipleDDicts_e;
|
||||
|
||||
typedef enum {
|
||||
/* Note: this enum and the behavior it controls are effectively internal
|
||||
* implementation details of the compressor. They are expected to continue
|
||||
|
@ -2003,6 +2018,30 @@ ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param
|
|||
*/
|
||||
#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3
|
||||
|
||||
/* ZSTD_d_refMultipleDDicts
|
||||
* Experimental parameter.
|
||||
* Default is 0 == disabled. Set to 1 to enable
|
||||
*
|
||||
* If enabled and dctx is allocated on the heap, then additional memory will be allocated
|
||||
* to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_DCtx_refDDict()
|
||||
* using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead
|
||||
* store all references. At decompression time, the appropriate DDict is selected
|
||||
* from the set of DDicts based on the dictID in the frame.
|
||||
*
|
||||
* Usage is simply calling ZSTD_DCtx_refDDict() once for each ZSTD_DDict to reference.
|
||||
*
|
||||
* Param has values of type ZSTD_refMultipleDDicts_e
|
||||
*
|
||||
* WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory
|
||||
* allocation for the hash table. ZSTD_freeDCtx() also frees this memory.
|
||||
* Memory is allocated as per ZSTD_DCtx::customMem.
|
||||
*
|
||||
* Although this function allocates memory for the table, the user is still responsible for
|
||||
* memory management of the underlying ZSTD_DDict* themselves.
|
||||
*/
|
||||
#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4
|
||||
|
||||
|
||||
/*! ZSTD_DCtx_setFormat() :
|
||||
* Instruct the decoder context about what kind of data to decode next.
|
||||
* This instruction is mandatory to decode data without a fully-formed header,
|
||||
|
|
|
@ -1770,6 +1770,19 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
|||
size_t dictSize;
|
||||
U32 dictID;
|
||||
size_t dictHeaderSize;
|
||||
size_t dictBufferFixedSize = 144;
|
||||
unsigned char const dictBufferFixed[144] = {0x37, 0xa4, 0x30, 0xec, 0x63, 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x1f,
|
||||
0x0f, 0x00, 0x28, 0xe5, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x80, 0x0f, 0x9e, 0x0f, 0x00, 0x00, 0x24, 0x40, 0x80, 0x00, 0x01,
|
||||
0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0xde, 0x08,
|
||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
|
||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
|
||||
0x08, 0x08, 0x08, 0x08, 0xbc, 0xe1, 0x4b, 0x92, 0x0e, 0xb4, 0x7b, 0x18,
|
||||
0x86, 0x61, 0x18, 0xc6, 0x18, 0x63, 0x8c, 0x31, 0xc6, 0x18, 0x63, 0x8c,
|
||||
0x31, 0x66, 0x66, 0x66, 0x66, 0xb6, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x04,
|
||||
0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x73, 0x6f, 0x64, 0x61,
|
||||
0x6c, 0x65, 0x73, 0x20, 0x74, 0x6f, 0x72, 0x74, 0x6f, 0x72, 0x20, 0x65,
|
||||
0x6c, 0x65, 0x69, 0x66, 0x65, 0x6e, 0x64, 0x2e, 0x20, 0x41, 0x6c, 0x69};
|
||||
|
||||
if (dictBuffer==NULL || samplesSizes==NULL) {
|
||||
free(dictBuffer);
|
||||
|
@ -1865,19 +1878,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
|||
DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictHeaderSize);
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : check dict header size correctness : ", testNb++);
|
||||
{ unsigned char const dictBufferFixed[144] = { 0x37, 0xa4, 0x30, 0xec, 0x63, 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x1f,
|
||||
0x0f, 0x00, 0x28, 0xe5, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x80, 0x0f, 0x9e, 0x0f, 0x00, 0x00, 0x24, 0x40, 0x80, 0x00, 0x01,
|
||||
0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0xde, 0x08,
|
||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
|
||||
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
|
||||
0x08, 0x08, 0x08, 0x08, 0xbc, 0xe1, 0x4b, 0x92, 0x0e, 0xb4, 0x7b, 0x18,
|
||||
0x86, 0x61, 0x18, 0xc6, 0x18, 0x63, 0x8c, 0x31, 0xc6, 0x18, 0x63, 0x8c,
|
||||
0x31, 0x66, 0x66, 0x66, 0x66, 0xb6, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x04,
|
||||
0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x73, 0x6f, 0x64, 0x61,
|
||||
0x6c, 0x65, 0x73, 0x20, 0x74, 0x6f, 0x72, 0x74, 0x6f, 0x72, 0x20, 0x65,
|
||||
0x6c, 0x65, 0x69, 0x66, 0x65, 0x6e, 0x64, 0x2e, 0x20, 0x41, 0x6c, 0x69 };
|
||||
dictHeaderSize = ZDICT_getDictHeaderSize(dictBufferFixed, 144);
|
||||
{ dictHeaderSize = ZDICT_getDictHeaderSize(dictBufferFixed, dictBufferFixedSize);
|
||||
if (dictHeaderSize != 115) goto _output_error;
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictHeaderSize);
|
||||
|
@ -2331,6 +2332,74 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
|||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : ZSTD_decompressDCtx() with multiple ddicts : ", testNb++);
|
||||
{
|
||||
const size_t numDicts = 128;
|
||||
const size_t numFrames = 4;
|
||||
size_t i;
|
||||
ZSTD_DCtx* dctx = ZSTD_createDCtx();
|
||||
ZSTD_DDict** ddictTable = (ZSTD_DDict**)malloc(sizeof(ZSTD_DDict*)*numDicts);
|
||||
ZSTD_CDict** cdictTable = (ZSTD_CDict**)malloc(sizeof(ZSTD_CDict*)*numDicts);
|
||||
U32 dictIDSeed = seed;
|
||||
/* Create new compressed buffer that will hold frames with differing dictIDs */
|
||||
char* dictBufferMulti = (char*)malloc(sizeof(char) * dictBufferFixedSize); /* Modifiable copy of fixed full dict buffer */
|
||||
|
||||
ZSTD_memcpy(dictBufferMulti, dictBufferFixed, dictBufferFixedSize);
|
||||
/* Create a bunch of DDicts with random dict IDs */
|
||||
for (i = 0; i < numDicts; ++i) {
|
||||
U32 currDictID = FUZ_rand(&dictIDSeed);
|
||||
MEM_writeLE32(dictBufferMulti+ZSTD_FRAMEIDSIZE, currDictID);
|
||||
ddictTable[i] = ZSTD_createDDict(dictBufferMulti, dictBufferFixedSize);
|
||||
cdictTable[i] = ZSTD_createCDict(dictBufferMulti, dictBufferFixedSize, 3);
|
||||
if (!ddictTable[i] || !cdictTable[i] || ZSTD_getDictID_fromCDict(cdictTable[i]) != ZSTD_getDictID_fromDDict(ddictTable[i])) {
|
||||
goto _output_error;
|
||||
}
|
||||
}
|
||||
/* Compress a few frames using random CDicts */
|
||||
{
|
||||
size_t off = 0;
|
||||
/* only use the first half so we don't push against size limit of compressedBuffer */
|
||||
size_t const segSize = (CNBuffSize / 2) / numFrames;
|
||||
for (i = 0; i < numFrames; i++) {
|
||||
size_t dictIdx = FUZ_rand(&dictIDSeed) % numDicts;
|
||||
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
|
||||
{ CHECK_NEWV(r, ZSTD_compress_usingCDict(cctx,
|
||||
(BYTE*)compressedBuffer + off, CNBuffSize - off,
|
||||
(BYTE*)CNBuffer + segSize * (size_t)i, segSize,
|
||||
cdictTable[dictIdx]));
|
||||
off += r;
|
||||
}
|
||||
}
|
||||
cSize = off;
|
||||
}
|
||||
|
||||
/* We should succeed to decompression even though different dicts were used on different frames */
|
||||
ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
|
||||
ZSTD_DCtx_setParameter(dctx, ZSTD_d_refMultipleDDicts, ZSTD_rmd_refMultipleDDicts);
|
||||
/* Reference every single ddict we made */
|
||||
for (i = 0; i < numDicts; ++i) {
|
||||
CHECK_Z( ZSTD_DCtx_refDDict(dctx, ddictTable[i]));
|
||||
}
|
||||
CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
|
||||
/* Streaming decompression should also work */
|
||||
{
|
||||
ZSTD_inBuffer in = {compressedBuffer, cSize, 0};
|
||||
ZSTD_outBuffer out = {decodedBuffer, CNBuffSize, 0};
|
||||
while (in.pos < in.size) {
|
||||
CHECK_Z(ZSTD_decompressStream(dctx, &out, &in));
|
||||
}
|
||||
}
|
||||
ZSTD_freeDCtx(dctx);
|
||||
for (i = 0; i < numDicts; ++i) {
|
||||
ZSTD_freeCDict(cdictTable[i]);
|
||||
ZSTD_freeDDict(ddictTable[i]);
|
||||
}
|
||||
free(dictBufferMulti);
|
||||
free(ddictTable);
|
||||
free(cdictTable);
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
ZSTD_freeCCtx(cctx);
|
||||
free(dictBuffer);
|
||||
free(samplesSizes);
|
||||
|
|
Loading…
Reference in New Issue