|
|
|
@ -2891,22 +2891,28 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* NOTE(review): this span appears to be a diff rendering with the change markers
 * stripped, so two versions of one helper are interleaved line by line:
 *   - ZSTD_checkDictNCount(): declared to return size_t. It passes two conditions
 *     to RETURN_ERROR_IF together with dictionary_corrupted
 *     (dictMaxSymbolValue < maxSymbolValue, and normalizedCounter[s] == 0 for any
 *     s in [0, maxSymbolValue]) and otherwise returns 0.
 *   - ZSTD_dictNCountRepeat(): declared to return FSE_repeat. For the same two
 *     conditions it returns FSE_repeat_check, otherwise FSE_repeat_valid.
 * The assignment of body lines to each version is inferred from the two
 * signatures and the return types - TODO confirm against the actual patch.
 */
/* Dictionaries that assign zero probability to symbols that show up cause problems
|
|
|
|
|
when FSE encoding. Refuse dictionaries that assign zero probability to symbols
|
|
|
|
|
that we may encounter during compression.
|
|
|
|
|
NOTE: This behavior is not standard and could be improved in the future. */
|
|
|
|
|
static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) {
|
|
|
|
|
* when FSE encoding. Mark dictionaries with zero probability symbols as FSE_repeat_check
|
|
|
|
|
* and only dictionaries with 100% valid symbols can be assumed valid.
|
|
|
|
|
*/
|
|
|
|
|
static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue)
|
|
|
|
|
{
|
|
|
|
|
U32 s;  /* index used by the symbol scan loops below */
|
|
|
|
|
RETURN_ERROR_IF(dictMaxSymbolValue < maxSymbolValue, dictionary_corrupted, "dict fse tables don't have all symbols");
|
|
|
|
|
for (s = 0; s <= maxSymbolValue; ++s) {
|
|
|
|
|
RETURN_ERROR_IF(normalizedCounter[s] == 0, dictionary_corrupted, "dict fse tables don't have all symbols");
|
|
|
|
|
if (dictMaxSymbolValue < maxSymbolValue) {  /* dictionary max symbol value is below the required one */
|
|
|
|
|
return FSE_repeat_check;
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
for (s = 0; s <= maxSymbolValue; ++s) {  /* inclusive upper bound: maxSymbolValue itself is checked */
|
|
|
|
|
if (normalizedCounter[s] == 0) {  /* a symbol in the required range has a zero normalized count */
|
|
|
|
|
return FSE_repeat_check;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return FSE_repeat_valid;  /* reached only when neither condition above triggered */
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
|
|
|
|
|
short* offcodeNCount, unsigned* offcodeMaxValue,
|
|
|
|
|
const void* const dict, size_t dictSize)
|
|
|
|
|
{
|
|
|
|
|
short offcodeNCount[MaxOff+1];
|
|
|
|
|
unsigned offcodeMaxValue = MaxOff;
|
|
|
|
|
const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */
|
|
|
|
|
const BYTE* const dictEnd = dictPtr + dictSize;
|
|
|
|
|
dictPtr += 8;
|
|
|
|
@ -2928,16 +2934,16 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
{ unsigned offcodeLog;
|
|
|
|
|
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
|
|
|
|
|
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
|
|
|
|
|
RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
|
|
|
|
|
RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
|
|
|
|
|
/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
|
|
|
|
|
/* fill all offset symbols to avoid garbage at end of table */
|
|
|
|
|
RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
|
|
|
|
|
bs->entropy.fse.offcodeCTable,
|
|
|
|
|
offcodeNCount, MaxOff, offcodeLog,
|
|
|
|
|
workspace, HUF_WORKSPACE_SIZE)),
|
|
|
|
|
dictionary_corrupted, "");
|
|
|
|
|
/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
|
|
|
|
|
dictPtr += offcodeHeaderSize;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -2946,13 +2952,12 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
|
|
|
|
|
size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
|
|
|
|
|
RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
|
|
|
|
|
RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
|
|
|
|
|
/* Every match length code must have non-zero probability */
|
|
|
|
|
FORWARD_IF_ERROR( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML), "");
|
|
|
|
|
RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
|
|
|
|
|
bs->entropy.fse.matchlengthCTable,
|
|
|
|
|
matchlengthNCount, matchlengthMaxValue, matchlengthLog,
|
|
|
|
|
workspace, HUF_WORKSPACE_SIZE)),
|
|
|
|
|
dictionary_corrupted, "");
|
|
|
|
|
bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML);
|
|
|
|
|
dictPtr += matchlengthHeaderSize;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -2961,13 +2966,12 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
|
|
|
|
|
size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
|
|
|
|
|
RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
|
|
|
|
|
RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
|
|
|
|
|
/* Every literal length code must have non-zero probability */
|
|
|
|
|
FORWARD_IF_ERROR( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL), "");
|
|
|
|
|
RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
|
|
|
|
|
bs->entropy.fse.litlengthCTable,
|
|
|
|
|
litlengthNCount, litlengthMaxValue, litlengthLog,
|
|
|
|
|
workspace, HUF_WORKSPACE_SIZE)),
|
|
|
|
|
dictionary_corrupted, "");
|
|
|
|
|
bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL);
|
|
|
|
|
dictPtr += litlengthHeaderSize;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -2977,6 +2981,22 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
|
|
|
|
|
bs->rep[2] = MEM_readLE32(dictPtr+8);
|
|
|
|
|
dictPtr += 12;
|
|
|
|
|
|
|
|
|
|
{ size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
|
|
|
|
|
U32 offcodeMax = MaxOff;
|
|
|
|
|
if (dictContentSize <= ((U32)-1) - 128 KB) {
|
|
|
|
|
U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
|
|
|
|
|
offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
|
|
|
|
|
}
|
|
|
|
|
/* All offset values <= dictContentSize + 128 KB must be representable for a valid table */
|
|
|
|
|
bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff));
|
|
|
|
|
|
|
|
|
|
/* All repCodes must be <= dictContentSize and != 0 */
|
|
|
|
|
{ U32 u;
|
|
|
|
|
for (u=0; u<3; u++) {
|
|
|
|
|
RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, "");
|
|
|
|
|
RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, "");
|
|
|
|
|
} } }
|
|
|
|
|
|
|
|
|
|
return dictPtr - (const BYTE*)dict;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -2999,8 +3019,6 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
|
|
|
|
{
|
|
|
|
|
const BYTE* dictPtr = (const BYTE*)dict;
|
|
|
|
|
const BYTE* const dictEnd = dictPtr + dictSize;
|
|
|
|
|
short offcodeNCount[MaxOff+1];
|
|
|
|
|
unsigned offcodeMaxValue = MaxOff;
|
|
|
|
|
size_t dictID;
|
|
|
|
|
size_t eSize;
|
|
|
|
|
|
|
|
|
@ -3009,32 +3027,16 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
|
|
|
|
assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
|
|
|
|
|
|
|
|
|
|
dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ );
|
|
|
|
|
eSize = ZSTD_loadCEntropy(bs, workspace, offcodeNCount, &offcodeMaxValue, dict, dictSize);
|
|
|
|
|
eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize);
|
|
|
|
|
FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed");
|
|
|
|
|
dictPtr += eSize;
|
|
|
|
|
|
|
|
|
|
{ size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
|
|
|
|
|
U32 offcodeMax = MaxOff;
|
|
|
|
|
if (dictContentSize <= ((U32)-1) - 128 KB) {
|
|
|
|
|
U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
|
|
|
|
|
offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
|
|
|
|
|
}
|
|
|
|
|
/* All offset values <= dictContentSize + 128 KB must be representable */
|
|
|
|
|
FORWARD_IF_ERROR(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)), "");
|
|
|
|
|
/* All repCodes must be <= dictContentSize and != 0*/
|
|
|
|
|
{ U32 u;
|
|
|
|
|
for (u=0; u<3; u++) {
|
|
|
|
|
RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, "");
|
|
|
|
|
RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, "");
|
|
|
|
|
} }
|
|
|
|
|
|
|
|
|
|
bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid;
|
|
|
|
|
bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid;
|
|
|
|
|
bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid;
|
|
|
|
|
{
|
|
|
|
|
size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
|
|
|
|
|
FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
|
|
|
|
|
ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "");
|
|
|
|
|
return dictID;
|
|
|
|
|
}
|
|
|
|
|
return dictID;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/** ZSTD_compress_insertDictionary() :
|
|
|
|
|