Reject dictionaries with incomplete entropy tables
If a dictionary specifies that a symbol has probability zero in its `matchLength`, `literalLength`, or `offset` FSE table, but the symbol appears when compressing input, the compressor fails. Ensure that dictionaries support all `matchLength` and `literalLength` codes. They must also support all of the `offset` codes required to represent every possible offset that can appear in the first block.
dev
parent
d760529a05
commit
f9c9af3c2e
|
@ -2448,6 +2448,20 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t
|
|||
}
|
||||
|
||||
|
||||
/* Dictionaries that assign zero probability to symbols that show up causes problems
|
||||
when FSE encoding. Refuse dictionaries that assign zero probability to symbols
|
||||
that we may encounter during compression.
|
||||
NOTE: This behavior is not standard and could be improved in the future. */
|
||||
static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) {
|
||||
U32 s;
|
||||
if (dictMaxSymbolValue < maxSymbolValue) return ERROR(dictionary_corrupted);
|
||||
for (s = 0; s <= maxSymbolValue; ++s) {
|
||||
if (normalizedCounter[s] == 0) return ERROR(dictionary_corrupted);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Dictionary format :
|
||||
Magic == ZSTD_DICT_MAGIC (4 bytes)
|
||||
HUF_writeCTable(256)
|
||||
|
@ -2464,17 +2478,19 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_
|
|||
{
|
||||
const BYTE* dictPtr = (const BYTE*)dict;
|
||||
const BYTE* const dictEnd = dictPtr + dictSize;
|
||||
short offcodeNCount[MaxOff+1];
|
||||
unsigned offcodeMaxValue = MaxOff;
|
||||
|
||||
{ size_t const hufHeaderSize = HUF_readCTable(cctx->hufTable, 255, dict, dictSize);
|
||||
if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
|
||||
dictPtr += hufHeaderSize;
|
||||
}
|
||||
|
||||
{ short offcodeNCount[MaxOff+1];
|
||||
unsigned offcodeMaxValue = MaxOff, offcodeLog;
|
||||
{ unsigned offcodeLog;
|
||||
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
|
||||
if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
|
||||
if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
|
||||
/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
|
||||
CHECK_E (FSE_buildCTable(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted);
|
||||
dictPtr += offcodeHeaderSize;
|
||||
}
|
||||
|
@ -2484,6 +2500,8 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_
|
|||
size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
|
||||
if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
|
||||
if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
|
||||
/* Every match length code must have non-zero probability */
|
||||
CHECK_F (ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
|
||||
CHECK_E (FSE_buildCTable(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted);
|
||||
dictPtr += matchlengthHeaderSize;
|
||||
}
|
||||
|
@ -2493,6 +2511,8 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_
|
|||
size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
|
||||
if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
|
||||
if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
|
||||
/* Every literal length code must have non-zero probability */
|
||||
CHECK_F (ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
|
||||
CHECK_E(FSE_buildCTable(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted);
|
||||
dictPtr += litlengthHeaderSize;
|
||||
}
|
||||
|
@ -2503,6 +2523,13 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_
|
|||
cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
|
||||
dictPtr += 12;
|
||||
|
||||
{ size_t const maxOffset = (dictEnd - dictPtr) + 128 KB; /* The maximum offset that must be supported */
|
||||
/* Calculate minimum offset code required to represent maxOffset */
|
||||
unsigned const offcodeMax = ZSTD_highbit32(maxOffset);
|
||||
/* Every possible supported offset <= dictContentSize + 128 KB must be representable */
|
||||
CHECK_F (ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)));
|
||||
}
|
||||
|
||||
cctx->flagStaticTables = 1;
|
||||
return dictPtr - (const BYTE*)dict;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue