Reject dictionaries with incomplete entropy tables

If a dictionary specifies that a symbol has probability zero in its
`matchLength`, `literalLength`, or `offset` FSE table, but the symbol
appears when compressing input, the compressor fails.

Ensure that dictionaries support all `matchLength`, and `literalLength`
codes.  They must also support all of the `offset` codes required to
represent every possible offset that can appear in the first block.
dev
Nick Terrell 2016-10-19 17:22:08 -07:00
parent d760529a05
commit f9c9af3c2e
1 changed files with 29 additions and 2 deletions

View File

@ -2448,6 +2448,20 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t
}
/* Dictionaries that assign zero probability to symbols that show up causes problems
when FSE encoding. Refuse dictionaries that assign zero probability to symbols
that we may encounter during compression.
NOTE: This behavior is not standard and could be improved in the future. */
static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) {
U32 s;
if (dictMaxSymbolValue < maxSymbolValue) return ERROR(dictionary_corrupted);
for (s = 0; s <= maxSymbolValue; ++s) {
if (normalizedCounter[s] == 0) return ERROR(dictionary_corrupted);
}
return 0;
}
/* Dictionary format :
Magic == ZSTD_DICT_MAGIC (4 bytes)
HUF_writeCTable(256)
@ -2464,17 +2478,19 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_
{
const BYTE* dictPtr = (const BYTE*)dict;
const BYTE* const dictEnd = dictPtr + dictSize;
short offcodeNCount[MaxOff+1];
unsigned offcodeMaxValue = MaxOff;
{ size_t const hufHeaderSize = HUF_readCTable(cctx->hufTable, 255, dict, dictSize);
if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
dictPtr += hufHeaderSize;
}
{ short offcodeNCount[MaxOff+1];
unsigned offcodeMaxValue = MaxOff, offcodeLog;
{ unsigned offcodeLog;
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
CHECK_E (FSE_buildCTable(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted);
dictPtr += offcodeHeaderSize;
}
@ -2484,6 +2500,8 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_
size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
/* Every match length code must have non-zero probability */
CHECK_F (ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
CHECK_E (FSE_buildCTable(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted);
dictPtr += matchlengthHeaderSize;
}
@ -2493,6 +2511,8 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_
size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
/* Every literal length code must have non-zero probability */
CHECK_F (ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
CHECK_E(FSE_buildCTable(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted);
dictPtr += litlengthHeaderSize;
}
@ -2503,6 +2523,13 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_
cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
dictPtr += 12;
{ size_t const maxOffset = (dictEnd - dictPtr) + 128 KB; /* The maximum offset that must be supported */
/* Calculate minimum offset code required to represent maxOffset */
unsigned const offcodeMax = ZSTD_highbit32(maxOffset);
/* Every possible supported offset <= dictContentSize + 128 KB must be representable */
CHECK_F (ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)));
}
cctx->flagStaticTables = 1;
return dictPtr - (const BYTE*)dict;
}