changed dynamic fse threshold for offset

recent experienced showed that
default distribution table for offset
can get it wrong pretty quickly with the nb of symbols,
while it remains a reasonable choice much longer for lengths symbols.

Changed the formula,
so that dynamic threshold is now 32 symbols for offsets.
It remains at 64 symbols for lengths.

Detection based on defaultNormLog
This commit is contained in:
Yann Collet 2018-05-25 17:41:16 -07:00
parent 744f36aac4
commit a7fdceeccd

View File

@ -1743,8 +1743,6 @@ ZSTD_selectEncodingType(
ZSTD_defaultPolicy_e const isDefaultAllowed,
ZSTD_strategy const strategy)
{
#define MIN_SEQ_FOR_DYNAMIC_FSE 64
#define MAX_SEQ_FOR_STATIC_FSE 1000
ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
if (mostFrequent == nbSeq) {
*repeatMode = FSE_repeat_none;
@ -1761,11 +1759,14 @@ ZSTD_selectEncodingType(
}
if (strategy < ZSTD_lazy) {
if (isDefaultAllowed) {
if ((*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
size_t const staticFse_nbSeq_max = 1000;
size_t const dynamicFse_nbSeq_min = 1 << defaultNormLog; /* 32 for offset, 64 for lengths */
assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */
if ((*repeatMode == FSE_repeat_valid) && (nbSeq < staticFse_nbSeq_max)) {
DEBUGLOG(5, "Selected set_repeat");
return set_repeat;
}
if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) {
if ((nbSeq < dynamicFse_nbSeq_min) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) {
DEBUGLOG(5, "Selected set_basic");
/* The format allows default tables to be repeated, but it isn't useful.
* When using simple heuristics to select encoding type, we don't want