Changed dynamic FSE threshold for offsets
Recent experience showed that the default distribution table for offsets can become a poor fit quite quickly as the number of symbols grows, while it remains a reasonable choice for much longer for length symbols. Changed the formula so that the dynamic FSE threshold is now 32 symbols for offsets. It remains at 64 symbols for lengths. Detection is based on defaultNormLog.
This commit is contained in:
parent
744f36aac4
commit
a7fdceeccd
@ -1743,8 +1743,6 @@ ZSTD_selectEncodingType(
|
||||
ZSTD_defaultPolicy_e const isDefaultAllowed,
|
||||
ZSTD_strategy const strategy)
|
||||
{
|
||||
#define MIN_SEQ_FOR_DYNAMIC_FSE 64
|
||||
#define MAX_SEQ_FOR_STATIC_FSE 1000
|
||||
ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
|
||||
if (mostFrequent == nbSeq) {
|
||||
*repeatMode = FSE_repeat_none;
|
||||
@ -1761,11 +1759,14 @@ ZSTD_selectEncodingType(
|
||||
}
|
||||
if (strategy < ZSTD_lazy) {
|
||||
if (isDefaultAllowed) {
|
||||
if ((*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
|
||||
size_t const staticFse_nbSeq_max = 1000;
|
||||
size_t const dynamicFse_nbSeq_min = 1 << defaultNormLog; /* 32 for offset, 64 for lengths */
|
||||
assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */
|
||||
if ((*repeatMode == FSE_repeat_valid) && (nbSeq < staticFse_nbSeq_max)) {
|
||||
DEBUGLOG(5, "Selected set_repeat");
|
||||
return set_repeat;
|
||||
}
|
||||
if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) {
|
||||
if ((nbSeq < dynamicFse_nbSeq_min) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) {
|
||||
DEBUGLOG(5, "Selected set_basic");
|
||||
/* The format allows default tables to be repeated, but it isn't useful.
|
||||
* When using simple heuristics to select encoding type, we don't want
|
||||
|
Loading…
x
Reference in New Issue
Block a user