From a7fdceeccd0880f8804d6d4fe7ce6ab0c6cbb98b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 25 May 2018 17:41:16 -0700 Subject: [PATCH] changed dynamic fse threshold for offset recent experiments showed that default distribution table for offset can get it wrong pretty quickly as the nb of symbols grows, while it remains a reasonable choice much longer for lengths symbols. Changed the formula, so that dynamic threshold is now 32 symbols for offsets. It remains at 64 symbols for lengths. Detection is based on defaultNormLog --- lib/compress/zstd_compress.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index df7cb8aa..e6b363ae 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1743,8 +1743,6 @@ ZSTD_selectEncodingType( ZSTD_defaultPolicy_e const isDefaultAllowed, ZSTD_strategy const strategy) { -#define MIN_SEQ_FOR_DYNAMIC_FSE 64 -#define MAX_SEQ_FOR_STATIC_FSE 1000 ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); if (mostFrequent == nbSeq) { *repeatMode = FSE_repeat_none; @@ -1761,11 +1759,14 @@ ZSTD_selectEncodingType( } if (strategy < ZSTD_lazy) { if (isDefaultAllowed) { - if ((*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + size_t const staticFse_nbSeq_max = 1000; + size_t const dynamicFse_nbSeq_min = 1 << defaultNormLog; /* 32 for offset, 64 for lengths */ + assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */ + if ((*repeatMode == FSE_repeat_valid) && (nbSeq < staticFse_nbSeq_max)) { DEBUGLOG(5, "Selected set_repeat"); return set_repeat; } - if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) { + if ((nbSeq < dynamicFse_nbSeq_min) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) { DEBUGLOG(5, "Selected set_basic"); /* The format allows default tables to be repeated, but it isn't useful. 
* When using simple heuristics to select encoding type, we don't want