From a95e9e80d17b43e50be5cad40d760613ed42514c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 18 May 2018 14:09:42 -0700 Subject: [PATCH] adding some debug functions to observe statistics --- lib/compress/zstd_compress.c | 51 ++++++++++++++------------- lib/compress/zstd_compress_internal.h | 27 ++++++++++++++ lib/compress/zstd_opt.c | 1 - 3 files changed, 53 insertions(+), 26 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 1b488284..88c81533 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1561,15 +1561,16 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) mlCodeTable[seqStorePtr->longLengthPos] = MaxML; } + typedef enum { ZSTD_defaultDisallowed = 0, ZSTD_defaultAllowed = 1 } ZSTD_defaultPolicy_e; -MEM_STATIC -symbolEncodingType_e ZSTD_selectEncodingType( - FSE_repeat* repeatMode, size_t const mostFrequent, size_t nbSeq, - U32 defaultNormLog, ZSTD_defaultPolicy_e const isDefaultAllowed) +MEM_STATIC symbolEncodingType_e +ZSTD_selectEncodingType( + FSE_repeat* repeatMode, size_t const mostFrequent, size_t nbSeq, + U32 defaultNormLog, ZSTD_defaultPolicy_e const isDefaultAllowed) { #define MIN_SEQ_FOR_DYNAMIC_FSE 64 #define MAX_SEQ_FOR_STATIC_FSE 1000 @@ -1605,17 +1606,17 @@ symbolEncodingType_e ZSTD_selectEncodingType( return set_compressed; } -MEM_STATIC -size_t ZSTD_buildCTable(void* dst, size_t dstCapacity, - FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, - U32* count, U32 max, - BYTE const* codeTable, size_t nbSeq, - S16 const* defaultNorm, U32 defaultNormLog, U32 defaultMax, - FSE_CTable const* prevCTable, size_t prevCTableSize, - void* workspace, size_t workspaceSize) +MEM_STATIC size_t +ZSTD_buildCTable(void* dst, size_t dstCapacity, + FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, + U32* count, U32 max, + const BYTE* codeTable, size_t nbSeq, + const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, + const FSE_CTable* prevCTable, size_t prevCTableSize, + void* workspace, size_t workspaceSize) { BYTE* op = (BYTE*)dst; - BYTE const* const oend = op + dstCapacity; + const BYTE* const oend = op + dstCapacity; switch (type) { case set_rle: @@ -1865,9 +1866,9 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog, ZSTD_defaultAllowed); { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, - count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, - prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable), - workspace, HUF_WORKSPACE_SIZE); + count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, + prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable), + workspace, HUF_WORKSPACE_SIZE); if (ZSTD_isError(countSize)) return countSize; op += countSize; } } @@ -1880,9 +1881,9 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog, defaultPolicy); { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, - count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, - prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable), - workspace, HUF_WORKSPACE_SIZE); + count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable), + workspace, HUF_WORKSPACE_SIZE); if (ZSTD_isError(countSize)) return countSize; op += countSize; } } @@ -1893,9 +1894,9 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog, ZSTD_defaultAllowed); { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, - count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, - prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable), - workspace, HUF_WORKSPACE_SIZE); + count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, + prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable), + workspace, HUF_WORKSPACE_SIZE); if (ZSTD_isError(countSize)) return countSize; op += countSize; } } @@ -1917,9 +1918,9 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, } MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr, - ZSTD_entropyCTables_t const* prevEntropy, + const ZSTD_entropyCTables_t* prevEntropy, ZSTD_entropyCTables_t* nextEntropy, - ZSTD_CCtx_params const* cctxParams, + const ZSTD_CCtx_params* cctxParams, void* dst, size_t dstCapacity, size_t srcSize, U32* workspace, int bmi2) { @@ -1934,7 +1935,7 @@ MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr, if (ZSTD_isError(cSize)) return cSize; /* Check compressibility */ - { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize); /* note : fixed formula, maybe should depend on compression level, or strategy */ + { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize); /* note : fixed formula; to be refined, depending on compression level, or strategy */ if (cSize >= maxCSize) return 0; /* block not compressed */ } diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 92dd1824..5d6a1e94 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -633,6 +633,33 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, return contiguous; } + +/* debug functions */ + +MEM_STATIC double ZSTD_fWeight(U32 rawStat) +{ + U32 const fp_accuracy = 8; + U32 const fp_multiplier = (1 << fp_accuracy); + U32 const stat = rawStat + 1; + U32 const hb = ZSTD_highbit32(stat); + U32 const BWeight = hb * fp_multiplier; + U32 const FWeight = (stat << fp_accuracy) >> hb; + U32 const weight = BWeight + FWeight; + assert(hb + fp_accuracy < 31); + return (double)weight / fp_multiplier; +} + +MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) +{ + unsigned u, sum; + for (u=0, sum=0; u<=max; u++) sum += table[u]; + DEBUGLOG(2, "total nb elts: %u", sum); + for (u=0; u<=max; u++) { + DEBUGLOG(2, "%2u: %5u (%.2f)", + u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) ); + } +} + #if defined (__cplusplus) } #endif diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 9288e0ac..03c5b5b6 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -1057,7 +1057,6 @@ size_t ZSTD_compressBlock_btultra( /* re-inforce weight of collected statistics */ ZSTD_upscaleStats(&ms->opt); } - DEBUGLOG(5, "base=%p, src=%p, src-base=%zi", ms->window.base, src, (const BYTE*)src - (const BYTE*)ms->window.base); return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, 0 /*extDict*/); }