diff --git a/lib/common/debug.h b/lib/common/debug.h
index 0c04ad2c..b4fc89d4 100644
--- a/lib/common/debug.h
+++ b/lib/common/debug.h
@@ -57,9 +57,9 @@ extern "C" {
 #endif
 
 
-/* static assert is triggered at compile time, leaving no runtime artefact,
- * but can only work with compile-time constants.
- * This variant can only be used inside a function. */
+/* static assert is triggered at compile time, leaving no runtime artefact.
+ * static assert only works with compile-time constants.
+ * Also, this variant can only be used inside a function. */
 #define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1])
 
 
@@ -70,9 +70,19 @@ extern "C" {
 #  define DEBUGLEVEL 0
 #endif
 
+
+/* DEBUGFILE can be defined externally,
+ * typically through compiler command line.
+ * note : currently useless.
+ * Value must be stderr or stdout */
+#ifndef DEBUGFILE
+#  define DEBUGFILE stderr
+#endif
+
+
 /* recommended values for DEBUGLEVEL :
- * 0 : no debug, all run-time functions disabled
- * 1 : no display, enables assert() only
+ * 0 : release mode, no debug, all run-time checks disabled
+ * 1 : enables assert() only, no display
  * 2 : reserved, for currently active debug path
  * 3 : events once per object lifetime (CCtx, CDict, etc.)
  * 4 : events once per frame
@@ -81,7 +91,7 @@ extern "C" {
  * 7+: events at every position (*very* verbose)
  *
  * It's generally inconvenient to output traces > 5.
- * In which case, it's possible to selectively enable higher verbosity levels
+ * In which case, it's possible to selectively trigger high verbosity levels
  * by modifying g_debug_level.
  */
 
@@ -95,11 +105,12 @@ extern "C" {
 
 #if (DEBUGLEVEL>=2)
 #  include <stdio.h>
-extern int g_debuglevel; /* here, this variable is only declared,
-                            it actually lives in debug.c,
-                            and is shared by the whole process.
-                            It's typically used to enable very verbose levels
-                            on selective conditions (such as position in src) */
+extern int g_debuglevel; /* the variable is only declared,
+                            it actually lives in debug.c,
+                            and is shared by the whole process.
+                            It's not thread-safe.
+                            It's useful when enabling very verbose levels
+                            on selective conditions (such as position in src) */
 
 #  define RAWLOG(l, ...) {                     \
                 if (l<=g_debuglevel) {         \
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index dcca6ed5..ce233f47 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -67,20 +67,31 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
 }
 
 
+/* ZSTD_downscaleStat() :
+ * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
+ * return the resulting sum of elements */
 static U32 ZSTD_downscaleStat(U32* table, U32 lastEltIndex, int malus)
 {
     U32 s, sum=0;
+    DEBUGLOG(2, "ZSTD_downscaleStat (nbElts=%u)", lastEltIndex+1);
     assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
-    for (s=0; s<=lastEltIndex; s++) {
+    for (s=0; s<lastEltIndex+1; s++) {
         table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
         sum += table[s];
     }
     return sum;
 }
 
-static void ZSTD_rescaleFreqs(optState_t* const optPtr,
-                              const BYTE* const src, size_t const srcSize,
-                              int optLevel)
+/* ZSTD_rescaleFreqs() :
+ * if first block (detected by optPtr->litLengthSum == 0) : init statistics
+ *    take hints from dictionary if there is one
+ *    or init from zero, using src for literals stats, or flat 1 for match symbols
+ * otherwise downscale existing stats, to be used as seed for next block.
+ */
+static void
+ZSTD_rescaleFreqs(optState_t* const optPtr,
+                  const BYTE* const src, size_t const srcSize,
+                  int optLevel)
 {
     optPtr->priceType = zop_dynamic;
 
@@ -778,6 +789,20 @@ static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
     return sol.litlen + sol.mlen;
 }
 
+
+static void
+listStats(const U32* table, int lastEltID)
+{
+    int const nbElts = lastEltID + 1;
+    int enb;
+    for (enb=0; enb < nbElts; enb++) {
+        (void)table;
+        //RAWLOG(2, "%3i:%3i,  ", enb, table[enb]);
+        RAWLOG(2, "%4i,", table[enb]);
+    }
+    RAWLOG(2, " \n");
+}
+
 FORCE_INLINE_TEMPLATE size_t
 ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                                seqStore_t* seqStore,
@@ -809,6 +834,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
     ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
     ip += (ip==prefixStart);
 
+    DEBUGLOG(2, "OffCode table on entry : ");
+    listStats(optStatePtr->offCodeFreq, MaxOff);
+
+    DEBUGLOG(2, "Literals table on entry : ");
+    listStats(optStatePtr->litFreq, MaxLit);
+
     /* Match Loop */
     while (ip < ilimit) {
         U32 cur, last_pos = 0;
@@ -1036,6 +1067,12 @@ _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
 
     }   /* while (ip < ilimit) */
 
+    DEBUGLOG(2, "OffCode table on exit : ");
+    listStats(optStatePtr->offCodeFreq, MaxOff);
+
+    DEBUGLOG(2, "Literals table on exit : ");
+    listStats(optStatePtr->litFreq, MaxLit);
+
     /* Return the last literals size */
     return iend - anchor;
 }
@@ -1054,8 +1091,8 @@ size_t ZSTD_compressBlock_btopt(
 static U32 ZSTD_upscaleStat(U32* table, U32 lastEltIndex, int bonus)
 {
     U32 s, sum=0;
-    assert(ZSTD_FREQ_DIV+bonus > 0);
-    for (s=0; s<=lastEltIndex; s++) {
+    assert(ZSTD_FREQ_DIV+bonus >= 0);
+    for (s=0; s<lastEltIndex+1; s++) {
         table[s] = 1 + (table[s] << bonus);
         sum += table[s];
     }
@@ -1069,25 +1106,29 @@ MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
     optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 1);
     optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 1);
 }
 
+
 /* ZSTD_initStats_ultra():
  * make a first compression pass, just to seed stats with more accurate starting values.
  * only works on first block, with no dictionary and no ldm.
- * this function must not fail, hence its usage conditions must be respected.
+ * this function cannot error, hence its contract must be respected.
  */
-static void ZSTD_initStats_ultra(
-            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-            const void* src, size_t srcSize)
+static void
+ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
+                     seqStore_t* seqStore,
+                     U32 rep[ZSTD_REP_NUM],
+                     const void* src, size_t srcSize)
 {
     U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
+    memcpy(tmpRep, rep, sizeof(tmpRep));
 
-    DEBUGLOG(5, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
+    DEBUGLOG(2, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
+    DEBUGLOG(2, "repCodes: %u, %u, %u", tmpRep[0], tmpRep[1], tmpRep[2]);
     assert(ms->opt.litLengthSum == 0);    /* first block */
     assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
     assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
     assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as 2-complement) */
 
-    memcpy(tmpRep, rep, sizeof(tmpRep));
     ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);   /* generate stats into ms->opt*/
 
     /* invalidate first scan from history */
@@ -1107,7 +1148,7 @@ size_t ZSTD_compressBlock_btultra(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize)
 {
-    DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
+    DEBUGLOG(2, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
 }
 
@@ -1115,9 +1156,9 @@ size_t ZSTD_compressBlock_btultra2(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
 {
-    DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
+    DEBUGLOG(2, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
 
-    /* 2-pass strategy
+    /* 2-pass strategy:
      * this strategy makes a first pass over first block to collect statistics
      * and seed next round's statistics with it.
      * After 1st pass, function forgets everything, and starts a new block.
diff --git a/programs/benchzstd.c b/programs/benchzstd.c
index 30864f47..90357b5b 100644
--- a/programs/benchzstd.c
+++ b/programs/benchzstd.c
@@ -359,7 +359,8 @@ BMK_benchMemAdvancedNoAlloc(
             srcPtrs[nbBlocks] = srcPtr;
             srcSizes[nbBlocks] = thisBlockSize;
             cPtrs[nbBlocks] = cPtr;
-            cCapacities[nbBlocks] = (adv->mode == BMK_decodeOnly) ? thisBlockSize : ZSTD_compressBound(thisBlockSize);
+#warning force streaming mode
+            cCapacities[nbBlocks] = (adv->mode == BMK_decodeOnly) ? thisBlockSize : ZSTD_compressBound(thisBlockSize) - 1;
             resPtrs[nbBlocks] = resPtr;
             resSizes[nbBlocks] = (adv->mode == BMK_decodeOnly) ? (size_t) ZSTD_findDecompressedSize(srcPtr, thisBlockSize) : thisBlockSize;
             srcPtr += thisBlockSize;
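A note on the DEBUG_STATIC_ASSERT macro whose comment this patch rewords: it declares a char array whose size is -1 when the condition fails, so a violation becomes a compile error and a passing check leaves no runtime artefact. A minimal usage sketch; the checkAssumptions function is a made-up example, not part of the patch:

#include "debug.h"   /* DEBUG_STATIC_ASSERT */

static void checkAssumptions(void)
{
    /* holds: expands to (void)sizeof(char[1]), compiles to nothing */
    DEBUG_STATIC_ASSERT(sizeof(int) >= 2);
    /* DEBUG_STATIC_ASSERT(sizeof(int) == 1); would fail: char[-1] */
}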
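The rewritten g_debuglevel comment ("useful when enabling very verbose levels on selective conditions") deserves an illustration. Because the variable lives in debug.c and is shared by the whole process (and, as the new comment warns, not thread-safe), code can raise it temporarily around a region of interest so that only that window produces level-7 traces. A hypothetical sketch, assuming a build with DEBUGLEVEL>=2; the position window and function name are invented:

#include "debug.h"   /* g_debuglevel, DEBUGLOG (both require DEBUGLEVEL>=2) */

static void traceSuspectRegion(size_t srcPos)
{
    int const saved = g_debuglevel;
    if (srcPos >= 123400 && srcPos < 123500)  /* invented window */
        g_debuglevel = 7;                     /* events at every position */
    DEBUGLOG(7, "pos %u : detailed state would be dumped here", (unsigned)srcPos);
    g_debuglevel = saved;                     /* restore (not thread-safe) */
}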
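The new ZSTD_downscaleStat() comment says counts shrink by a factor 2^(ZSTD_FREQ_DIV+malus). With ZSTD_FREQ_DIV at its then-current value of 4 and malus=0, each count drops to roughly 1/16, while the +1 floor keeps every symbol representable as a seed for the next block. A self-contained sketch of the same arithmetic; the table values are invented:

#include <stdio.h>

#define FREQ_DIV 4   /* mirrors ZSTD_FREQ_DIV in zstd_opt.c */

/* same shape as ZSTD_downscaleStat() : divide by 2^(FREQ_DIV+malus),
 * floor of 1 so no symbol ever becomes impossible */
static unsigned downscale(unsigned* table, unsigned lastEltIndex, int malus)
{
    unsigned s, sum = 0;
    for (s = 0; s < lastEltIndex + 1; s++) {
        table[s] = 1 + (table[s] >> (FREQ_DIV + malus));
        sum += table[s];
    }
    return sum;
}

int main(void)
{
    unsigned freq[4] = { 160, 16, 3, 0 };   /* invented counts */
    unsigned const sum = downscale(freq, 3, 0);
    /* prints: 11 2 1 1 (sum=15) : large counts keep their weight, zeros become 1 */
    printf("%u %u %u %u (sum=%u)\n", freq[0], freq[1], freq[2], freq[3], sum);
    return 0;
}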
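About the benchzstd.c change: ZSTD_compressBound(srcSize) is the worst-case compressed size, and, as the #warning states, advertising one byte less appears intended to force the streaming code path, since single-pass output is only guaranteed to fit when dstCapacity >= ZSTD_compressBound(srcSize). A small standalone look at the bound arithmetic; the block size is invented:

#include <stdio.h>
#include <zstd.h>   /* ZSTD_compressBound */

int main(void)
{
    size_t const blockSize = 1 << 17;   /* invented 128 KB block */
    size_t const bound = ZSTD_compressBound(blockSize);
    /* the patched benchmark reports bound-1 as destination capacity,
       so a worst-case block can no longer take the single-pass path */
    printf("block=%zu bound=%zu capacity passed=%zu\n",
           blockSize, bound, bound - 1);
    return 0;
}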