diff --git a/programs/bench.c b/programs/bench.c index f577ed02..b3a8222d 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -63,6 +63,8 @@ #define MB *(1 <<20) #define GB *(1U<<30) +#define BMK_RUNTEST_DEFAULT_MS 1000 + static const size_t maxMemory = (sizeof(size_t)==4) ? /* 32-bit */ (2 GB - 64 MB) : /* 64-bit */ (size_t)(1ULL << ((sizeof(size_t)*8)-31)); @@ -375,32 +377,37 @@ BMK_runOutcome_t BMK_benchFunction( struct BMK_timedFnState_s { U64 timeSpent_ns; U64 timeBudget_ns; + U64 runBudget_ns; BMK_runTime_t fastestRun; unsigned nbLoops; UTIL_time_t coolTime; }; /* typedef'd to BMK_timedFnState_t within bench.h */ -BMK_timedFnState_t* BMK_createTimedFnState(unsigned nbSeconds) { +BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms) +{ BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r)); if (r == NULL) return NULL; /* malloc() error */ - BMK_resetTimedFnState(r, nbSeconds); + BMK_resetTimedFnState(r, total_ms, run_ms); return r; } -void BMK_resetTimedFnState(BMK_timedFnState_t* r, unsigned nbSeconds) { - r->timeSpent_ns = 0; - r->timeBudget_ns = (U64)nbSeconds * TIMELOOP_NANOSEC; - if (!nbSeconds) r->timeBudget_ns = 1; - r->fastestRun.nanoSecPerRun = (U64)(-1LL); - r->fastestRun.sumOfReturn = (size_t)(-1LL); - r->nbLoops = 1; - r->coolTime = UTIL_getTime(); -} - void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); } +void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms) +{ + if (!total_ms) total_ms = 1 ; + if (!run_ms) run_ms = 1; + if (run_ms > total_ms) run_ms = total_ms; + timedFnState->timeSpent_ns = 0; + timedFnState->timeBudget_ns = (U64)total_ms * TIMELOOP_NANOSEC / 1000; + timedFnState->runBudget_ns = (U64)run_ms * TIMELOOP_NANOSEC / 1000; + timedFnState->fastestRun.nanoSecPerRun = (U64)(-1LL); + timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL); + timedFnState->nbLoops = 1; + timedFnState->coolTime = UTIL_getTime(); +} /* Tells if nb of seconds 
set in timedFnState for all runs is spent. * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */ @@ -421,6 +428,8 @@ BMK_runOutcome_t BMK_benchTimedFn( void * const * dstBlockBuffers, const size_t * dstBlockCapacities, size_t* blockResults) { + U64 const runBudget_ns = cont->runBudget_ns; + U64 const runTimeMin_ns = runBudget_ns / 2; int completed = 0; BMK_runTime_t bestRunTime = cont->fastestRun; @@ -453,9 +462,9 @@ BMK_runOutcome_t BMK_benchTimedFn( cont->timeSpent_ns += loopDuration_ns; - /* estimate nbLoops for next run to last approximately 1 second */ - if (loopDuration_ns > (TIMELOOP_NANOSEC / 50)) { + /* estimate nbLoops for next run to last approximately runBudget_ns */ + if (loopDuration_ns > (runBudget_ns / 50)) { U64 const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun); - cont->nbLoops = (U32)(TIMELOOP_NANOSEC / fastestRun_ns) + 1; + cont->nbLoops = (U32)(runBudget_ns / fastestRun_ns) + 1; } else { /* previous run was too short : blindly increase workload by x multiplier */ const unsigned multiplier = 10; @@ -463,7 +472,7 @@ BMK_runOutcome_t BMK_benchTimedFn( cont->nbLoops *= multiplier; } - if(loopDuration_ns < MINUSABLETIME) { + if(loopDuration_ns < runTimeMin_ns) { /* don't report results for which benchmark run time was too small : increased risks of rounding errors */ assert(completed == 0); continue; @@ -775,8 +784,8 @@ BMK_benchOutcome_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, void ** const resPtrs = (void**)malloc(maxNbBlocks * sizeof(void*)); size_t* const resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); - BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(adv->nbSeconds); - BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(adv->nbSeconds); + BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS); + BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS); ZSTD_CCtx* const cctx = 
ZSTD_createCCtx(); ZSTD_DCtx* const dctx = ZSTD_createDCtx(); diff --git a/programs/bench.h b/programs/bench.h index 4f5332a9..ec3cffe8 100644 --- a/programs/bench.h +++ b/programs/bench.h @@ -255,24 +255,33 @@ BMK_runOutcome_t BMK_benchFunction( -/* ==== Benchmarking any function, providing intermediate results ==== */ +/* ==== Benchmark any function, providing intermediate results ==== */ -/* state information needed by benchFunctionTimed */ +/* state information tracking benchmark session */ typedef struct BMK_timedFnState_s BMK_timedFnState_t; -BMK_timedFnState_t* BMK_createTimedFnState(unsigned nbSeconds); -void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned nbSeconds); +/* BMK_createTimedFnState() and BMK_resetTimedFnState() : + * Create/Set BMK_timedFnState_t for next benchmark session, + * which shall last a minimum of total_ms milliseconds, + * producing intermediate results, paced at interval of (approximately) run_ms. + */ +BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms); +void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms); void BMK_freeTimedFnState(BMK_timedFnState_t* state); +/* Tells if duration of all benchmark runs has exceeded total_ms + */ +int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState); + + /* BMK_benchTimedFn() : - * Similar to BMK_benchFunction(), - * tries to find automatically `nbLoops`, so that each run lasts approximately 1 second. - * Note : minimum `nbLoops` is 1, a run may last more than 1 second if benchFn is slow. - * Most arguments are the same as BMK_benchFunction() - * Usage - initialize a timedFnState, selecting a total nbSeconds allocated for _all_ benchmarks run - * call BMK_benchTimedFn() repetitively, collecting intermediate results (each run is supposed to last about 1 seconds) - * Check if time budget is spent using BMK_isCompleted_TimedFn() + * Similar to BMK_benchFunction(), most arguments being identical. 
+ * Automatically determines `nbLoops` so that each result is regularly produced at intervals of about run_ms milliseconds. + * Note : minimum `nbLoops` is 1, therefore a run may last more than run_ms, and possibly even more than total_ms. + * Usage - initialize timedFnState, select benchmark duration (total_ms) and each measurement duration (run_ms) + * call BMK_benchTimedFn() repeatedly; each measurement is expected to last about run_ms milliseconds + * Check if total time budget is spent or exceeded, using BMK_isCompleted_TimedFn() */ BMK_runOutcome_t BMK_benchTimedFn( BMK_timedFnState_t* timedFnState, @@ -284,9 +293,6 @@ BMK_runOutcome_t BMK_benchTimedFn( size_t* blockResults); -/* Tells if total nb of benchmark runs has exceeded amount of time set in timedFnState - */ -int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState); diff --git a/programs/zstdcli.c b/programs/zstdcli.c index d5a2216d..7a9b621f 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -846,13 +846,13 @@ int main(int argCount, const char* argv[]) if (cLevelLast > ZSTD_maxCLevel()) cLevelLast = ZSTD_maxCLevel(); if (cLevelLast < cLevel) cLevelLast = cLevel; if (cLevelLast > cLevel) - DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast); + DISPLAYLEVEL(3, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast); if(filenameIdx) { if(separateFiles) { unsigned i; for(i = 0; i < filenameIdx; i++) { int c; - DISPLAYLEVEL(2, "Benchmarking %s \n", filenameTable[i]); + DISPLAYLEVEL(3, "Benchmarking %s \n", filenameTable[i]); for(c = cLevel; c <= cLevelLast; c++) { BMK_benchFilesAdvanced(&filenameTable[i], 1, dictFileName, c, &compressionParams, g_displayLevel, &benchParams); } diff --git a/tests/fullbench.c b/tests/fullbench.c index ef1f111b..fd4815c9 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -514,10 +514,11 @@ static size_t benchMem(U32 benchNb, { size_t i; for (i=0; i