Merge branch 'bench' into largeNbDicts

dev
Yann Collet 2018-08-28 11:26:46 -07:00
commit 0491037db9
6 changed files with 99 additions and 73 deletions

View File

@ -63,6 +63,8 @@
/* Postfix size units : written after a number,
 * e.g. `64 MB` expands to `64 *(1 <<20)`. */
#define MB   *(1 <<20)
#define GB   *(1U<<30)

/* Duration of one benchmark measurement, in milliseconds
 * (passed as `run_ms` when creating a timed benchmark state). */
#define BMK_RUNTEST_DEFAULT_MS   1000

/* Memory ceiling, selected from the width of size_t :
 * - size_t on 4 bytes : 2 GB - 64 MB
 * - otherwise         : 1 << (nbBits(size_t) - 31), i.e. 8 GB when size_t is 64-bit */
static const size_t maxMemory =
    (sizeof(size_t) != 4)
        ? (size_t)(1ULL << ((sizeof(size_t)*8)-31))   /* 64-bit */
        : (2 GB - 64 MB);                             /* 32-bit */
@ -375,32 +377,37 @@ BMK_runOutcome_t BMK_benchFunction(
/* State of one timed benchmark session,
 * carried across successive calls to BMK_benchTimedFn(). */
struct BMK_timedFnState_s {
/* cumulated duration of all runs so far, in nanoseconds
 * (each run adds its loop duration) */
U64 timeSpent_ns;
/* total duration allotted to the session, in nanoseconds (derived from total_ms) */
U64 timeBudget_ns;
/* target duration of a single run, in nanoseconds (derived from run_ms);
 * used to estimate nbLoops for the next run */
U64 runBudget_ns;
/* best measurement so far; reset to maximum values when the state is (re)initialized */
BMK_runTime_t fastestRun;
/* nb of loops for the next run; starts at 1, re-estimated after each run */
unsigned nbLoops;
/* timestamp taken with UTIL_getTime() when the state is (re)initialized
 * NOTE(review): presumably used to pace cool-down pauses - its readers are not visible here */
UTIL_time_t coolTime;
}; /* typedef'd to BMK_timedFnState_t within bench.h */
BMK_timedFnState_t* BMK_createTimedFnState(unsigned nbSeconds) {
BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
{
BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r));
if (r == NULL) return NULL; /* malloc() error */
BMK_resetTimedFnState(r, nbSeconds);
BMK_resetTimedFnState(r, total_ms, run_ms);
return r;
}
void BMK_resetTimedFnState(BMK_timedFnState_t* r, unsigned nbSeconds) {
r->timeSpent_ns = 0;
r->timeBudget_ns = (U64)nbSeconds * TIMELOOP_NANOSEC;
if (!nbSeconds) r->timeBudget_ns = 1;
r->fastestRun.nanoSecPerRun = (U64)(-1LL);
r->fastestRun.sumOfReturn = (size_t)(-1LL);
r->nbLoops = 1;
r->coolTime = UTIL_getTime();
}
void BMK_freeTimedFnState(BMK_timedFnState_t* state) {
free(state);
}
void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
{
if (!total_ms) total_ms = 1 ;
if (!run_ms) run_ms = 1;
if (run_ms > total_ms) run_ms = total_ms;
timedFnState->timeSpent_ns = 0;
timedFnState->timeBudget_ns = (U64)total_ms * TIMELOOP_NANOSEC / 1000;
timedFnState->runBudget_ns = (U64)run_ms * TIMELOOP_NANOSEC / 1000;
timedFnState->fastestRun.nanoSecPerRun = (U64)(-1LL);
timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
timedFnState->nbLoops = 1;
timedFnState->coolTime = UTIL_getTime();
}
/* Tells if the total time budget set in timedFnState (for all runs) is spent.
 * note : this function will return 1 if BMK_benchTimedFn() has actually errored. */
@ -421,6 +428,8 @@ BMK_runOutcome_t BMK_benchTimedFn(
void * const * dstBlockBuffers, const size_t * dstBlockCapacities,
size_t* blockResults)
{
U64 const runBudget_ns = cont->runBudget_ns;
U64 const runTimeMin_ns = runBudget_ns / 2;
int completed = 0;
BMK_runTime_t bestRunTime = cont->fastestRun;
@ -453,9 +462,9 @@ BMK_runOutcome_t BMK_benchTimedFn(
cont->timeSpent_ns += loopDuration_ns;
/* estimate nbLoops for next run to last approximately runBudget_ns */
if (loopDuration_ns > (TIMELOOP_NANOSEC / 50)) {
if (loopDuration_ns > (runBudget_ns / 50)) {
U64 const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
cont->nbLoops = (U32)(TIMELOOP_NANOSEC / fastestRun_ns) + 1;
cont->nbLoops = (U32)(runBudget_ns / fastestRun_ns) + 1;
} else {
/* previous run was too short : blindly increase workload by x multiplier */
const unsigned multiplier = 10;
@ -463,7 +472,7 @@ BMK_runOutcome_t BMK_benchTimedFn(
cont->nbLoops *= multiplier;
}
if(loopDuration_ns < MINUSABLETIME) {
if(loopDuration_ns < runTimeMin_ns) {
/* don't report results for which benchmark run time was too small : increased risks of rounding errors */
assert(completed == 0);
continue;
@ -775,8 +784,8 @@ BMK_benchOutcome_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize,
void ** const resPtrs = (void**)malloc(maxNbBlocks * sizeof(void*));
size_t* const resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(adv->nbSeconds);
BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(adv->nbSeconds);
BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);
BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
ZSTD_DCtx* const dctx = ZSTD_createDCtx();

View File

@ -255,24 +255,33 @@ BMK_runOutcome_t BMK_benchFunction(
/* ==== Benchmark any function, providing intermediate results ==== */

/* state information tracking benchmark session (opaque type) */
typedef struct BMK_timedFnState_s BMK_timedFnState_t;

/* BMK_createTimedFnState() and BMK_resetTimedFnState() :
 * Create/Set BMK_timedFnState_t for next benchmark session,
 * which shall last a minimum of total_ms milliseconds,
 * producing intermediate results, paced at interval of (approximately) run_ms.
 * A value of 0 for total_ms or run_ms is replaced by 1, and run_ms is capped to total_ms.
 * BMK_createTimedFnState() returns NULL if allocation fails.
 * A state obtained from BMK_createTimedFnState() is released with BMK_freeTimedFnState().
 */
BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms);
void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms);
void BMK_freeTimedFnState(BMK_timedFnState_t* state);

/* Tells if duration of all benchmark runs has exceeded total_ms
 */
int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState);
/* BMK_benchTimedFn() :
 * Similar to BMK_benchFunction(), most arguments being identical.
 * Automatically determines `nbLoops` so that each result is regularly produced at interval of about run_ms.
 * Note : minimum `nbLoops` is 1, therefore a run may last more than run_ms, and possibly even more than total_ms.
 * Usage - initialize timedFnState, select benchmark duration (total_ms) and each measurement duration (run_ms)
 * call BMK_benchTimedFn() repeatedly, each measurement is supposed to last about run_ms
 * Check if total time budget is spent or exceeded, using BMK_isCompleted_TimedFn()
 */
BMK_runOutcome_t BMK_benchTimedFn(
BMK_timedFnState_t* timedFnState,
@ -284,9 +293,6 @@ BMK_runOutcome_t BMK_benchTimedFn(
size_t* blockResults);
/* Tells if total nb of benchmark runs has exceeded amount of time set in timedFnState
*/
int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState);

View File

@ -846,13 +846,13 @@ int main(int argCount, const char* argv[])
if (cLevelLast > ZSTD_maxCLevel()) cLevelLast = ZSTD_maxCLevel();
if (cLevelLast < cLevel) cLevelLast = cLevel;
if (cLevelLast > cLevel)
DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast);
DISPLAYLEVEL(3, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast);
if(filenameIdx) {
if(separateFiles) {
unsigned i;
for(i = 0; i < filenameIdx; i++) {
int c;
DISPLAYLEVEL(2, "Benchmarking %s \n", filenameTable[i]);
DISPLAYLEVEL(3, "Benchmarking %s \n", filenameTable[i]);
for(c = cLevel; c <= cLevelLast; c++) {
BMK_benchFilesAdvanced(&filenameTable[i], 1, dictFileName, c, &compressionParams, g_displayLevel, &benchParams);
}

View File

@ -200,7 +200,7 @@ zstreamtest-dll : $(ZSTDDIR)/common/xxhash.c # xxh symbols not exposed from dll
zstreamtest-dll : $(ZSTREAM_LOCAL_FILES)
$(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT)
paramgrill : DEBUGFLAGS = -DNDEBUG # turn off assert() for speed measurements
paramgrill : DEBUGFLAGS = # turn off assert() by default for speed measurements
paramgrill : $(ZSTD_FILES) $(PRGDIR)/bench.c $(PRGDIR)/datagen.c paramgrill.c
$(CC) $(FLAGS) $^ -lm -o $@$(EXT)

View File

@ -514,10 +514,11 @@ static size_t benchMem(U32 benchNb,
{ size_t i; for (i=0; i<dstBuffSize; i++) dstBuff[i]=(BYTE)i; }
/* benchmark loop */
{ BMK_timedFnState_t* const tfs = BMK_createTimedFnState(g_nbIterations);
{ BMK_timedFnState_t* const tfs = BMK_createTimedFnState(g_nbIterations * 1000, 1000);
BMK_runTime_t bestResult;
bestResult.sumOfReturn = 0;
bestResult.nanoSecPerRun = (unsigned long long)(-1LL);
assert(tfs != NULL);
for (;;) {
void* const dstBuffv = dstBuff;
BMK_runOutcome_t const bOutcome =

View File

@ -468,7 +468,7 @@ static void paramVariation(paramValues_t* ptr, memoTable_t* mtAll, const U32 nbC
/* randomParams() :
 * Builds a parameter set by drawing, for each parameter index v,
 * a random slot in [0, rangetable[v]) and converting it through rangeMap().
 * @return : the generated parameter set. */
static paramValues_t randomParams(void)
{
    varInds_t v; paramValues_t p;
    /* strict upper bound : valid indices are [0, NUM_PARAMS), like the other per-parameter loops.
     * `v <= NUM_PARAMS` would read rangetable[] and write p.vals[] one slot too far
     * (assumes both hold NUM_PARAMS entries - TODO confirm against their declarations). */
    for (v = 0; v < NUM_PARAMS; v++) {
        p.vals[v] = rangeMap(v, FUZ_rand(&g_rand) % rangetable[v]);
    }
    return p;
}
@ -632,32 +632,39 @@ static void BMK_translateAdvancedParams(FILE* f, const paramValues_t params) {
varInds_t v;
int first = 1;
fprintf(f,"--zstd=");
for(v = 0; v < NUM_PARAMS; v++) {
if(g_silenceParams[v]) { continue; }
if(!first) { fprintf(f, ","); }
for (v = 0; v < NUM_PARAMS; v++) {
if (g_silenceParams[v]) { continue; }
if (!first) { fprintf(f, ","); }
fprintf(f,"%s=", g_paramNames[v]);
if(v == strt_ind) { fprintf(f,"%u", params.vals[v]); }
if (v == strt_ind) { fprintf(f,"%u", params.vals[v]); }
else { displayParamVal(f, v, params.vals[v], 0); }
first = 0;
}
fprintf(f, "\n");
}
/* BMK_displayOneResult() :
 * Writes one result line into `f` :
 * the non-silenced parameters of `res`, comma-separated between braces,
 * followed by a comment giving compression ratio, compression speed and decompression speed.
 * @f       : destination stream.
 * @res     : result to display; received by value, so the cParamUnsetMin() adjustment stays local.
 * @srcSize : size of the benchmarked source, used to compute the compression ratio. */
static void BMK_displayOneResult(FILE* f, winnerInfo_t res, const size_t srcSize)
{
    varInds_t v;
    int first = 1;   /* no separator before the first displayed parameter */
    res.params = cParamUnsetMin(res.params);
    fprintf(f, " {");
    for (v = 0; v < NUM_PARAMS; v++) {
        if (g_silenceParams[v]) { continue; }
        if (!first) { fprintf(f, ","); }
        displayParamVal(f, v, res.params.vals[v], 3);
        first = 0;
    }

    {   /* report a ratio of 0 instead of dividing by zero when no compressed size is available */
        double const ratio = res.result.cSize ?
                            (double)srcSize / res.result.cSize : 0;
        double const cSpeedMBps = (double)res.result.cSpeed / MB_UNIT;
        double const dSpeedMBps = (double)res.result.dSpeed / MB_UNIT;
        fprintf(f, " }, /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */\n",
                ratio, cSpeedMBps, dSpeedMBps);
    }
}
/* Writes to f the results of a parameter benchmark */
@ -1427,8 +1434,8 @@ BMK_benchMemInvertible( buffers_t buf, contexts_t ctx,
/* init args */
int compressionCompleted = (mode == BMK_decodeOnly);
int decompressionCompleted = (mode == BMK_compressOnly);
BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(nbSeconds);
BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(nbSeconds);
BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(nbSeconds * 1000, 1000);
BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(nbSeconds * 1000, 1000);
BMK_initCCtxArgs cctxprep;
BMK_initDCtxArgs dctxprep;
cctxprep.cctx = cctx;
@ -1440,6 +1447,8 @@ BMK_benchMemInvertible( buffers_t buf, contexts_t ctx,
dctxprep.dictBuffer = dictBuffer;
dctxprep.dictBufferSize = dictBufferSize;
assert(timeStateCompress != NULL);
assert(timeStateDecompress != NULL);
while(!compressionCompleted) {
BMK_runOutcome_t const cOutcome = BMK_benchTimedFn(timeStateCompress,
&local_defaultCompress, cctx,
@ -1540,12 +1549,13 @@ static int allBench(BMK_benchResult_t* resultPtr,
const constraint_t target,
BMK_benchResult_t* winnerResult, int feas)
{
BMK_benchResult_t resultMax, benchres;
BMK_benchResult_t benchres;
U64 loopDurationC = 0, loopDurationD = 0;
double uncertaintyConstantC = 3., uncertaintyConstantD = 3.;
double winnerRS;
/* initial benchmarking, gives exact ratio and memory, warms up future runs */
CBENCHMARK(1, benchres, tmp, BMK_both, 1);
CBENCHMARK(1, benchres, tmp, BMK_both, 2);
winnerRS = resultScore(*winnerResult, buf.srcSize, target);
DEBUGOUTPUT("WinnerScore: %f\n ", winnerRS);
@ -1554,12 +1564,12 @@ static int allBench(BMK_benchResult_t* resultPtr,
/* calculate uncertainty in compression / decompression runs */
if(benchres.cSpeed) {
loopDurationC = ((buf.srcSize * TIMELOOP_NANOSEC) / benchres.cSpeed);
loopDurationC = (((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.cSpeed);
uncertaintyConstantC = ((loopDurationC + (double)(2 * g_clockGranularity))/loopDurationC);
}
if(benchres.dSpeed) {
loopDurationD = ((buf.srcSize * TIMELOOP_NANOSEC) / benchres.dSpeed);
loopDurationD = (((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.dSpeed);
uncertaintyConstantD = ((loopDurationD + (double)(2 * g_clockGranularity))/loopDurationD);
}
@ -1568,27 +1578,25 @@ static int allBench(BMK_benchResult_t* resultPtr,
return WORSE_RESULT;
}
/* second run, if first run is too short, gives approximate cSpeed + dSpeed */
CBENCHMARK(loopDurationC < TIMELOOP_NANOSEC / 10, benchres, tmp, BMK_compressOnly, 1);
CBENCHMARK(loopDurationD < TIMELOOP_NANOSEC / 10, benchres, tmp, BMK_decodeOnly, 1);
/* ensure all measurements last a minimum time, to reduce measurement errors */
assert(loopDurationC >= TIMELOOP_NANOSEC / 10);
assert(loopDurationD >= TIMELOOP_NANOSEC / 10);
*resultPtr = benchres;
/* optimistic assumption of benchres */
resultMax = benchres;
resultMax.cSpeed *= uncertaintyConstantC * VARIANCE;
resultMax.dSpeed *= uncertaintyConstantD * VARIANCE;
{ BMK_benchResult_t resultMax = benchres;
resultMax.cSpeed *= uncertaintyConstantC * VARIANCE;
resultMax.dSpeed *= uncertaintyConstantD * VARIANCE;
/* disregard infeasible results in feas mode */
/* disregard if resultMax < winner in infeas mode */
if((feas && !feasible(resultMax, target)) ||
(!feas && (winnerRS > resultScore(resultMax, buf.srcSize, target)))) {
return WORSE_RESULT;
/* disregard infeasible results in feas mode */
/* disregard if resultMax < winner in infeas mode */
if((feas && !feasible(resultMax, target)) ||
(!feas && (winnerRS > resultScore(resultMax, buf.srcSize, target)))) {
return WORSE_RESULT;
}
}
CBENCHMARK(loopDurationC < TIMELOOP_NANOSEC, benchres, tmp, BMK_compressOnly, 1);
CBENCHMARK(loopDurationD < TIMELOOP_NANOSEC, benchres, tmp, BMK_decodeOnly, 1);
*resultPtr = benchres;
/* compare by resultScore when in infeas */
@ -1601,6 +1609,7 @@ static int allBench(BMK_benchResult_t* resultPtr,
}
}
#define INFEASIBLE_THRESHOLD 200
/* Memoized benchmarking, won't benchmark anything which has already been benchmarked before. */
static int benchMemo(BMK_benchResult_t* resultPtr,
@ -1628,6 +1637,7 @@ static int benchMemo(BMK_benchResult_t* resultPtr,
return res;
}
typedef struct {
U64 cSpeed_min;
U64 dSpeed_min;