benchFunction Timed Wrappers
Add BMK_benchFunctionTimed. Add BMK_init_customResultContinuation. Change benchMem to use BMK_benchFunctionTimed. Minor fixes/adjustments.
This commit is contained in:
parent
a8eea99ebe
commit
ab26f24c9c
157
programs/bench.c
157
programs/bench.c
@ -335,6 +335,70 @@ BMK_customReturn_t BMK_benchFunction(
|
|||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
BMK_customResultContinuation_t BMK_init_customResultContinuation(unsigned iter) {
|
||||||
|
BMK_customResultContinuation_t c;
|
||||||
|
c.completed = 0;
|
||||||
|
c.state.nbLoops = 1;
|
||||||
|
c.state.coolTime = UTIL_getTime();
|
||||||
|
c.state.timeRemaining = (U64)iter * TIMELOOP_NANOSEC;
|
||||||
|
c.intermediateResult.error = 0;
|
||||||
|
c.intermediateResult.result.nanoSecPerRun = (U64)(-1LL);
|
||||||
|
c.intermediateResult.result.sumOfReturn = 0;
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define MINUSABLETIME 500000000ULL
|
||||||
|
|
||||||
|
//how to use minusabletime?
|
||||||
|
//only report times which are > minUsable
|
||||||
|
void BMK_benchFunctionTimed(
|
||||||
|
size_t (*benchFn)(const void*, size_t, void*, size_t, void*), void* benchPayload,
|
||||||
|
size_t (*initFn)(void*), void* initPayload,
|
||||||
|
size_t blockCount,
|
||||||
|
const void* const * const srcBlockBuffers, const size_t* srcBlockSizes,
|
||||||
|
void* const * const dstBlockBuffers, const size_t* dstBlockCapacities,
|
||||||
|
BMK_customResultContinuation_t* cont)
|
||||||
|
{
|
||||||
|
U64 fastest = cont->intermediateResult.result.nanoSecPerRun;
|
||||||
|
int completed = 0;
|
||||||
|
|
||||||
|
while(!cont->completed && !completed)
|
||||||
|
{
|
||||||
|
/* Overheat protection */
|
||||||
|
if (UTIL_clockSpanMicro(cont->state.coolTime) > ACTIVEPERIOD_MICROSEC) {
|
||||||
|
DEBUGOUTPUT("\rcooling down ... \r");
|
||||||
|
UTIL_sleep(COOLPERIOD_SEC);
|
||||||
|
cont->state.coolTime = UTIL_getTime();
|
||||||
|
}
|
||||||
|
|
||||||
|
cont->intermediateResult = BMK_benchFunction(benchFn, benchPayload, initFn, initPayload,
|
||||||
|
blockCount, srcBlockBuffers, srcBlockSizes, dstBlockBuffers, dstBlockCapacities, cont->state.nbLoops);
|
||||||
|
if(cont->intermediateResult.error) { /* completed w/ error */
|
||||||
|
cont->completed = 1;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
{ U64 const loopDuration = cont->intermediateResult.result.nanoSecPerRun * cont->state.nbLoops;
|
||||||
|
cont->completed = (cont->state.timeRemaining <= loopDuration);
|
||||||
|
cont->state.timeRemaining -= loopDuration;
|
||||||
|
if (loopDuration > 0) {
|
||||||
|
fastest = MIN(fastest, cont->intermediateResult.result.nanoSecPerRun);
|
||||||
|
cont->intermediateResult.result.nanoSecPerRun = fastest;
|
||||||
|
cont->state.nbLoops = (U32)(TIMELOOP_NANOSEC / fastest) + 1;
|
||||||
|
} else {
|
||||||
|
const unsigned multiplier = 2;
|
||||||
|
assert(cont->state.nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */
|
||||||
|
cont->state.nbLoops *= multiplier;
|
||||||
|
}
|
||||||
|
if(loopDuration < MINUSABLETIME) { /* don't report results which have time too low */
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
completed = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* benchMem with no allocation */
|
/* benchMem with no allocation */
|
||||||
static BMK_return_t BMK_benchMemAdvancedNoAlloc(
|
static BMK_return_t BMK_benchMemAdvancedNoAlloc(
|
||||||
const void ** const srcPtrs, size_t* const srcSizes,
|
const void ** const srcPtrs, size_t* const srcSizes,
|
||||||
@ -350,7 +414,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
|
|||||||
ZSTD_CCtx* ctx, ZSTD_DCtx* dctx,
|
ZSTD_CCtx* ctx, ZSTD_DCtx* dctx,
|
||||||
int displayLevel, const char* displayName, const BMK_advancedParams_t* adv)
|
int displayLevel, const char* displayName, const BMK_advancedParams_t* adv)
|
||||||
{
|
{
|
||||||
size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ;
|
size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize); /* avoid div by 0 */
|
||||||
BMK_return_t results;
|
BMK_return_t results;
|
||||||
size_t const loadedCompressedSize = srcSize;
|
size_t const loadedCompressedSize = srcSize;
|
||||||
size_t cSize = 0;
|
size_t cSize = 0;
|
||||||
@ -428,12 +492,9 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
|
|||||||
U32 markNb = 0;
|
U32 markNb = 0;
|
||||||
DISPLAYLEVEL(2, "\r%79s\r", "");
|
DISPLAYLEVEL(2, "\r%79s\r", "");
|
||||||
|
|
||||||
|
|
||||||
if (adv->mode != BMK_decodeOnly) {
|
if (adv->mode != BMK_decodeOnly) {
|
||||||
BMK_initCCtxArgs cctxprep;
|
BMK_initCCtxArgs cctxprep;
|
||||||
BMK_customReturn_t compressionResults;
|
|
||||||
int completed = 0;
|
|
||||||
U64 totalLoops = 0, totalTime = 0, fastest = (U64)(-1LL);
|
|
||||||
UTIL_time_t coolTime = UTIL_getTime();
|
|
||||||
cctxprep.ctx = ctx;
|
cctxprep.ctx = ctx;
|
||||||
cctxprep.dictBuffer = dictBuffer;
|
cctxprep.dictBuffer = dictBuffer;
|
||||||
cctxprep.dictBufferSize = dictBufferSize;
|
cctxprep.dictBufferSize = dictBufferSize;
|
||||||
@ -443,40 +504,21 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
|
|||||||
/* Compression */
|
/* Compression */
|
||||||
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize);
|
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize);
|
||||||
if(adv->loopMode == BMK_timeMode) {
|
if(adv->loopMode == BMK_timeMode) {
|
||||||
U64 maxTime = adv->nbSeconds * TIMELOOP_NANOSEC;
|
BMK_customResultContinuation_t cont = BMK_init_customResultContinuation(adv->nbSeconds);
|
||||||
unsigned nbLoops = 1;
|
while(!cont.completed) {
|
||||||
while(!completed) {
|
BMK_benchFunctionTimed(&local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep,
|
||||||
/* Overheat protection */
|
nbBlocks, srcPtrs, srcSizes, cPtrs, cSizes, &cont);
|
||||||
if (UTIL_clockSpanMicro(coolTime) > ACTIVEPERIOD_MICROSEC) {
|
if(cont.intermediateResult.error) {
|
||||||
DEBUGOUTPUT("\rcooling down ... \r");
|
results.error = cont.intermediateResult.error;
|
||||||
UTIL_sleep(COOLPERIOD_SEC);
|
|
||||||
coolTime = UTIL_getTime();
|
|
||||||
}
|
|
||||||
|
|
||||||
compressionResults = BMK_benchFunction(&local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep,
|
|
||||||
nbBlocks, srcPtrs, srcSizes, cPtrs, cSizes, nbLoops);
|
|
||||||
if(compressionResults.error) {
|
|
||||||
results.error = compressionResults.error;
|
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
ratio = (double)(srcSize / cont.intermediateResult.result.sumOfReturn);
|
||||||
{ U64 loopDuration = compressionResults.result.nanoSecPerRun * nbLoops;
|
|
||||||
totalLoops += nbLoops;
|
|
||||||
totalTime += loopDuration;
|
|
||||||
if (loopDuration > 0) { // nanoSec / run
|
|
||||||
fastest = MIN(fastest, compressionResults.result.nanoSecPerRun);
|
|
||||||
nbLoops = (U32)(TIMELOOP_NANOSEC / fastest) + 1;
|
|
||||||
} else {
|
|
||||||
assert(nbLoops < 40000000); /* avoid overflow */
|
|
||||||
nbLoops *= 2;
|
|
||||||
}
|
|
||||||
completed = (totalTime >= maxTime);
|
|
||||||
{
|
{
|
||||||
int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
|
int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
|
||||||
double const compressionSpeed = (((double)srcSize * totalLoops) / totalTime) * 1000;
|
double const compressionSpeed = ((double)srcSize / cont.intermediateResult.result.nanoSecPerRun) * 1000;
|
||||||
int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
|
int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
|
||||||
results.result.cSpeed = compressionSpeed * 1000000;
|
results.result.cSpeed = compressionSpeed * 1000000;
|
||||||
results.result.cSize = compressionResults.result.sumOfReturn;
|
results.result.cSize = cont.intermediateResult.result.sumOfReturn;
|
||||||
ratio = (double)srcSize / results.result.cSize;
|
ratio = (double)srcSize / results.result.cSize;
|
||||||
markNb = (markNb+1) % NB_MARKS;
|
markNb = (markNb+1) % NB_MARKS;
|
||||||
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r",
|
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r",
|
||||||
@ -485,9 +527,8 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
|
|||||||
cSpeedAccuracy, compressionSpeed);
|
cSpeedAccuracy, compressionSpeed);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
compressionResults = BMK_benchFunction(&local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep,
|
BMK_customReturn_t compressionResults = BMK_benchFunction(&local_defaultCompress, (void*)ctx, &local_initCCtx, (void*)&cctxprep,
|
||||||
nbBlocks, srcPtrs, srcSizes, cPtrs, cSizes, adv->nbSeconds);
|
nbBlocks, srcPtrs, srcSizes, cPtrs, cSizes, adv->nbSeconds);
|
||||||
if(compressionResults.error) {
|
if(compressionResults.error) {
|
||||||
results.error = compressionResults.error;
|
results.error = compressionResults.error;
|
||||||
@ -517,49 +558,23 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
|
|||||||
if(adv->mode != BMK_compressOnly) {
|
if(adv->mode != BMK_compressOnly) {
|
||||||
BMK_initDCtxArgs dctxprep;
|
BMK_initDCtxArgs dctxprep;
|
||||||
BMK_customReturn_t decompressionResults;
|
BMK_customReturn_t decompressionResults;
|
||||||
U64 totalLoops = 0, totalTime = 0, fastest = (U64)(-1LL);
|
|
||||||
int completed = 0;
|
|
||||||
UTIL_time_t coolTime = UTIL_getTime();
|
|
||||||
dctxprep.dctx = dctx;
|
dctxprep.dctx = dctx;
|
||||||
dctxprep.dictBuffer = dictBuffer;
|
dctxprep.dictBuffer = dictBuffer;
|
||||||
dctxprep.dictBufferSize = dictBufferSize;
|
dctxprep.dictBufferSize = dictBufferSize;
|
||||||
if(adv->loopMode == BMK_timeMode) {
|
if(adv->loopMode == BMK_timeMode) {
|
||||||
U64 maxTime = adv->nbSeconds * TIMELOOP_NANOSEC;
|
BMK_customResultContinuation_t cont = BMK_init_customResultContinuation(adv->nbSeconds);
|
||||||
unsigned nbLoops = 1;
|
while(!cont.completed) {
|
||||||
while(!completed) {
|
BMK_benchFunctionTimed(&local_defaultDecompress, (void*)(dctx), &local_initDCtx, (void*)&dctxprep,
|
||||||
/* Overheat protection */
|
nbBlocks, (const void * const *)cPtrs, cSizes, resPtrs, resSizes, &cont);
|
||||||
if (UTIL_clockSpanMicro(coolTime) > ACTIVEPERIOD_MICROSEC) {
|
if(cont.intermediateResult.error) {
|
||||||
DEBUGOUTPUT("\rcooling down ... \r");
|
results.error = cont.intermediateResult.error;
|
||||||
UTIL_sleep(COOLPERIOD_SEC);
|
|
||||||
coolTime = UTIL_getTime();
|
|
||||||
}
|
|
||||||
|
|
||||||
decompressionResults = BMK_benchFunction(
|
|
||||||
&local_defaultDecompress, (void*)(dctx),
|
|
||||||
&local_initDCtx, (void*)&dctxprep, nbBlocks,
|
|
||||||
(const void * const *)cPtrs, cSizes, resPtrs, resSizes,
|
|
||||||
nbLoops);
|
|
||||||
|
|
||||||
if(decompressionResults.error) {
|
|
||||||
results.error = decompressionResults.error;
|
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
{ U64 loopDuration = decompressionResults.result.nanoSecPerRun * nbLoops;
|
|
||||||
totalLoops += nbLoops;
|
|
||||||
totalTime += loopDuration;
|
|
||||||
if (loopDuration > 0) {
|
|
||||||
fastest = MIN(fastest, loopDuration / nbLoops);
|
|
||||||
nbLoops = (U32)(TIMELOOP_NANOSEC / fastest) + 1;
|
|
||||||
} else {
|
|
||||||
assert(nbLoops < 40000000); /* avoid overflow */
|
|
||||||
nbLoops *= 2;
|
|
||||||
}
|
|
||||||
completed = (totalTime >= maxTime);
|
|
||||||
{ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
|
{ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
|
||||||
double const compressionSpeed = results.result.cSpeed / 1000000;
|
double const compressionSpeed = results.result.cSpeed / 1000000;
|
||||||
int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
|
int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
|
||||||
double const decompressionSpeed = ((double)srcSize * totalLoops / totalTime) * 1000;
|
double const decompressionSpeed = ((double)srcSize / cont.intermediateResult.result.nanoSecPerRun) * 1000;
|
||||||
results.result.dSpeed = decompressionSpeed * 1000000;
|
results.result.dSpeed = decompressionSpeed * 1000000;
|
||||||
markNb = (markNb+1) % NB_MARKS;
|
markNb = (markNb+1) % NB_MARKS;
|
||||||
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r",
|
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r",
|
||||||
@ -569,7 +584,6 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
|
|||||||
decompressionSpeed);
|
decompressionSpeed);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
decompressionResults = BMK_benchFunction(
|
decompressionResults = BMK_benchFunction(
|
||||||
&local_defaultDecompress, (void*)(dctx),
|
&local_defaultDecompress, (void*)(dctx),
|
||||||
@ -643,11 +657,12 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
|
|||||||
if (displayLevel == 1) { /* hidden display mode -q, used by python speed benchmark */
|
if (displayLevel == 1) { /* hidden display mode -q, used by python speed benchmark */
|
||||||
double const cSpeed = results.result.cSpeed / 1000000;
|
double const cSpeed = results.result.cSpeed / 1000000;
|
||||||
double const dSpeed = results.result.dSpeed / 1000000;
|
double const dSpeed = results.result.dSpeed / 1000000;
|
||||||
if (adv->additionalParam)
|
if (adv->additionalParam) {
|
||||||
DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, adv->additionalParam);
|
DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, adv->additionalParam);
|
||||||
else
|
} else {
|
||||||
DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName);
|
DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
DISPLAYLEVEL(2, "%2i#\n", cLevel);
|
DISPLAYLEVEL(2, "%2i#\n", cLevel);
|
||||||
} /* Bench */
|
} /* Bench */
|
||||||
return results;
|
return results;
|
||||||
|
@ -161,7 +161,7 @@ BMK_return_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize,
|
|||||||
/* This function times the execution of 2 argument functions, benchFn and initFn */
|
/* This function times the execution of 2 argument functions, benchFn and initFn */
|
||||||
|
|
||||||
/* benchFn - (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload)
|
/* benchFn - (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload)
|
||||||
* is run a variable number of times, specified by mode and iter args
|
* is run iter times
|
||||||
* initFn - (*initFn)(initPayload) is run once per benchmark at the beginning. This argument can
|
* initFn - (*initFn)(initPayload) is run once per benchmark at the beginning. This argument can
|
||||||
* be NULL, in which case nothing is run.
|
* be NULL, in which case nothing is run.
|
||||||
* blockCount - number of blocks (size of srcBuffers, srcSizes, dstBuffers, dstCapacities)
|
* blockCount - number of blocks (size of srcBuffers, srcSizes, dstBuffers, dstCapacities)
|
||||||
@ -188,6 +188,38 @@ BMK_customReturn_t BMK_benchFunction(
|
|||||||
void* const * const dstBuffers, const size_t* dstCapacities,
|
void* const * const dstBuffers, const size_t* dstCapacities,
|
||||||
unsigned sec);
|
unsigned sec);
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
unsigned nbLoops;
|
||||||
|
U64 timeRemaining;
|
||||||
|
UTIL_time_t coolTime;
|
||||||
|
} BMK_timeState_t;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int completed;
|
||||||
|
BMK_customReturn_t intermediateResult; /* since the wrapper can't err, don't need ERROR_STRUCT(cRC, just check here) */
|
||||||
|
BMK_timeState_t state;
|
||||||
|
} BMK_customResultContinuation_t;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initializes the continuation passed as the last argument of BMK_benchFunctionTimed(); iter is the number of seconds to bench (see below).
|
||||||
|
*/
|
||||||
|
BMK_customResultContinuation_t BMK_init_customResultContinuation(unsigned iter);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Benchmarks custom functions like BMK_benchFunction(), but runs for iter seconds rather than a fixed number of iterations
|
||||||
|
* Arguments are mostly the same as those of BMK_benchFunction().
|
||||||
|
* Usage - BMK_benchFunctionTimed() returns after approximately one second; the intermediate results can then be found in
|
||||||
|
* *cont and be displayed/used as wanted. Keep calling BMK_benchFunctionTimed() until cont->completed == 1
|
||||||
|
* to keep updating the intermediate result.
|
||||||
|
*/
|
||||||
|
void BMK_benchFunctionTimed(
|
||||||
|
size_t (*benchFn)(const void*, size_t, void*, size_t, void*), void* benchPayload,
|
||||||
|
size_t (*initFn)(void*), void* initPayload,
|
||||||
|
size_t blockCount,
|
||||||
|
const void* const * const srcBlockBuffers, const size_t* srcBlockSizes,
|
||||||
|
void* const * const dstBlockBuffers, const size_t* dstBlockCapacities,
|
||||||
|
BMK_customResultContinuation_t* cont);
|
||||||
|
|
||||||
#endif /* BENCH_H_121279284357 */
|
#endif /* BENCH_H_121279284357 */
|
||||||
|
|
||||||
#if defined (__cplusplus)
|
#if defined (__cplusplus)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user