improve benchmark measurement for small inputs
by invoking time() once per batch instead of once per compression / decompression call. The batch is dynamically resized so that each round lasts approximately 1 second. Also increases time accuracy to nanoseconds.
parent 70163bf0d3
commit d3364aa39e
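For context, here is a minimal sketch of the measurement pattern this commit adopts: read the clock once per batch, then recalibrate the batch size after every round so the next round lasts roughly one second. This is not the bench.c code itself; run_once(), now_ns() and TARGET_NS are hypothetical stand-ins for the benchmarked operation, UTIL_getTime()/UTIL_clockSpanNano() and TIMELOOP_NANOSEC.

#define _POSIX_C_SOURCE 199309L
#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Hypothetical stand-in for one compression or decompression pass. */
static void run_once(void) { /* ... work to be measured ... */ }

/* Monotonic clock in nanoseconds (the real code uses UTIL_getTime()). */
static uint64_t now_ns(void)
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}

#define TARGET_NS 1000000000ULL   /* aim for ~1 second per measured round */

int main(void)
{
    uint64_t fastest = UINT64_MAX;   /* best observed time per iteration (ns) */
    uint32_t nbLoops = 1000;         /* initial conservative batch size */
    int round;

    for (round = 0; round < 4; round++) {
        uint64_t const start = now_ns();   /* one clock read per batch ... */
        uint32_t i;
        for (i = 0; i < nbLoops; i++) run_once();
        {   uint64_t const duration = now_ns() - start;   /* ... and one here */
            if (duration > 0) {
                uint64_t perLoop = duration / nbLoops;
                if (perLoop == 0) perLoop = 1;   /* batch too small to resolve */
                if (perLoop < fastest) fastest = perLoop;
                /* resize the batch so the next round lasts about TARGET_NS */
                nbLoops = (uint32_t)(TARGET_NS / fastest) + 1;
            } else {
                nbLoops *= 100;   /* clock did not advance: grow the batch */
            }
        }
    }
    printf("fastest: %llu ns per iteration\n", (unsigned long long)fastest);
    return 0;
}

Compared with timing every call individually, this keeps clock overhead negligible even when a single pass over a small input takes far less than the clock's resolution.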
@@ -22,7 +22,7 @@
 * Compiler Warnings
 ****************************************/
 #ifdef _MSC_VER
 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
 #endif
 
 
@@ -34,6 +34,7 @@
 #include <stdlib.h> /* malloc, free */
 #include <string.h> /* memset */
 #include <stdio.h> /* fprintf, fopen */
+#include <assert.h> /* assert */
 
 #include "mem.h"
 #define ZSTD_STATIC_LINKING_ONLY
@@ -51,8 +52,9 @@
 # define ZSTD_GIT_COMMIT_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_GIT_COMMIT)
 #endif
 
-#define TIMELOOP_MICROSEC 1*1000000ULL /* 1 second */
-#define ACTIVEPERIOD_MICROSEC 70*1000000ULL /* 70 seconds */
+#define TIMELOOP_MICROSEC (1*1000000ULL) /* 1 second */
+#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */
+#define ACTIVEPERIOD_MICROSEC (70*TIMELOOP_MICROSEC) /* 70 seconds */
 #define COOLPERIOD_SEC 10
 
 #define KB *(1 <<10)
@@ -264,7 +266,9 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
 { U64 fastestC = (U64)(-1LL), fastestD = (U64)(-1LL);
 U64 const crcOrig = g_decodeOnly ? 0 : XXH64(srcBuffer, srcSize, 0);
 UTIL_time_t coolTime;
-U64 const maxTime = (g_nbSeconds * TIMELOOP_MICROSEC) + 1;
+U64 const maxTime = (g_nbSeconds * TIMELOOP_NANOSEC) + 1;
+U32 nbDecodeLoops = (U32)((100 MB) / (srcSize+1)) + 1; /* initial conservative speed estimate */
+U32 nbCompressionLoops = (U32)((2 MB) / (srcSize+1)) + 1; /* initial conservative speed estimate */
 U64 totalCTime=0, totalDTime=0;
 U32 cCompleted=g_decodeOnly, dCompleted=0;
 # define NB_MARKS 4
@@ -283,18 +287,16 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
 }
 
 if (!g_decodeOnly) {
-UTIL_time_t clockStart;
 /* Compression */
 DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize);
 if (!cCompleted) memset(compressedBuffer, 0xE5, maxCompressedSize); /* warm up and erase result buffer */
 
-UTIL_sleepMilli(1); /* give processor time to other processes */
+UTIL_sleepMilli(5); /* give processor time to other processes */
 UTIL_waitForNextTick();
-clockStart = UTIL_getTime();
 
 if (!cCompleted) { /* still some time to do compression tests */
-U64 const clockLoop = g_nbSeconds ? TIMELOOP_MICROSEC : 1;
 U32 nbLoops = 0;
+UTIL_time_t const clockStart = UTIL_getTime();
 ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbWorkers, g_nbWorkers);
 ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionLevel, cLevel);
 ZSTD_CCtx_setParameter(ctx, ZSTD_p_enableLongDistanceMatching, g_ldmFlag);
@@ -314,7 +316,9 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
 ZSTD_CCtx_setParameter(ctx, ZSTD_p_targetLength, comprParams->targetLength);
 ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionStrategy, comprParams->strategy);
 ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize);
-do {
+if (!g_nbSeconds) nbCompressionLoops=1;
+for (nbLoops=0; nbLoops<nbCompressionLoops; nbLoops++) {
 U32 blockNb;
 for (blockNb=0; blockNb<nbBlocks; blockNb++) {
 #if 0 /* direct compression function, for occasional comparison */
@@ -343,12 +347,16 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
 }
 blockTable[blockNb].cSize = out.pos;
 #endif
+} }
+{ U64 const loopDuration = UTIL_clockSpanNano(clockStart);
+if (loopDuration > 0) {
+if (loopDuration < fastestC * nbCompressionLoops)
+fastestC = loopDuration / nbCompressionLoops;
+nbCompressionLoops = (1000000000 / fastestC) + 1;
+} else {
+assert(nbCompressionLoops < 40000000); /* avoid overflow */
+nbCompressionLoops *= 100;
 }
-nbLoops++;
-} while (UTIL_clockSpanMicro(clockStart) < clockLoop);
-{ U64 const loopDuration = UTIL_clockSpanMicro(clockStart);
-if (loopDuration < fastestC*nbLoops)
-fastestC = loopDuration / nbLoops;
 totalCTime += loopDuration;
 cCompleted = (totalCTime >= maxTime); /* end compression tests */
 } }
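To illustrate the recalibration in the hunk above with made-up numbers: if the fastest observed compression pass over the input takes fastestC = 20,000 ns, the next batch is set to 1,000,000,000 / 20,000 + 1 = 50,001 passes, so the following round again runs for roughly one second while the elapsed time is still read only once per batch.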
@@ -358,7 +366,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
 ratio = (double)srcSize / (double)cSize;
 markNb = (markNb+1) % NB_MARKS;
 { int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
-double const compressionSpeed = (double)srcSize / fastestC;
+double const compressionSpeed = ((double)srcSize / fastestC) * 1000;
 int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
 DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r",
 marks[markNb], displayName, (U32)srcSize, (U32)cSize,
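A note on the * 1000 factor introduced above: fastestC is now measured in nanoseconds rather than microseconds, so (double)srcSize / fastestC yields bytes per nanosecond; multiplying by 1000 converts it to bytes per microsecond, which is numerically the same as MB/s (10^6 bytes over 10^6 microseconds), keeping the displayed units unchanged.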
@@ -376,16 +384,16 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
 /* Decompression */
 if (!dCompleted) memset(resultBuffer, 0xD6, srcSize); /* warm result buffer */
 
-UTIL_sleepMilli(1); /* give processor time to other processes */
+UTIL_sleepMilli(5); /* give processor time to other processes */
 UTIL_waitForNextTick();
 
 if (!dCompleted) {
-U64 clockLoop = g_nbSeconds ? TIMELOOP_MICROSEC : 1;
 U32 nbLoops = 0;
 ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuffer, dictBufferSize);
 UTIL_time_t const clockStart = UTIL_getTime();
 if (!ddict) EXM_THROW(2, "ZSTD_createDDict() allocation failure");
-do {
+if (!g_nbSeconds) nbDecodeLoops = 1;
+for (nbLoops=0; nbLoops < nbDecodeLoops; nbLoops++) {
 U32 blockNb;
 for (blockNb=0; blockNb<nbBlocks; blockNb++) {
 size_t const regenSize = ZSTD_decompress_usingDDict(dctx,
||||||
@ -397,22 +405,26 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
|
|||||||
blockNb, (U32)blockTable[blockNb].cSize, ZSTD_getErrorName(regenSize));
|
blockNb, (U32)blockTable[blockNb].cSize, ZSTD_getErrorName(regenSize));
|
||||||
}
|
}
|
||||||
blockTable[blockNb].resSize = regenSize;
|
blockTable[blockNb].resSize = regenSize;
|
||||||
}
|
} }
|
||||||
nbLoops++;
|
|
||||||
} while (UTIL_clockSpanMicro(clockStart) < clockLoop);
|
|
||||||
ZSTD_freeDDict(ddict);
|
ZSTD_freeDDict(ddict);
|
||||||
{ U64 const loopDuration = UTIL_clockSpanMicro(clockStart);
|
{ U64 const loopDuration = UTIL_clockSpanNano(clockStart);
|
||||||
if (loopDuration < fastestD*nbLoops)
|
if (loopDuration > 0) {
|
||||||
fastestD = loopDuration / nbLoops;
|
if (loopDuration < fastestD * nbDecodeLoops)
|
||||||
|
fastestD = loopDuration / nbDecodeLoops;
|
||||||
|
nbDecodeLoops = (1000000000/*1sec*/ / fastestD) + 1;
|
||||||
|
} else {
|
||||||
|
assert(nbDecodeLoops < 40000000); /* avoid overflow */
|
||||||
|
nbDecodeLoops *= 100;
|
||||||
|
}
|
||||||
totalDTime += loopDuration;
|
totalDTime += loopDuration;
|
||||||
dCompleted = (totalDTime >= maxTime);
|
dCompleted = (totalDTime >= maxTime);
|
||||||
} }
|
} }
|
||||||
|
|
||||||
markNb = (markNb+1) % NB_MARKS;
|
markNb = (markNb+1) % NB_MARKS;
|
||||||
{ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
|
{ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
|
||||||
double const compressionSpeed = (double)srcSize / fastestC;
|
double const compressionSpeed = ((double)srcSize / fastestC) * 1000;
|
||||||
int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
|
int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
|
||||||
double const decompressionSpeed = (double)srcSize / fastestD;
|
double const decompressionSpeed = ((double)srcSize / fastestD) * 1000;
|
||||||
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r",
|
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r",
|
||||||
marks[markNb], displayName, (U32)srcSize, (U32)cSize,
|
marks[markNb], displayName, (U32)srcSize, (U32)cSize,
|
||||||
ratioAccuracy, ratio,
|
ratioAccuracy, ratio,
|
||||||
@@ -461,8 +473,8 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
 } /* for (testNb = 1; testNb <= (g_nbSeconds + !g_nbSeconds); testNb++) */
 
 if (g_displayLevel == 1) { /* hidden display mode -q, used by python speed benchmark */
-double cSpeed = (double)srcSize / fastestC;
-double dSpeed = (double)srcSize / fastestD;
+double cSpeed = ((double)srcSize / fastestC) * 1000;
+double dSpeed = ((double)srcSize / fastestD) * 1000;
 if (g_additionalParam)
 DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, g_additionalParam);
 else
@@ -634,7 +646,8 @@ static void BMK_benchFileTable(const char* const * const fileNamesTable, unsigne
 }
 
 
-static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility, const ZSTD_compressionParameters* compressionParams)
+static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility,
+const ZSTD_compressionParameters* compressionParams)
 {
 char name[20] = {0};
 size_t benchedSize = 10000000;
@@ -142,7 +142,9 @@ static int g_utilDisplayLevel;
 }
 return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart;
 }
 
 #elif defined(__APPLE__) && defined(__MACH__)
 
 #include <mach/mach_time.h>
 #define UTIL_TIME_INITIALIZER 0
 typedef U64 UTIL_time_t;
@@ -167,7 +169,9 @@ static int g_utilDisplayLevel;
 }
 return ((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom);
 }
 
 #elif (PLATFORM_POSIX_VERSION >= 200112L) && (defined __UCLIBC__ || ((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 17) || __GLIBC__ > 2))
 
 #define UTIL_TIME_INITIALIZER { 0, 0 }
 typedef struct timespec UTIL_freq_t;
 typedef struct timespec UTIL_time_t;
@@ -217,12 +221,18 @@ static int g_utilDisplayLevel;
 #define SEC_TO_MICRO 1000000
 
 /* returns time span in microseconds */
-UTIL_STATIC U64 UTIL_clockSpanMicro( UTIL_time_t clockStart )
+UTIL_STATIC U64 UTIL_clockSpanMicro(UTIL_time_t clockStart )
 {
 UTIL_time_t const clockEnd = UTIL_getTime();
 return UTIL_getSpanTimeMicro(clockStart, clockEnd);
 }
 
+/* returns time span in nanoseconds */
+UTIL_STATIC U64 UTIL_clockSpanNano(UTIL_time_t clockStart )
+{
+UTIL_time_t const clockEnd = UTIL_getTime();
+return UTIL_getSpanTimeNano(clockStart, clockEnd);
+}
 
 UTIL_STATIC void UTIL_waitForNextTick(void)
 {