From d993a288e067ea55d406f8fc547d55b9ad6e725b Mon Sep 17 00:00:00 2001 From: Han Zhu Date: Wed, 20 Jul 2022 11:14:51 -0700 Subject: [PATCH] [largeNbDicts] Add an option to print out median speed Summary: Added an option -p# where -p0 (default) sets the aggregation method to fastest speed while -p1 sets the aggregation method to median. Also added a new column in the csv file to report this option's value. Test Plan: ``` $ ./largeNbDicts -1 --nbDicts=1 -D ~/benchmarks/html/html_8_16K.32K.dict ~/benchmarks/html/html_8_16K/* loading 7450 files... created src buffer of size 83.4 MB split input into 7450 blocks loading dictionary /home/zhuhan/benchmarks/html/html_8_16K.32K.dict compressing at level 1 without dictionary : Ratio=3.03 (28827863 bytes) compressed using a 32768 bytes dictionary : Ratio=4.28 (20410262 bytes) generating 1 dictionaries, using 0.1 MB of memory Compression Speed : 306.0 MB/s Fastest Speed : 310.6 MB/s $ ./largeNbDicts -1 --nbDicts=1 -p1 -D ~/benchmarks/html/html_8_16K.32K.dict ~/benchmarks/html/html_8_16K/* loading 7450 files... 
created src buffer of size 83.4 MB split input into 7450 blocks loading dictionary /home/zhuhan/benchmarks/html/html_8_16K.32K.dict compressing at level 1 without dictionary : Ratio=3.03 (28827863 bytes) compressed using a 32768 bytes dictionary : Ratio=4.28 (20410262 bytes) generating 1 dictionaries, using 0.1 MB of memory Compression Speed : 306.9 MB/s Median Speed : 298.4 MB/s ``` --- contrib/largeNbDicts/largeNbDicts.c | 89 ++++++++++++++++++++++------- 1 file changed, 67 insertions(+), 22 deletions(-) diff --git a/contrib/largeNbDicts/largeNbDicts.c b/contrib/largeNbDicts/largeNbDicts.c index bb1c16d4..72b4b41e 100644 --- a/contrib/largeNbDicts/largeNbDicts.c +++ b/contrib/largeNbDicts/largeNbDicts.c @@ -19,7 +19,7 @@ /*--- Dependencies ---*/ #include /* size_t */ -#include /* malloc, free, abort */ +#include /* malloc, free, abort, qsort*/ #include /* fprintf */ #include /* UINT_MAX */ #include /* assert */ @@ -650,13 +650,40 @@ size_t decompress(const void* src, size_t srcSize, void* dst, size_t dstCapacity return result; } +typedef enum { + fastest = 0, + median = 1, +} metricAggregatePref_e; -static int benchMem(slice_collection_t dstBlocks, - slice_collection_t srcBlocks, +/* compareFunction() : + * Sort input in decreasing order when used with qsort() */ +int compareFunction(const void *a, const void *b) +{ + double x = *(const double *)a; + double y = *(const double *)b; + if (x < y) + return 1; + else if (x > y) + return -1; + return 0; +} + +double aggregateData(double *data, size_t size, + metricAggregatePref_e metricAggregatePref) +{ + qsort(data, size, sizeof(*data), compareFunction); + if (metricAggregatePref == fastest) + return data[0]; + else /* median */ + return (data[(size - 1) / 2] + data[size / 2]) / 2; +} + +static int benchMem(slice_collection_t dstBlocks, slice_collection_t srcBlocks, ddict_collection_t ddictionaries, - cdict_collection_t cdictionaries, - unsigned nbRounds, int benchCompression, - const char* exeName, ZSTD_CCtx_params* 
cctxParams) + cdict_collection_t cdictionaries, unsigned nbRounds, + int benchCompression, const char *exeName, + ZSTD_CCtx_params *cctxParams, + metricAggregatePref_e metricAggregatePref) { assert(dstBlocks.nbSlices == srcBlocks.nbSlices); if (benchCompression) assert(cctxParams); @@ -664,7 +691,7 @@ static int benchMem(slice_collection_t dstBlocks, unsigned const ms_per_round = RUN_TIME_DEFAULT_MS; unsigned const total_time_ms = nbRounds * ms_per_round; - double bestSpeed = 0.; + double *const speedPerRound = (double *)malloc(nbRounds * sizeof(double)); BMK_timedFnState_t* const benchState = BMK_createTimedFnState(total_time_ms, ms_per_round); @@ -688,6 +715,7 @@ static int benchMem(slice_collection_t dstBlocks, .blockResults = NULL }; + size_t roundNb = 0; for (;;) { BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState, bp); CONTROL(BMK_isSuccessful_runOutcome(outcome)); @@ -697,16 +725,24 @@ static int benchMem(slice_collection_t dstBlocks, double const dTime_sec = (double)dTime_ns / 1000000000; size_t const srcSize = result.sumOfReturn; double const speed_MBps = (double)srcSize / dTime_sec / (1 MB); - if (speed_MBps > bestSpeed) bestSpeed = speed_MBps; + speedPerRound[roundNb] = speed_MBps; if (benchCompression) - DISPLAY("Compression Speed : %.1f MB/s \r", bestSpeed); + DISPLAY("Compression Speed : %.1f MB/s \r", speed_MBps); else - DISPLAY("Decompression Speed : %.1f MB/s \r", bestSpeed); + DISPLAY("Decompression Speed : %.1f MB/s \r", speed_MBps); fflush(stdout); if (BMK_isCompleted_TimedFn(benchState)) break; + roundNb++; } DISPLAY("\n"); + /* BMK_benchTimedFn may not run exactly nbRounds iterations */ + double speedAggregated = + aggregateData(speedPerRound, roundNb + 1, metricAggregatePref); + if (metricAggregatePref == fastest) + DISPLAY("Fastest Speed : %.1f MB/s \n", speedAggregated); + else + DISPLAY("Median Speed : %.1f MB/s \n", speedAggregated); char* csvFileName = malloc(strlen(exeName) + 5); strcpy(csvFileName, exeName); @@ -719,7 +755,7 
@@ static int benchMem(slice_collection_t dstBlocks, /* Print table headers */ fprintf( csvFile, - "Compression/Decompression,Level,nbDicts,dictAttachPref,Speed\n"); + "Compression/Decompression,Level,nbDicts,dictAttachPref,metricAggregatePref,Speed\n"); } else { fclose(csvFile); csvFile = fopen(csvFileName, "at"); @@ -734,10 +770,10 @@ static int benchMem(slice_collection_t dstBlocks, ZSTD_CCtxParams_getParameter(cctxParams, ZSTD_c_forceAttachDict, &dictAttachPref); } - fprintf(csvFile, "%s,%d,%ld,%d,%.1f\n", + fprintf(csvFile, "%s,%d,%ld,%d,%d,%.1f\n", benchCompression ? "Compression" : "Decompression", cLevel, benchCompression ? ci.nbDicts : di.nbDicts, dictAttachPref, - bestSpeed); + metricAggregatePref, speedAggregated); fclose(csvFile); free(csvFileName); @@ -754,13 +790,11 @@ static int benchMem(slice_collection_t dstBlocks, * dictionary : optional (can be NULL), file to load as dictionary, * if none provided : will be calculated on the fly by the program. * @return : 0 is success, 1+ otherwise */ -int bench(const char** fileNameTable, unsigned nbFiles, - const char* dictionary, - size_t blockSize, int clevel, - unsigned nbDictMax, unsigned nbBlocks, +int bench(const char **fileNameTable, unsigned nbFiles, const char *dictionary, + size_t blockSize, int clevel, unsigned nbDictMax, unsigned nbBlocks, unsigned nbRounds, int benchCompression, - ZSTD_dictContentType_e dictContentType, ZSTD_CCtx_params* cctxParams, - const char* exeName) + ZSTD_dictContentType_e dictContentType, ZSTD_CCtx_params *cctxParams, + const char *exeName, metricAggregatePref_e metricAggregatePref) { int result = 0; @@ -855,7 +889,9 @@ int bench(const char** fileNameTable, unsigned nbFiles, buffer_collection_t resultCollection = createBufferCollection_fromSliceCollection(srcSlices); CONTROL(resultCollection.buffer.ptr != NULL); - result = benchMem(dstSlices, resultCollection.slices, ddictionaries, cdictionaries, nbRounds, benchCompression, exeName, cctxParams); + result = 
benchMem(dstSlices, resultCollection.slices, ddictionaries, + cdictionaries, nbRounds, benchCompression, exeName, + cctxParams, metricAggregatePref); freeBufferCollection(resultCollection); } else { @@ -869,7 +905,9 @@ int bench(const char** fileNameTable, unsigned nbFiles, buffer_collection_t resultCollection = createBufferCollection_fromSliceCollectionSizes(srcSlices); CONTROL(resultCollection.buffer.ptr != NULL); - result = benchMem(resultCollection.slices, dstSlices, ddictionaries, cdictionaries, nbRounds, benchCompression, exeName, NULL); + result = benchMem(resultCollection.slices, dstSlices, ddictionaries, + cdictionaries, nbRounds, benchCompression, exeName, + NULL, metricAggregatePref); freeBufferCollection(resultCollection); } @@ -947,6 +985,7 @@ int usage(const char* exeName) DISPLAY ("-# : use compression level # (default: %u) \n", CLEVEL_DEFAULT); DISPLAY ("-D # : use # as a dictionary (default: create one) \n"); DISPLAY ("-i# : nb benchmark rounds (default: %u) \n", BENCH_TIME_DEFAULT_S); + DISPLAY ("-p# : print speed for all rounds 0=fastest 1=median (default: 0)"); DISPLAY ("--nbBlocks=#: use # blocks for bench (default: one per file) \n"); DISPLAY ("--nbDicts=# : create # dictionaries for bench (default: one per block) \n"); DISPLAY ("-h : help (this text) \n"); @@ -987,6 +1026,7 @@ int main (int argc, const char** argv) ZSTD_dictContentType_e dictContentType = ZSTD_dct_auto; ZSTD_dictAttachPref_e dictAttachPref = ZSTD_dictDefaultAttach; ZSTD_paramSwitch_e prefetchCDictTables = ZSTD_ps_auto; + metricAggregatePref_e metricAggregatePref = fastest; for (int argNb = 1; argNb < argc ; argNb++) { const char* argument = argv[argNb]; @@ -996,6 +1036,7 @@ int main (int argc, const char** argv) if (!strcmp(argument, "-r")) { recursiveMode = 1; continue; } if (!strcmp(argument, "-D")) { argNb++; assert(argNb < argc); dictionary = argv[argNb]; continue; } if (longCommandWArg(&argument, "-i")) { nbRounds = readU32FromChar(&argument); continue; } + if 
(longCommandWArg(&argument, "-p")) { metricAggregatePref = (int)readU32FromChar(&argument); continue;} if (longCommandWArg(&argument, "--dictionary=")) { dictionary = argument; continue; } if (longCommandWArg(&argument, "-B")) { blockSize = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; } @@ -1030,7 +1071,11 @@ int main (int argc, const char** argv) ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_forceAttachDict, dictAttachPref); ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_prefetchCDictTables, prefetchCDictTables); - int result = bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds, benchCompression, dictContentType, cctxParams, exeName); + int result = + bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, + dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds, + benchCompression, dictContentType, cctxParams, exeName, + metricAggregatePref); UTIL_freeFileNamesTable(filenameTable); free(nameTable);