From d1fc507ef998f511f6f1da7edc57670bb6b3404f Mon Sep 17 00:00:00 2001 From: Jennifer Liu Date: Wed, 25 Jul 2018 17:05:54 -0700 Subject: [PATCH] Initial benchmarking result for fastCover --- .../benchmarkDictBuilder/Makefile | 10 +++-- .../benchmarkDictBuilder/README.md | 40 ++++++++++-------- .../benchmarkDictBuilder/benchmark.c | 42 +++++++++++++++---- 3 files changed, 62 insertions(+), 30 deletions(-) diff --git a/contrib/experimental_dict_builders/benchmarkDictBuilder/Makefile b/contrib/experimental_dict_builders/benchmarkDictBuilder/Makefile index 72ce04f2..68149488 100644 --- a/contrib/experimental_dict_builders/benchmarkDictBuilder/Makefile +++ b/contrib/experimental_dict_builders/benchmarkDictBuilder/Makefile @@ -2,9 +2,10 @@ ARG := CC ?= gcc CFLAGS ?= -O3 -INCLUDES := -I ../randomDictBuilder -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder +INCLUDES := -I ../randomDictBuilder -I ../fastCover -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder RANDOM_FILE := ../randomDictBuilder/random.c +FAST_FILE := ../fastCover/fastCover.c IO_FILE := ../randomDictBuilder/io.c all: run clean @@ -21,8 +22,8 @@ test: benchmarkTest clean benchmarkTest: benchmark test.sh sh test.sh -benchmark: benchmark.o io.o random.o libzstd.a - $(CC) $(CFLAGS) benchmark.o io.o random.o libzstd.a -o benchmark +benchmark: benchmark.o io.o random.o fastCover.o libzstd.a + $(CC) $(CFLAGS) benchmark.o io.o random.o fastCover.o libzstd.a -o benchmark benchmark.o: benchmark.c $(CC) $(CFLAGS) $(INCLUDES) -c benchmark.c @@ -30,6 +31,9 @@ benchmark.o: benchmark.c random.o: $(RANDOM_FILE) $(CC) $(CFLAGS) $(INCLUDES) -c $(RANDOM_FILE) +fastCover.o: $(FAST_FILE) + $(CC) $(CFLAGS) $(INCLUDES) -c $(FAST_FILE) + io.o: $(IO_FILE) $(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE) diff --git a/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md b/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md index de783a0e..e02d592c 100644 --- a/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md +++ b/contrib/experimental_dict_builders/benchmarkDictBuilder/README.md @@ -18,30 +18,34 @@ github: | Algorithm | Speed(sec) | Compression Ratio | | ------------- |:-------------:| ------------------:| | nodict | 0.000004 | 2.999642 | -| random | 0.180238 | 8.786957 | -| cover | 33.891987 | 10.430999 | -| legacy | 1.077569 | 8.989482 | +| random | 0.135459 | 8.786957 | +| cover | 50.341079 | 10.641263 | +| legacy | 0.866283 | 8.989482 | +| fastCover | 13.450947 | 10.215174 | hg-commands | Algorithm | Speed(sec) | Compression Ratio | | ------------- |:-------------:| ------------------:| -| nodict | 0.000006 | 2.425291 | -| random | 0.088735 | 3.489515 | -| cover | 35.447300 | 4.030274 | -| legacy | 1.048509 | 3.911896 | +| nodict | 0.000020 | 2.425291 | +| random | 0.088828 | 3.489515 | +| cover | 60.028672 | 4.131136 | +| legacy | 0.852481 | 3.911896 | +| fastCover | 9.524284 | 3.977229 | + +hg-changelog +| Algorithm | Speed(sec) | Compression Ratio | +| ------------- |:-------------:| ------------------:| +| nodict | 0.000004 | 1.377613 | +| random | 0.621812 | 2.096785 | +| cover | 217.510962 | 2.188654 | +| legacy | 2.559194 | 2.058273 | +| fastCover | 51.132516 | 2.124185 | hg-manifest | Algorithm | Speed(sec) | Compression Ratio | | ------------- |:-------------:| ------------------:| | nodict | 0.000005 | 1.866385 | -| random | 1.148231 | 2.309485 | -| cover | 509.685257 | 2.575331 | -| legacy | 10.705866 | 2.506775 | - -hg-changelog -| Algorithm | Speed(sec) | Compression Ratio | -| ------------- |:-------------:| ------------------:| -| nodict | 0.000005 | 1.377613 | -| random | 0.706434 | 2.096785 | -| cover | 122.815783 | 2.175706 | -| legacy | 3.010318 | 2.058273 | +| random | 1.035220 | 2.309485 | +| cover | 930.480173 | 2.582597 | +| legacy | 8.916513 | 2.506775 | +| fastCover | 116.871089 | 2.525689 | diff --git a/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c b/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c index 64041964..865ecb34 100644 --- a/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c +++ b/contrib/experimental_dict_builders/benchmarkDictBuilder/benchmark.c @@ -5,6 +5,7 @@ #include #include #include "random.h" +#include "fastCover.h" #include "dictBuilder.h" #include "zstd_internal.h" /* includes zstd.h */ #include "io.h" @@ -71,10 +72,11 @@ typedef struct { */ dictInfo* createDictFromFiles(sampleInfo *info, unsigned maxDictSize, ZDICT_random_params_t *randomParams, ZDICT_cover_params_t *coverParams, - ZDICT_legacy_params_t *legacyParams) { + ZDICT_legacy_params_t *legacyParams, ZDICT_fastCover_params_t *fastParams) { unsigned const displayLevel = randomParams ? randomParams->zParams.notificationLevel : coverParams ? coverParams->zParams.notificationLevel : legacyParams ? legacyParams->zParams.notificationLevel : + fastParams ? fastParams->zParams.notificationLevel : DEFAULT_DISPLAYLEVEL; /* no dict */ void* const dictBuffer = malloc(maxDictSize); @@ -94,6 +96,9 @@ dictInfo* createDictFromFiles(sampleInfo *info, unsigned maxDictSize, } else if(legacyParams) { dictSize = ZDICT_trainFromBuffer_legacy(dictBuffer, maxDictSize, info->srcBuffer, info->samplesSizes, info->nbSamples, *legacyParams); + } else if(fastParams) { + dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer, + info->samplesSizes, info->nbSamples, fastParams); } else { dictSize = 0; } @@ -216,25 +221,29 @@ void freeDictInfo(dictInfo* info) { * @return 0 if benchmark successfully, 1 otherwise */ int benchmarkDictBuilder(sampleInfo *srcInfo, unsigned maxDictSize, ZDICT_random_params_t *randomParam, - ZDICT_cover_params_t *coverParam, ZDICT_legacy_params_t *legacyParam) { + ZDICT_cover_params_t *coverParam, ZDICT_legacy_params_t *legacyParam, + ZDICT_fastCover_params_t *fastParam) { /* Local variables */ const unsigned displayLevel = randomParam ? randomParam->zParams.notificationLevel : coverParam ? coverParam->zParams.notificationLevel : legacyParam ? legacyParam->zParams.notificationLevel : + fastParam ? fastParam->zParams.notificationLevel: DEFAULT_DISPLAYLEVEL; /* no dict */ const char* name = randomParam ? "RANDOM" : coverParam ? "COVER" : legacyParam ? "LEGACY" : + fastParam ? "FAST": "NODICT"; /* no dict */ const unsigned cLevel = randomParam ? randomParam->zParams.compressionLevel : coverParam ? coverParam->zParams.compressionLevel : legacyParam ? legacyParam->zParams.compressionLevel : + fastParam ? fastParam->zParams.compressionLevel: DEFAULT_CLEVEL; /* no dict */ int result = 0; /* Calculate speed */ const UTIL_time_t begin = UTIL_getTime(); - dictInfo* dInfo = createDictFromFiles(srcInfo, maxDictSize, randomParam, coverParam, legacyParam); + dictInfo* dInfo = createDictFromFiles(srcInfo, maxDictSize, randomParam, coverParam, legacyParam, fastParam); const U64 timeMicro = UTIL_clockSpanMicro(begin); const double timeSec = timeMicro / (double)SEC_TO_MICRO; if (!dInfo) { @@ -269,7 +278,6 @@ int main(int argCount, const char* argv[]) /* Initialize arguments to default values */ const unsigned k = 200; - const unsigned d = 6; const unsigned cLevel = DEFAULT_CLEVEL; const unsigned dictID = 0; const unsigned maxDictSize = g_defaultMaxDictSize; @@ -319,7 +327,7 @@ int main(int argCount, const char* argv[]) /* with no dict */ { - const int noDictResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL); + const int noDictResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, NULL); if(noDictResult) { result = 1; goto _cleanup; @@ -331,7 +339,7 @@ int main(int argCount, const char* argv[]) ZDICT_random_params_t randomParam; randomParam.zParams = zParams; randomParam.k = k; - const int randomResult = benchmarkDictBuilder(srcInfo, maxDictSize, &randomParam, NULL, NULL); + const int randomResult = benchmarkDictBuilder(srcInfo, maxDictSize, &randomParam, NULL, NULL, NULL); if(randomResult) { result = 1; goto _cleanup; @@ -344,10 +352,9 @@ int main(int argCount, const char* argv[]) memset(&coverParam, 0, sizeof(coverParam)); coverParam.zParams = zParams; coverParam.splitPoint = 1.0; - coverParam.d = d; coverParam.steps = 40; coverParam.nbThreads = 1; - const int coverOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL); + const int coverOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL, NULL); if(coverOptResult) { result = 1; goto _cleanup; @@ -359,13 +366,30 @@ int main(int argCount, const char* argv[]) ZDICT_legacy_params_t legacyParam; legacyParam.zParams = zParams; legacyParam.selectivityLevel = 9; - const int legacyResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, &legacyParam); + const int legacyResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, &legacyParam, NULL); if(legacyResult) { result = 1; goto _cleanup; } } + /* for fastCover */ + { + ZDICT_fastCover_params_t fastParam; + memset(&fastParam, 0, sizeof(fastParam)); + fastParam.zParams = zParams; + fastParam.splitPoint = 1.0; + fastParam.d = 8; + fastParam.f = 23; + fastParam.steps = 40; + fastParam.nbThreads = 1; + const int fastOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam); + if(fastOptResult) { + result = 1; + goto _cleanup; + } + } + /* Free allocated memory */ _cleanup: UTIL_freeFileList(extendedFileList, fileNamesBuf);