Rerun cover and fastCover with optimized values

dev
Jennifer Liu 2018-07-26 19:03:01 -07:00
parent 3d7941ce41
commit 759c543312
3 changed files with 169 additions and 139 deletions

View File

@ -13,108 +13,113 @@ Benchmark given input files: make ARG= followed by permitted arguments
make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
### Benchmarking Result:
For every f value of fastCover, the first row is the optimized run and the second row uses k=200.
The first COVER row is the optimized cover run; the second COVER row reuses the optimized d and k from the first.
For every f value of fastCover, the first row is the optimized fastCover run and the second row reuses the optimized d and k from the first.
github:
NODICT 0.000023 2.999642
RANDOM 0.149020 8.786957
LEGACY 0.854277 8.989482
FAST15 8.764078 10.609015
FAST15 0.232610 9.135669
FAST16 9.597777 10.474574
FAST16 0.243698 9.346482
FAST17 9.385449 10.611737
FAST17 0.268376 9.605798
FAST18 9.988885 10.626382
FAST18 0.311769 9.130565
FAST19 10.737259 10.411729
FAST19 0.331885 9.271814
FAST20 10.479782 10.388895
FAST20 0.498416 9.194115
FAST21 21.189883 10.376394
FAST21 1.098532 9.244456
FAST22 39.849935 10.432555
FAST22 2.590561 9.410930
FAST23 75.832399 10.614747
FAST23 6.108487 9.484150
FAST24 139.782714 10.611753
FAST24 13.029406 9.379030
COVER 55.118542 10.641263
NODICT 0.000004 2.999642
RANDOM 0.146096 8.786957
LEGACY 0.956888 8.989482
COVER 56.596152 10.641263
COVER 4.937047 10.641263
FAST15 17.722269 10.586461
FAST15 0.239135 10.586461
FAST16 18.276179 10.492503
FAST16 0.265285 10.492503
FAST17 18.077916 10.611737
FAST17 0.236573 10.611737
FAST18 19.510150 10.621586
FAST18 0.278683 10.621586
FAST19 18.794350 10.629626
FAST19 0.307943 10.629626
FAST20 19.671099 10.610308
FAST20 0.428814 10.610308
FAST21 36.527238 10.625733
FAST21 0.716384 10.625733
FAST22 83.803521 10.625281
FAST22 1.290246 10.625281
FAST23 158.287924 10.602342
FAST23 3.084848 10.602342
FAST24 283.630941 10.603379
FAST24 8.088933 10.603379
hg-commands
NODICT 0.000012 2.425291
RANDOM 0.083071 3.489515
LEGACY 0.835195 3.911896
FAST15 0.163980 3.808375
FAST16 6.373850 4.010783
FAST16 0.160299 3.966604
FAST17 6.668799 4.091602
FAST17 0.172480 4.062773
FAST18 6.266105 4.130824
FAST18 0.171554 4.094666
FAST19 6.869651 4.158180
FAST19 0.209468 4.111289
FAST20 8.267766 4.149707
FAST20 0.331680 4.119873
FAST21 18.824296 4.171784
FAST21 0.783961 4.120884
FAST22 33.321252 4.152035
FAST22 1.854215 4.126626
FAST23 60.775388 4.157595
FAST23 4.040395 4.134222
FAST24 110.910038 4.163091
FAST24 8.505828 4.143533
COVER 61.654796 4.131136
NODICT 0.000007 2.425291
RANDOM 0.084010 3.489515
LEGACY 0.926763 3.911896
COVER 62.036915 4.131136
COVER 2.194398 4.131136
FAST15 12.169025 3.903719
FAST15 0.156552 3.903719
FAST16 11.886255 4.005077
FAST16 0.155506 4.005077
FAST17 11.886955 4.097811
FAST17 0.176327 4.097811
FAST18 12.544698 4.136081
FAST18 0.171796 4.136081
FAST19 12.920868 4.166021
FAST19 0.207029 4.166021
FAST20 15.771429 4.163740
FAST20 0.258685 4.163740
FAST21 33.165829 4.157057
FAST21 0.663088 4.157057
FAST22 68.779201 4.158195
FAST22 1.568439 4.158195
FAST23 121.921931 4.161450
FAST23 2.498972 4.161450
FAST24 221.990451 4.159658
FAST24 5.793594 4.159658
hg-changelog
NODICT 0.000004 1.377613
RANDOM 0.582067 2.096785
LEGACY 2.739515 2.058273
FAST15 35.682665 2.127596
FAST15 0.931621 2.115299
FAST16 36.557988 2.141787
FAST16 1.008155 2.136080
FAST17 36.272242 2.155332
FAST17 0.906803 2.154596
FAST18 35.542043 2.171997
FAST18 1.063101 2.167723
FAST19 37.756934 2.180893
FAST19 1.257291 2.173768
FAST20 40.273755 2.179442
FAST20 1.630522 2.170072
FAST21 54.606548 2.181400
FAST21 2.321266 2.171643
FAST22 72.454066 2.178774
FAST22 5.092888 2.168885
FAST23 106.753208 2.180347
FAST23 14.722222 2.170673
FAST24 171.083201 2.183426
FAST24 27.575575 2.170623
COVER 227.219660 2.188654
RANDOM 0.549307 2.096785
LEGACY 2.273818 2.058273
COVER 219.640608 2.188654
COVER 6.055391 2.188654
FAST15 67.820700 2.127194
FAST15 0.824624 2.127194
FAST16 69.774209 2.145401
FAST16 0.889737 2.145401
FAST17 70.027355 2.157544
FAST17 0.869004 2.157544
FAST18 68.229652 2.173127
FAST18 0.930689 2.173127
FAST19 70.696241 2.179527
FAST19 1.385515 2.179527
FAST20 80.618172 2.183233
FAST20 1.699632 2.183233
FAST21 96.366254 2.180920
FAST21 2.606553 2.180920
FAST22 139.440758 2.184297
FAST22 5.962606 2.184297
FAST23 207.791930 2.187666
FAST23 14.823301 2.187666
FAST24 322.050385 2.189889
FAST24 29.294918 2.189889
hg-manifest
NODICT 0.000007 1.866385
RANDOM 1.086571 2.309485
LEGACY 9.567507 2.506775
FAST15 77.811380 2.380461
FAST15 1.969718 2.317727
FAST16 75.789019 2.469144
FAST16 2.051283 2.375815
FAST17 79.659040 2.539069
FAST17 1.995394 2.501047
FAST18 76.281105 2.578095
FAST18 2.059272 2.564840
FAST19 79.395382 2.590433
FAST19 2.354158 2.591024
FAST20 87.937568 2.597813
FAST20 2.922189 2.597104
FAST21 121.760549 2.598408
FAST21 4.798981 2.600269
FAST22 155.878461 2.594560
FAST22 8.151807 2.601047
FAST23 194.238003 2.596761
FAST23 15.160578 2.592985
FAST24 267.425904 2.597657
FAST24 29.513286 2.600363
COVER 930.675322 2.582597
NODICT 0.000008 1.866385
RANDOM 1.075766 2.309485
LEGACY 8.688387 2.506775
COVER 926.024689 2.582597
COVER 33.630695 2.582597
FAST15 152.845945 2.377689
FAST15 2.206285 2.377689
FAST16 147.772371 2.464814
FAST16 1.937997 2.464814
FAST17 147.729498 2.539834
FAST17 1.966577 2.539834
FAST18 144.156821 2.576924
FAST18 1.954106 2.576924
FAST19 145.678760 2.592479
FAST19 2.096876 2.592479
FAST20 159.634674 2.594551
FAST20 2.568766 2.594551
FAST21 228.116552 2.597128
FAST21 4.634508 2.597128
FAST22 288.890644 2.596971
FAST22 6.618204 2.596971
FAST23 377.196211 2.601416
FAST23 13.497286 2.601416
FAST24 503.208577 2.602830
FAST24 29.538585 2.602830

View File

@ -277,7 +277,8 @@ int main(int argCount, const char* argv[])
int result = 0;
/* Initialize arguments to default values */
const unsigned k = 200;
unsigned k = 200;
unsigned d = 8;
const unsigned cLevel = DEFAULT_CLEVEL;
const unsigned dictID = 0;
const unsigned maxDictSize = g_defaultMaxDictSize;
@ -360,47 +361,6 @@ int main(int argCount, const char* argv[])
}
}
/* for fastCover */
for (unsigned f = 15; f < 25; f++){
DISPLAYLEVEL(2, "current f is %u\n", f);
/* for fastCover (optimizing k) */
{
ZDICT_fastCover_params_t fastParam;
memset(&fastParam, 0, sizeof(fastParam));
fastParam.zParams = zParams;
fastParam.splitPoint = 1.0;
fastParam.d = 8;
fastParam.f = f;
fastParam.steps = 40;
fastParam.nbThreads = 1;
const int fastOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100));
if(fastOptResult) {
result = 1;
goto _cleanup;
}
}
/* for fastCover (with k provided) */
{
ZDICT_fastCover_params_t fastParam;
memset(&fastParam, 0, sizeof(fastParam));
fastParam.zParams = zParams;
fastParam.splitPoint = 1.0;
fastParam.d = 8;
fastParam.f = f;
fastParam.k = 200;
fastParam.steps = 40;
fastParam.nbThreads = 1;
const int fastOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100));
if(fastOptResult) {
result = 1;
goto _cleanup;
}
}
}
/* for cover */
{
ZDICT_cover_params_t coverParam;
@ -415,8 +375,73 @@ int main(int argCount, const char* argv[])
result = 1;
goto _cleanup;
}
k = coverParam.k;
d = coverParam.d;
/* for COVER with k and d provided */
ZDICT_cover_params_t covernParam;
memset(&covernParam, 0, sizeof(covernParam));
covernParam.zParams = zParams;
covernParam.splitPoint = 1.0;
covernParam.steps = 40;
covernParam.nbThreads = 1;
covernParam.k = k;
covernParam.d = d;
const int coverResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &covernParam, NULL, NULL);
DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", covernParam.k, covernParam.d, covernParam.steps, (unsigned)(covernParam.splitPoint * 100));
if(coverResult) {
result = 1;
goto _cleanup;
}
}
/* for fastCover */
for (unsigned f = 15; f < 25; f++){
DISPLAYLEVEL(2, "current f is %u\n", f);
/* for fastCover (optimizing k and d) */
{
ZDICT_fastCover_params_t fastParam;
memset(&fastParam, 0, sizeof(fastParam));
fastParam.zParams = zParams;
fastParam.splitPoint = 1.0;
fastParam.f = f;
fastParam.steps = 40;
fastParam.nbThreads = 1;
const int fastOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100));
if(fastOptResult) {
result = 1;
goto _cleanup;
}
k = fastParam.k;
d = fastParam.d;
}
/* for fastCover (with k and d provided) */
{
ZDICT_fastCover_params_t fastParam;
memset(&fastParam, 0, sizeof(fastParam));
fastParam.zParams = zParams;
fastParam.splitPoint = 1.0;
fastParam.d = d;
fastParam.f = f;
fastParam.k = k;
fastParam.steps = 40;
fastParam.nbThreads = 1;
const int fastOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100));
if(fastOptResult) {
result = 1;
goto _cleanup;
}
}
}
/* Free allocated memory */
_cleanup:

View File

@ -267,7 +267,7 @@ static void FASTCOVER_computeFrequency(U32 *freqs, unsigned f, FASTCOVER_ctx_t *
size_t currSampleStart = ctx->offsets[i];
size_t currSampleEnd = ctx->offsets[i+1];
start = currSampleStart;
while (start + f < currSampleEnd) {
while (start + ctx->d <= currSampleEnd) {
const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, ctx->d);
/* if no dmer with same hash value has been seen in current sample */
if (inCurrSample[dmerIndex] == 0) {