Save segmentFreqs in ctx instead of malloc and memset in SelectSegment

dev
Jennifer Liu 2018-07-27 16:51:38 -07:00
parent 49b398e93f
commit 61262f6c0d
4 changed files with 141 additions and 16 deletions

View File

@ -17,6 +17,8 @@ make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
- For every f value of fastCover, the first one is optimize fastCover and the second one uses optimized d and k from first one.
- Fourth column is chosen d and fifth column is chosen k
Version 1:
github:
NODICT 0.000004 2.999642
RANDOM 0.146096 8.786957
@ -124,3 +126,114 @@ FAST23 377.196211 2.601416 8 194
FAST23 13.497286 2.601416 8 194
FAST24 503.208577 2.602830 6 194
FAST24 29.538585 2.602830 6 194
---------------------------------------------------------------
Version 2 (save segmentFreqs in ctx instead of malloc and memset in every call to SelectSegment):
github:
NODICT 0.000005 2.999642
RANDOM 0.141553 8.786957
LEGACY 0.904340 8.989482
COVER 53.621302 10.641263 8 1298
COVER 4.085037 10.641263 8 1298
FAST15 17.636211 10.586461 8 1778
FAST15 0.221236 10.586461 8 1778
FAST16 18.716259 10.492503 6 1778
FAST16 0.251522 10.492503 6 1778
FAST17 17.614391 10.611737 8 1778
FAST17 0.241011 10.611737 8 1778
FAST18 19.926270 10.621586 8 1778
FAST18 0.287195 10.621586 8 1778
FAST19 19.626808 10.629626 8 1778
FAST19 0.340191 10.629626 8 1778
FAST20 18.918657 10.610308 8 1778
FAST20 0.463307 10.610308 8 1778
FAST21 20.502362 10.625733 8 1778
FAST21 0.638202 10.625733 8 1778
FAST22 22.702695 10.625281 8 1778
FAST22 1.353399 10.625281 8 1778
FAST23 28.041990 10.602342 8 1778
FAST23 3.029502 10.602342 8 1778
FAST24 35.662961 10.603379 8 1778
FAST24 6.524258 10.603379 8 1778
hg-commands:
NODICT 0.000005 2.425291
RANDOM 0.080469 3.489515
LEGACY 0.794417 3.911896
COVER 54.198788 4.131136 8 386
COVER 2.191729 4.131136 8 386
FAST15 11.852793 3.903719 6 1106
FAST15 0.175406 3.903719 6 1106
FAST16 12.863315 4.005077 8 530
FAST16 0.158410 4.005077 8 530
FAST17 11.977917 4.097811 8 818
FAST17 0.162381 4.097811 8 818
FAST18 11.749304 4.136081 8 770
FAST18 0.173242 4.136081 8 770
FAST19 11.905785 4.166021 8 530
FAST19 0.186403 4.166021 8 530
FAST20 13.293999 4.163740 8 482
FAST20 0.241508 4.163740 8 482
FAST21 16.623177 4.157057 8 434
FAST21 0.372647 4.157057 8 434
FAST22 20.918409 4.158195 8 290
FAST22 0.570431 4.158195 8 290
FAST23 21.762805 4.161450 8 434
FAST23 1.162206 4.161450 8 434
FAST24 29.133745 4.159658 8 338
FAST24 3.054376 4.159658 8 338
hg-changelog:
NODICT 0.000006 1.377613
RANDOM 0.601346 2.096785
LEGACY 2.544973 2.058273
COVER 222.639708 2.188654 8 98
COVER 6.072892 2.188654 8 98
FAST15 70.394523 2.127194 8 866
FAST15 0.899766 2.127194 8 866
FAST16 69.845529 2.145401 8 338
FAST16 0.881569 2.145401 8 338
FAST17 69.382431 2.157544 8 194
FAST17 0.943291 2.157544 8 194
FAST18 71.348283 2.173127 8 98
FAST18 1.034765 2.173127 8 98
FAST19 71.380923 2.179527 8 98
FAST19 1.254700 2.179527 8 98
FAST20 72.802714 2.183233 6 98
FAST20 1.368704 2.183233 6 98
FAST21 82.042339 2.180920 8 98
FAST21 2.213864 2.180920 8 98
FAST22 90.666200 2.184297 8 98
FAST22 3.590399 2.184297 8 98
FAST23 108.926377 2.187666 6 98
FAST23 8.723759 2.187666 6 98
FAST24 134.296232 2.189889 6 98
FAST24 19.396532 2.189889 6 98
hg-manifest:
NODICT 0.000005 1.866385
RANDOM 0.982192 2.309485
LEGACY 9.507729 2.506775
COVER 922.742066 2.582597 8 434
COVER 36.500276 2.582597 8 434
FAST15 163.886717 2.377689 8 1682
FAST15 2.107328 2.377689 8 1682
FAST16 152.684592 2.464814 8 1538
FAST16 2.157789 2.464814 8 1538
FAST17 154.463459 2.539834 6 1826
FAST17 2.282455 2.539834 6 1826
FAST18 155.540044 2.576924 8 1922
FAST18 2.101807 2.576924 8 1922
FAST19 152.650343 2.592479 6 290
FAST19 2.359461 2.592479 6 290
FAST20 174.623634 2.594551 8 194
FAST20 2.870022 2.594551 8 194
FAST21 219.876653 2.597128 6 194
FAST21 4.386269 2.597128 6 194
FAST22 247.986803 2.596971 6 386
FAST22 6.201144 2.596971 6 386
FAST23 276.051806 2.601416 8 194
FAST23 11.613477 2.601416 8 194
FAST24 328.234024 2.602830 6 194
FAST24 26.710364 2.602830 6 194

View File

@ -1,2 +1,8 @@
echo "Benchmark with in=../../lib/common"
./benchmark in=../../../lib/common
echo "-----------------github--------------------"
./benchmark in=github
echo "-----------------hg-commands--------------------"
./benchmark in=hg-commands
echo "-----------------hg-changelog--------------------"
./benchmark in=hg-changelog
echo "------------------hg-manifest-------------------"
./benchmark in=hg-manifest

View File

@ -1,7 +1,7 @@
ARG :=
CC ?= gcc
CFLAGS ?= -O3
CFLAGS ?= -O3 -g
INCLUDES := -I ../../../programs -I ../randomDictBuilder -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder
IO_FILE := ../randomDictBuilder/io.c
@ -9,7 +9,7 @@ IO_FILE := ../randomDictBuilder/io.c
TEST_INPUT := ../../../lib
TEST_OUTPUT := fastCoverDict
all: main run clean
all: main run
.PHONY: test
test: main testrun testshell clean
@ -32,7 +32,7 @@ io.o: $(IO_FILE)
$(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE)
libzstd.a:
$(MAKE) -C ../../../lib libzstd.a
$(MAKE) MOREFLAGS=-g -C ../../../lib libzstd.a
mv ../../../lib/libzstd.a .
.PHONY: testrun

View File

@ -82,6 +82,7 @@ typedef struct {
size_t nbTestSamples;
size_t nbDmers;
U32 *freqs;
U16 *segmentFreqs;
unsigned d;
} FASTCOVER_ctx_t;
@ -142,9 +143,6 @@ static FASTCOVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
activeSegment.end = begin;
activeSegment.score = 0;
{
/* Keep track of number of times an index has been seen in current segment */
U16* currfreqs =(U16 *)malloc((1 << parameters.f) * sizeof(U16));
memset(currfreqs, 0, (1 << parameters.f) * sizeof(*currfreqs));
/* Slide the activeSegment through the whole epoch.
* Save the best segment in bestSegment.
*/
@ -152,19 +150,19 @@ static FASTCOVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
/* Get hash value of current dmer */
const size_t index = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, parameters.f, ctx->d);
/* Add frequency of this index to score if this is the first occurence of index in active segment */
if (currfreqs[index] == 0) {
if (ctx->segmentFreqs[index] == 0) {
activeSegment.score += freqs[index];
}
currfreqs[index] += 1;
ctx->segmentFreqs[index] += 1;
/* Increment end of segment */
activeSegment.end += 1;
/* If the window is now too large, drop the first position */
if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
/* Get hash value of the dmer to be eliminated from active segment */
const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, parameters.f, ctx->d);
currfreqs[delIndex] -= 1;
ctx->segmentFreqs[delIndex] -= 1;
/* Subtract frequency of this index from score if this is the last occurrence of this index in active segment */
if (currfreqs[delIndex] == 0) {
if (ctx->segmentFreqs[delIndex] == 0) {
activeSegment.score -= freqs[delIndex];
}
/* Increment start of segment */
@ -175,7 +173,12 @@ static FASTCOVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
bestSegment = activeSegment;
}
}
free(currfreqs);
/* Zero out rest of segmentFreqs array */
while (activeSegment.begin < end) {
const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, parameters.f, ctx->d);
ctx->segmentFreqs[delIndex] -= 1;
activeSegment.begin += 1;
}
}
{
/* Trim off the zero frequency head and tail from the segment. */
@ -245,6 +248,10 @@ static void FASTCOVER_ctx_destroy(FASTCOVER_ctx_t *ctx) {
if (!ctx) {
return;
}
if (ctx->segmentFreqs) {
free(ctx->segmentFreqs);
ctx->segmentFreqs = NULL;
}
if (ctx->freqs) {
free(ctx->freqs);
ctx->freqs = NULL;
@ -347,9 +354,8 @@ static int FASTCOVER_ctx_init(FASTCOVER_ctx_t *ctx, const void *samplesBuffer,
}
/* Initialize frequency array of size 2^f */
ctx->freqs =(U32 *)malloc((1 << f) * sizeof(U32));
memset(ctx->freqs, 0, (1 << f) * sizeof(U32));
ctx->freqs = (U32 *)calloc((1 << f), sizeof(U32));
ctx->segmentFreqs = (U16 *)calloc((1 << f), sizeof(U16));
DISPLAYLEVEL(2, "Computing frequencies\n");
FASTCOVER_computeFrequency(ctx->freqs, f, ctx);