From d49eb40c03845d0961f2819f502c51a11bd7cbe5 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 21 Aug 2017 13:10:03 -0700 Subject: [PATCH 1/4] [cover] Stop when segmentSize is less than d --- lib/dictBuilder/cover.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c index 3d445ae8..501b5b49 100644 --- a/lib/dictBuilder/cover.c +++ b/lib/dictBuilder/cover.c @@ -622,9 +622,9 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs, /* Select a segment */ COVER_segment_t segment = COVER_selectSegment( ctx, freqs, activeDmers, epochBegin, epochEnd, parameters); - /* Trim the segment if necessary and if it is empty then we are done */ + /* Trim the segment if necessary and if it is too small then we are done */ segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); - if (segmentSize == 0) { + if (segmentSize < parameters.d) { break; } /* We fill the dictionary from the back to allow the best segments to be From 9a54a315aa28a6659b935bd6ce95cb962715ebbc Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 21 Aug 2017 13:30:07 -0700 Subject: [PATCH 2/4] [cover] Convert score to U32 and check for zero --- lib/dictBuilder/cover.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c index 501b5b49..07fedc2d 100644 --- a/lib/dictBuilder/cover.c +++ b/lib/dictBuilder/cover.c @@ -382,7 +382,7 @@ static void COVER_group(COVER_ctx_t *ctx, const void *group, typedef struct { U32 begin; U32 end; - double score; + U32 score; } COVER_segment_t; /** @@ -622,6 +622,10 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs, /* Select a segment */ COVER_segment_t segment = COVER_selectSegment( ctx, freqs, activeDmers, epochBegin, epochEnd, parameters); + /* If the segment covers no dmers, then we are out of content */ + if (segment.score == 0) { + break; + } /* Trim the segment if necessary and if it is too small then we are done */ segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); if (segmentSize < parameters.d) { From 98de3f6847052019bb0a35a50f294d0d87a137ad Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 21 Aug 2017 14:23:17 -0700 Subject: [PATCH 3/4] [cover] Add dictionary size to compressed size --- lib/dictBuilder/cover.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c index 07fedc2d..3770c2ca 100644 --- a/lib/dictBuilder/cover.c +++ b/lib/dictBuilder/cover.c @@ -888,7 +888,7 @@ static void COVER_tryParameters(void *opaque) { goto _compressCleanup; } /* Compress each sample and sum their sizes (or error) */ - totalCompressedSize = 0; + totalCompressedSize = dictBufferCapacity; for (i = 0; i < ctx->nbSamples; ++i) { const size_t size = ZSTD_compress_usingCDict( cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i], From 29c2d9a4d05213adac3bdb8b5855d80079112799 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 21 Aug 2017 14:28:31 -0700 Subject: [PATCH 4/4] [cover] Turn down notification for ZDICT subroutines --- lib/dictBuilder/cover.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c index 3770c2ca..64f23f23 100644 --- a/lib/dictBuilder/cover.c +++ b/lib/dictBuilder/cover.c @@ -998,6 +998,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( data->parameters.k = k; data->parameters.d = d; data->parameters.steps = kSteps; + data->parameters.zParams.notificationLevel = g_displayLevel; /* Check the parameters */ if (!COVER_checkParameters(data->parameters)) { DISPLAYLEVEL(1, "Cover parameters incorrect\n");