From 4352e09cb002873f3c2eec5d79eddeefca28160f Mon Sep 17 00:00:00 2001 From: Stella Lau Date: Tue, 18 Jul 2017 18:35:25 -0700 Subject: [PATCH] Avoid recounting match lengths with ZSTD_count --- contrib/long_distance_matching/basic_table.c | 6 +++++- .../circular_buffer_table.c | 13 +++++++------ contrib/long_distance_matching/ldm.c | 18 +++++++++++++----- contrib/long_distance_matching/ldm_hashtable.h | 5 +++-- 4 files changed, 28 insertions(+), 14 deletions(-) diff --git a/contrib/long_distance_matching/basic_table.c b/contrib/long_distance_matching/basic_table.c index 6c12b508..30c548d2 100644 --- a/contrib/long_distance_matching/basic_table.c +++ b/contrib/long_distance_matching/basic_table.c @@ -62,12 +62,16 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table, const BYTE *pIn, const BYTE *pEnd, U32 minMatchLength, - U32 maxWindowSize) { + U32 maxWindowSize, + U32 *matchLength) { LDM_hashEntry *entry = getBucket(table, hash); (void)checksum; (void)pEnd; + (void)matchLength; + // TODO: Count the entire forward match length rather than check if valid. if (isValidMatch(pIn, entry->offset + table->offsetBase, minMatchLength, maxWindowSize)) { + return entry; } return NULL; diff --git a/contrib/long_distance_matching/circular_buffer_table.c b/contrib/long_distance_matching/circular_buffer_table.c index 653d9e51..104d1b33 100644 --- a/contrib/long_distance_matching/circular_buffer_table.c +++ b/contrib/long_distance_matching/circular_buffer_table.c @@ -15,17 +15,16 @@ // TODO: rename. Number of hash buckets. #define LDM_HASHLOG ((LDM_MEMORY_USAGE)-4-HASH_BUCKET_SIZE_LOG) -//#define TMP_ZSTDTOGGLE +#define TMP_ZSTDTOGGLE struct LDM_hashTable { U32 size; // Number of buckets U32 maxEntries; // Rename... LDM_hashEntry *entries; // 1-D array for now. + BYTE *bucketOffsets; // Pointer to current insert position. // Position corresponding to offset=0 in LDM_hashEntry. const BYTE *offsetBase; - BYTE *bucketOffsets; // Pointer to current insert position. - // Last insert was at bucketOffsets - 1? }; LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase) { @@ -174,7 +173,8 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table, const BYTE *pIn, const BYTE *pEnd, U32 minMatchLength, - U32 maxWindowSize) { + U32 maxWindowSize, + U32 *matchLength) { LDM_hashEntry *bucket = getBucket(table, hash); LDM_hashEntry *cur = bucket; // TODO: in order of recency? @@ -183,8 +183,9 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table, const BYTE *pMatch = cur->offset + table->offsetBase; #ifdef TMP_ZSTDTOGGLE if (cur->checksum == checksum && pIn - pMatch <= maxWindowSize) { - U32 matchLength = ZSTD_count(pIn, pMatch, pEnd); - if (matchLength >= minMatchLength) { + U32 forwardMatchLength = ZSTD_count(pIn, pMatch, pEnd); + if (forwardMatchLength >= minMatchLength) { + *matchLength = forwardMatchLength; return cur; } } diff --git a/contrib/long_distance_matching/ldm.c b/contrib/long_distance_matching/ldm.c index 56b22d28..1512ab8c 100644 --- a/contrib/long_distance_matching/ldm.c +++ b/contrib/long_distance_matching/ldm.c @@ -28,6 +28,7 @@ //#define HASH_CHECK //#define RUN_CHECKS +//#define TMP_RECOMPUTE_LENGTHS #include "ldm.h" #include "ldm_hashtable.h" @@ -435,8 +436,10 @@ void LDM_destroyCCtx(LDM_CCtx *cctx) { * Returns 0 if successful and 1 otherwise (i.e. no match can be found * in the remaining input that is long enough). * + * matchLength contains the forward length of the match. */ -static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) { +static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match, + U32 *matchLength) { LDM_hashEntry *entry = NULL; cctx->nextIp = cctx->ip + cctx->step; @@ -459,7 +462,8 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) { entry = HASH_getValidEntry(cctx->hashTable, h, sum, cctx->ip, cctx->iend, LDM_MIN_MATCH_LENGTH, - LDM_WINDOW_SIZE); + LDM_WINDOW_SIZE, + matchLength); #endif if (entry != NULL) { @@ -535,8 +539,7 @@ size_t LDM_compress(const void *src, size_t srcSize, void *dst, size_t maxDstSize) { LDM_CCtx cctx; const BYTE *match = NULL; -// printf("TST: %d\n", LDM_WINDOW_SIZE / LDM_HASHTABLESIZE_U64); -// printf("HASH LOG: %d\n", HASH_ONLY_EVERY_LOG); + U32 forwardMatchLength = 0; LDM_initializeCCtx(&cctx, src, srcSize, dst, maxDstSize); LDM_outputConfiguration(); @@ -555,7 +558,7 @@ size_t LDM_compress(const void *src, size_t srcSize, * is less than the minimum match length), then stop searching for matches * and encode the final literals. */ - while (LDM_findBestMatch(&cctx, &match) == 0) { + while (LDM_findBestMatch(&cctx, &match, &forwardMatchLength) == 0) { U32 backwardsMatchLen = 0; #ifdef COMPUTE_STATS cctx.stats.numMatches++; @@ -577,10 +580,15 @@ size_t LDM_compress(const void *src, size_t srcSize, { const U32 literalLength = cctx.ip - cctx.anchor; const U32 offset = cctx.ip - match; +#ifdef TMP_RECOMPUTE_LENGTHS const U32 matchLength = LDM_countMatchLength( cctx.ip + LDM_MIN_MATCH_LENGTH + backwardsMatchLen, match + LDM_MIN_MATCH_LENGTH + backwardsMatchLen, cctx.ihashLimit) + backwardsMatchLen; +#else + const U32 matchLength = forwardMatchLength + backwardsMatchLen - + LDM_MIN_MATCH_LENGTH; +#endif LDM_outputBlock(&cctx, literalLength, offset, matchLength); diff --git a/contrib/long_distance_matching/ldm_hashtable.h b/contrib/long_distance_matching/ldm_hashtable.h index 7566751d..2ea159f7 100644 --- a/contrib/long_distance_matching/ldm_hashtable.h +++ b/contrib/long_distance_matching/ldm_hashtable.h @@ -41,8 +41,9 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table, const U32 checksum, const BYTE *pIn, const BYTE *pEnd, - U32 minMatchLength, - U32 maxWindowSize); + const U32 minMatchLength, + const U32 maxWindowSize, + U32 *matchLength); hash_t HASH_hashU32(U32 value);