Avoid recounting match lengths with ZSTD_count

This commit is contained in:
Stella Lau 2017-07-18 18:35:25 -07:00
parent 1fa223859f
commit 4352e09cb0
4 changed files with 28 additions and 14 deletions

View File

@ -62,12 +62,16 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
const BYTE *pIn, const BYTE *pIn,
const BYTE *pEnd, const BYTE *pEnd,
U32 minMatchLength, U32 minMatchLength,
U32 maxWindowSize) { U32 maxWindowSize,
U32 *matchLength) {
LDM_hashEntry *entry = getBucket(table, hash); LDM_hashEntry *entry = getBucket(table, hash);
(void)checksum; (void)checksum;
(void)pEnd; (void)pEnd;
(void)matchLength;
// TODO: Count the entire forward match length rather than check if valid.
if (isValidMatch(pIn, entry->offset + table->offsetBase, if (isValidMatch(pIn, entry->offset + table->offsetBase,
minMatchLength, maxWindowSize)) { minMatchLength, maxWindowSize)) {
return entry; return entry;
} }
return NULL; return NULL;

View File

@ -15,17 +15,16 @@
// TODO: rename. Number of hash buckets. // TODO: rename. Number of hash buckets.
#define LDM_HASHLOG ((LDM_MEMORY_USAGE)-4-HASH_BUCKET_SIZE_LOG) #define LDM_HASHLOG ((LDM_MEMORY_USAGE)-4-HASH_BUCKET_SIZE_LOG)
//#define TMP_ZSTDTOGGLE #define TMP_ZSTDTOGGLE
struct LDM_hashTable { struct LDM_hashTable {
U32 size; // Number of buckets U32 size; // Number of buckets
U32 maxEntries; // Rename... U32 maxEntries; // Rename...
LDM_hashEntry *entries; // 1-D array for now. LDM_hashEntry *entries; // 1-D array for now.
BYTE *bucketOffsets; // Pointer to current insert position.
// Position corresponding to offset=0 in LDM_hashEntry. // Position corresponding to offset=0 in LDM_hashEntry.
const BYTE *offsetBase; const BYTE *offsetBase;
BYTE *bucketOffsets; // Pointer to current insert position.
// Last insert was at bucketOffsets - 1?
}; };
LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase) { LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase) {
@ -174,7 +173,8 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
const BYTE *pIn, const BYTE *pIn,
const BYTE *pEnd, const BYTE *pEnd,
U32 minMatchLength, U32 minMatchLength,
U32 maxWindowSize) { U32 maxWindowSize,
U32 *matchLength) {
LDM_hashEntry *bucket = getBucket(table, hash); LDM_hashEntry *bucket = getBucket(table, hash);
LDM_hashEntry *cur = bucket; LDM_hashEntry *cur = bucket;
// TODO: in order of recency? // TODO: in order of recency?
@ -183,8 +183,9 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
const BYTE *pMatch = cur->offset + table->offsetBase; const BYTE *pMatch = cur->offset + table->offsetBase;
#ifdef TMP_ZSTDTOGGLE #ifdef TMP_ZSTDTOGGLE
if (cur->checksum == checksum && pIn - pMatch <= maxWindowSize) { if (cur->checksum == checksum && pIn - pMatch <= maxWindowSize) {
U32 matchLength = ZSTD_count(pIn, pMatch, pEnd); U32 forwardMatchLength = ZSTD_count(pIn, pMatch, pEnd);
if (matchLength >= minMatchLength) { if (forwardMatchLength >= minMatchLength) {
*matchLength = forwardMatchLength;
return cur; return cur;
} }
} }

View File

@ -28,6 +28,7 @@
//#define HASH_CHECK //#define HASH_CHECK
//#define RUN_CHECKS //#define RUN_CHECKS
//#define TMP_RECOMPUTE_LENGTHS
#include "ldm.h" #include "ldm.h"
#include "ldm_hashtable.h" #include "ldm_hashtable.h"
@ -435,8 +436,10 @@ void LDM_destroyCCtx(LDM_CCtx *cctx) {
* Returns 0 if successful and 1 otherwise (i.e. no match can be found * Returns 0 if successful and 1 otherwise (i.e. no match can be found
* in the remaining input that is long enough). * in the remaining input that is long enough).
* *
* matchLength contains the forward length of the match.
*/ */
static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) { static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match,
U32 *matchLength) {
LDM_hashEntry *entry = NULL; LDM_hashEntry *entry = NULL;
cctx->nextIp = cctx->ip + cctx->step; cctx->nextIp = cctx->ip + cctx->step;
@ -459,7 +462,8 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) {
entry = HASH_getValidEntry(cctx->hashTable, h, sum, entry = HASH_getValidEntry(cctx->hashTable, h, sum,
cctx->ip, cctx->iend, cctx->ip, cctx->iend,
LDM_MIN_MATCH_LENGTH, LDM_MIN_MATCH_LENGTH,
LDM_WINDOW_SIZE); LDM_WINDOW_SIZE,
matchLength);
#endif #endif
if (entry != NULL) { if (entry != NULL) {
@ -535,8 +539,7 @@ size_t LDM_compress(const void *src, size_t srcSize,
void *dst, size_t maxDstSize) { void *dst, size_t maxDstSize) {
LDM_CCtx cctx; LDM_CCtx cctx;
const BYTE *match = NULL; const BYTE *match = NULL;
// printf("TST: %d\n", LDM_WINDOW_SIZE / LDM_HASHTABLESIZE_U64); U32 forwardMatchLength = 0;
// printf("HASH LOG: %d\n", HASH_ONLY_EVERY_LOG);
LDM_initializeCCtx(&cctx, src, srcSize, dst, maxDstSize); LDM_initializeCCtx(&cctx, src, srcSize, dst, maxDstSize);
LDM_outputConfiguration(); LDM_outputConfiguration();
@ -555,7 +558,7 @@ size_t LDM_compress(const void *src, size_t srcSize,
* is less than the minimum match length), then stop searching for matches * is less than the minimum match length), then stop searching for matches
* and encode the final literals. * and encode the final literals.
*/ */
while (LDM_findBestMatch(&cctx, &match) == 0) { while (LDM_findBestMatch(&cctx, &match, &forwardMatchLength) == 0) {
U32 backwardsMatchLen = 0; U32 backwardsMatchLen = 0;
#ifdef COMPUTE_STATS #ifdef COMPUTE_STATS
cctx.stats.numMatches++; cctx.stats.numMatches++;
@ -577,10 +580,15 @@ size_t LDM_compress(const void *src, size_t srcSize,
{ {
const U32 literalLength = cctx.ip - cctx.anchor; const U32 literalLength = cctx.ip - cctx.anchor;
const U32 offset = cctx.ip - match; const U32 offset = cctx.ip - match;
#ifdef TMP_RECOMPUTE_LENGTHS
const U32 matchLength = LDM_countMatchLength( const U32 matchLength = LDM_countMatchLength(
cctx.ip + LDM_MIN_MATCH_LENGTH + backwardsMatchLen, cctx.ip + LDM_MIN_MATCH_LENGTH + backwardsMatchLen,
match + LDM_MIN_MATCH_LENGTH + backwardsMatchLen, match + LDM_MIN_MATCH_LENGTH + backwardsMatchLen,
cctx.ihashLimit) + backwardsMatchLen; cctx.ihashLimit) + backwardsMatchLen;
#else
const U32 matchLength = forwardMatchLength + backwardsMatchLen -
LDM_MIN_MATCH_LENGTH;
#endif
LDM_outputBlock(&cctx, literalLength, offset, matchLength); LDM_outputBlock(&cctx, literalLength, offset, matchLength);

View File

@ -41,8 +41,9 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
const U32 checksum, const U32 checksum,
const BYTE *pIn, const BYTE *pIn,
const BYTE *pEnd, const BYTE *pEnd,
U32 minMatchLength, const U32 minMatchLength,
U32 maxWindowSize); const U32 maxWindowSize,
U32 *matchLength);
hash_t HASH_hashU32(U32 value); hash_t HASH_hashU32(U32 value);