Avoid recounting match lengths with ZSTD_count

This commit is contained in:
Stella Lau 2017-07-18 18:35:25 -07:00
parent 1fa223859f
commit 4352e09cb0
4 changed files with 28 additions and 14 deletions

View File

@ -62,12 +62,16 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
const BYTE *pIn,
const BYTE *pEnd,
U32 minMatchLength,
U32 maxWindowSize) {
U32 maxWindowSize,
U32 *matchLength) {
LDM_hashEntry *entry = getBucket(table, hash);
(void)checksum;
(void)pEnd;
(void)matchLength;
// TODO: Count the entire forward match length instead of only checking
// that the match is valid. Until then, *matchLength is not written here.
if (isValidMatch(pIn, entry->offset + table->offsetBase,
minMatchLength, maxWindowSize)) {
return entry;
}
return NULL;

View File

@ -15,17 +15,16 @@
// TODO: rename. Number of hash buckets.
#define LDM_HASHLOG ((LDM_MEMORY_USAGE)-4-HASH_BUCKET_SIZE_LOG)
//#define TMP_ZSTDTOGGLE
#define TMP_ZSTDTOGGLE
struct LDM_hashTable {
U32 size; // Number of buckets
U32 maxEntries; // Rename...
LDM_hashEntry *entries; // 1-D array for now.
BYTE *bucketOffsets; // Pointer to current insert position.
// Position corresponding to offset=0 in LDM_hashEntry.
const BYTE *offsetBase;
BYTE *bucketOffsets; // Pointer to current insert position.
// Last insert was at bucketOffsets - 1?
};
LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase) {
@ -174,7 +173,8 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
const BYTE *pIn,
const BYTE *pEnd,
U32 minMatchLength,
U32 maxWindowSize) {
U32 maxWindowSize,
U32 *matchLength) {
LDM_hashEntry *bucket = getBucket(table, hash);
LDM_hashEntry *cur = bucket;
// TODO: in order of recency?
@ -183,8 +183,9 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
const BYTE *pMatch = cur->offset + table->offsetBase;
#ifdef TMP_ZSTDTOGGLE
if (cur->checksum == checksum && pIn - pMatch <= maxWindowSize) {
U32 matchLength = ZSTD_count(pIn, pMatch, pEnd);
if (matchLength >= minMatchLength) {
U32 forwardMatchLength = ZSTD_count(pIn, pMatch, pEnd);
if (forwardMatchLength >= minMatchLength) {
*matchLength = forwardMatchLength;
return cur;
}
}

View File

@ -28,6 +28,7 @@
//#define HASH_CHECK
//#define RUN_CHECKS
//#define TMP_RECOMPUTE_LENGTHS
#include "ldm.h"
#include "ldm_hashtable.h"
@ -435,8 +436,10 @@ void LDM_destroyCCtx(LDM_CCtx *cctx) {
* Returns 0 if successful and 1 otherwise (i.e. no match can be found
* in the remaining input that is long enough).
*
* On success, *matchLength holds the forward length of the match as
* reported by HASH_getValidEntry.
*/
static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) {
static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match,
U32 *matchLength) {
LDM_hashEntry *entry = NULL;
cctx->nextIp = cctx->ip + cctx->step;
@ -459,7 +462,8 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) {
entry = HASH_getValidEntry(cctx->hashTable, h, sum,
cctx->ip, cctx->iend,
LDM_MIN_MATCH_LENGTH,
LDM_WINDOW_SIZE);
LDM_WINDOW_SIZE,
matchLength);
#endif
if (entry != NULL) {
@ -535,8 +539,7 @@ size_t LDM_compress(const void *src, size_t srcSize,
void *dst, size_t maxDstSize) {
LDM_CCtx cctx;
const BYTE *match = NULL;
// printf("TST: %d\n", LDM_WINDOW_SIZE / LDM_HASHTABLESIZE_U64);
// printf("HASH LOG: %d\n", HASH_ONLY_EVERY_LOG);
U32 forwardMatchLength = 0;
LDM_initializeCCtx(&cctx, src, srcSize, dst, maxDstSize);
LDM_outputConfiguration();
@ -555,7 +558,7 @@ size_t LDM_compress(const void *src, size_t srcSize,
* is less than the minimum match length), then stop searching for matches
* and encode the final literals.
*/
while (LDM_findBestMatch(&cctx, &match) == 0) {
while (LDM_findBestMatch(&cctx, &match, &forwardMatchLength) == 0) {
U32 backwardsMatchLen = 0;
#ifdef COMPUTE_STATS
cctx.stats.numMatches++;
@ -577,10 +580,15 @@ size_t LDM_compress(const void *src, size_t srcSize,
{
const U32 literalLength = cctx.ip - cctx.anchor;
const U32 offset = cctx.ip - match;
#ifdef TMP_RECOMPUTE_LENGTHS
const U32 matchLength = LDM_countMatchLength(
cctx.ip + LDM_MIN_MATCH_LENGTH + backwardsMatchLen,
match + LDM_MIN_MATCH_LENGTH + backwardsMatchLen,
cctx.ihashLimit) + backwardsMatchLen;
#else
const U32 matchLength = forwardMatchLength + backwardsMatchLen -
LDM_MIN_MATCH_LENGTH;
#endif
LDM_outputBlock(&cctx, literalLength, offset, matchLength);

View File

@ -41,8 +41,9 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
const U32 checksum,
const BYTE *pIn,
const BYTE *pEnd,
U32 minMatchLength,
U32 maxWindowSize);
const U32 minMatchLength,
const U32 maxWindowSize,
U32 *matchLength);
hash_t HASH_hashU32(U32 value);