Avoid recounting match lengths with ZSTD_count
This commit is contained in:
parent
1fa223859f
commit
4352e09cb0
@ -62,12 +62,16 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
|
||||
const BYTE *pIn,
|
||||
const BYTE *pEnd,
|
||||
U32 minMatchLength,
|
||||
U32 maxWindowSize) {
|
||||
U32 maxWindowSize,
|
||||
U32 *matchLength) {
|
||||
LDM_hashEntry *entry = getBucket(table, hash);
|
||||
(void)checksum;
|
||||
(void)pEnd;
|
||||
(void)matchLength;
|
||||
// TODO: Count the entire forward match length rather than check if valid.
|
||||
if (isValidMatch(pIn, entry->offset + table->offsetBase,
|
||||
minMatchLength, maxWindowSize)) {
|
||||
|
||||
return entry;
|
||||
}
|
||||
return NULL;
|
||||
|
@ -15,17 +15,16 @@
|
||||
// TODO: rename. Number of hash buckets.
|
||||
#define LDM_HASHLOG ((LDM_MEMORY_USAGE)-4-HASH_BUCKET_SIZE_LOG)
|
||||
|
||||
//#define TMP_ZSTDTOGGLE
|
||||
#define TMP_ZSTDTOGGLE
|
||||
|
||||
struct LDM_hashTable {
|
||||
U32 size; // Number of buckets
|
||||
U32 maxEntries; // Rename...
|
||||
LDM_hashEntry *entries; // 1-D array for now.
|
||||
BYTE *bucketOffsets; // Pointer to current insert position.
|
||||
|
||||
// Position corresponding to offset=0 in LDM_hashEntry.
|
||||
const BYTE *offsetBase;
|
||||
BYTE *bucketOffsets; // Pointer to current insert position.
|
||||
// Last insert was at bucketOffsets - 1?
|
||||
};
|
||||
|
||||
LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase) {
|
||||
@ -174,7 +173,8 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
|
||||
const BYTE *pIn,
|
||||
const BYTE *pEnd,
|
||||
U32 minMatchLength,
|
||||
U32 maxWindowSize) {
|
||||
U32 maxWindowSize,
|
||||
U32 *matchLength) {
|
||||
LDM_hashEntry *bucket = getBucket(table, hash);
|
||||
LDM_hashEntry *cur = bucket;
|
||||
// TODO: in order of recency?
|
||||
@ -183,8 +183,9 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
|
||||
const BYTE *pMatch = cur->offset + table->offsetBase;
|
||||
#ifdef TMP_ZSTDTOGGLE
|
||||
if (cur->checksum == checksum && pIn - pMatch <= maxWindowSize) {
|
||||
U32 matchLength = ZSTD_count(pIn, pMatch, pEnd);
|
||||
if (matchLength >= minMatchLength) {
|
||||
U32 forwardMatchLength = ZSTD_count(pIn, pMatch, pEnd);
|
||||
if (forwardMatchLength >= minMatchLength) {
|
||||
*matchLength = forwardMatchLength;
|
||||
return cur;
|
||||
}
|
||||
}
|
||||
|
@ -28,6 +28,7 @@
|
||||
|
||||
//#define HASH_CHECK
|
||||
//#define RUN_CHECKS
|
||||
//#define TMP_RECOMPUTE_LENGTHS
|
||||
|
||||
#include "ldm.h"
|
||||
#include "ldm_hashtable.h"
|
||||
@ -435,8 +436,10 @@ void LDM_destroyCCtx(LDM_CCtx *cctx) {
|
||||
* Returns 0 if successful and 1 otherwise (i.e. no match can be found
|
||||
* in the remaining input that is long enough).
|
||||
*
|
||||
* matchLength contains the forward length of the match.
|
||||
*/
|
||||
static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) {
|
||||
static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match,
|
||||
U32 *matchLength) {
|
||||
|
||||
LDM_hashEntry *entry = NULL;
|
||||
cctx->nextIp = cctx->ip + cctx->step;
|
||||
@ -459,7 +462,8 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) {
|
||||
entry = HASH_getValidEntry(cctx->hashTable, h, sum,
|
||||
cctx->ip, cctx->iend,
|
||||
LDM_MIN_MATCH_LENGTH,
|
||||
LDM_WINDOW_SIZE);
|
||||
LDM_WINDOW_SIZE,
|
||||
matchLength);
|
||||
#endif
|
||||
|
||||
if (entry != NULL) {
|
||||
@ -535,8 +539,7 @@ size_t LDM_compress(const void *src, size_t srcSize,
|
||||
void *dst, size_t maxDstSize) {
|
||||
LDM_CCtx cctx;
|
||||
const BYTE *match = NULL;
|
||||
// printf("TST: %d\n", LDM_WINDOW_SIZE / LDM_HASHTABLESIZE_U64);
|
||||
// printf("HASH LOG: %d\n", HASH_ONLY_EVERY_LOG);
|
||||
U32 forwardMatchLength = 0;
|
||||
|
||||
LDM_initializeCCtx(&cctx, src, srcSize, dst, maxDstSize);
|
||||
LDM_outputConfiguration();
|
||||
@ -555,7 +558,7 @@ size_t LDM_compress(const void *src, size_t srcSize,
|
||||
* is less than the minimum match length), then stop searching for matches
|
||||
* and encode the final literals.
|
||||
*/
|
||||
while (LDM_findBestMatch(&cctx, &match) == 0) {
|
||||
while (LDM_findBestMatch(&cctx, &match, &forwardMatchLength) == 0) {
|
||||
U32 backwardsMatchLen = 0;
|
||||
#ifdef COMPUTE_STATS
|
||||
cctx.stats.numMatches++;
|
||||
@ -577,10 +580,15 @@ size_t LDM_compress(const void *src, size_t srcSize,
|
||||
{
|
||||
const U32 literalLength = cctx.ip - cctx.anchor;
|
||||
const U32 offset = cctx.ip - match;
|
||||
#ifdef TMP_RECOMPUTE_LENGTHS
|
||||
const U32 matchLength = LDM_countMatchLength(
|
||||
cctx.ip + LDM_MIN_MATCH_LENGTH + backwardsMatchLen,
|
||||
match + LDM_MIN_MATCH_LENGTH + backwardsMatchLen,
|
||||
cctx.ihashLimit) + backwardsMatchLen;
|
||||
#else
|
||||
const U32 matchLength = forwardMatchLength + backwardsMatchLen -
|
||||
LDM_MIN_MATCH_LENGTH;
|
||||
#endif
|
||||
|
||||
LDM_outputBlock(&cctx, literalLength, offset, matchLength);
|
||||
|
||||
|
@ -41,8 +41,9 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
|
||||
const U32 checksum,
|
||||
const BYTE *pIn,
|
||||
const BYTE *pEnd,
|
||||
U32 minMatchLength,
|
||||
U32 maxWindowSize);
|
||||
const U32 minMatchLength,
|
||||
const U32 maxWindowSize,
|
||||
U32 *matchLength);
|
||||
|
||||
hash_t HASH_hashU32(U32 value);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user