Add function to get valid entries only from table
parent
4bb42b02c1
commit
15a041adbf
|
@ -25,17 +25,17 @@ LDFLAGS += -lzstd
|
||||||
|
|
||||||
default: all
|
default: all
|
||||||
|
|
||||||
all: main-basic main-chaining
|
all: main-basic main-circular-buffer
|
||||||
|
|
||||||
main-basic : basic_table.c ldm.c main-ldm.c
|
main-basic : basic_table.c ldm.c main-ldm.c
|
||||||
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
|
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
|
||||||
|
|
||||||
main-chaining : chaining_table.c ldm.c main-ldm.c
|
main-circular-buffer: circular_buffer_table.c ldm.c main-ldm.c
|
||||||
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
|
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
|
||||||
|
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
@rm -f core *.o tmp* result* *.ldm *.ldm.dec \
|
@rm -f core *.o tmp* result* *.ldm *.ldm.dec \
|
||||||
main-basic main-chaining
|
main-basic main-circular-buffer
|
||||||
@echo Cleaning completed
|
@echo Cleaning completed
|
||||||
|
|
||||||
|
|
|
@ -27,12 +27,29 @@ LDM_hashEntry *getBucket(const LDM_hashTable *table, const hash_t hash) {
|
||||||
return table->entries + hash;
|
return table->entries + hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
LDM_hashEntry *HASH_getEntryFromHash(
|
LDM_hashEntry *HASH_getEntryFromHash(
|
||||||
const LDM_hashTable *table, const hash_t hash, const U32 checksum) {
|
const LDM_hashTable *table, const hash_t hash, const U32 checksum) {
|
||||||
(void)checksum;
|
(void)checksum;
|
||||||
return getBucket(table, hash);
|
return getBucket(table, hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
|
||||||
|
const hash_t hash,
|
||||||
|
const U32 checksum,
|
||||||
|
const BYTE *pIn,
|
||||||
|
int (*isValid)(const BYTE *pIn, const BYTE *pMatch)) {
|
||||||
|
LDM_hashEntry *entry = getBucket(table, hash);
|
||||||
|
(void)checksum;
|
||||||
|
if ((*isValid)(pIn, entry->offset + table->offsetBase)) {
|
||||||
|
return entry;
|
||||||
|
} else {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void HASH_insert(LDM_hashTable *table,
|
void HASH_insert(LDM_hashTable *table,
|
||||||
const hash_t hash, const LDM_hashEntry entry) {
|
const hash_t hash, const LDM_hashEntry entry) {
|
||||||
*getBucket(table, hash) = entry;
|
*getBucket(table, hash) = entry;
|
||||||
|
|
|
@ -9,7 +9,7 @@
|
||||||
// refactor code to scale the number of elements appropriately.
|
// refactor code to scale the number of elements appropriately.
|
||||||
|
|
||||||
// Number of elements per hash bucket.
|
// Number of elements per hash bucket.
|
||||||
#define HASH_BUCKET_SIZE_LOG 2 // MAX is 4 for now
|
#define HASH_BUCKET_SIZE_LOG 1 // MAX is 4 for now
|
||||||
#define HASH_BUCKET_SIZE (1 << (HASH_BUCKET_SIZE_LOG))
|
#define HASH_BUCKET_SIZE (1 << (HASH_BUCKET_SIZE_LOG))
|
||||||
|
|
||||||
struct LDM_hashTable {
|
struct LDM_hashTable {
|
||||||
|
@ -44,6 +44,25 @@ static LDM_hashEntry *getLastInsertFromHash(const LDM_hashTable *table,
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
|
||||||
|
const hash_t hash,
|
||||||
|
const U32 checksum,
|
||||||
|
const BYTE *pIn,
|
||||||
|
int (*isValid)(const BYTE *pIn, const BYTE *pMatch)) {
|
||||||
|
LDM_hashEntry *bucket = getBucket(table, hash);
|
||||||
|
LDM_hashEntry *cur = bucket;
|
||||||
|
// TODO: in order of recency?
|
||||||
|
for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) {
|
||||||
|
// Compare checksums first as a faster check.
|
||||||
|
if (cur->checksum == checksum &&
|
||||||
|
(*isValid)(pIn, cur->offset + table->offsetBase)) {
|
||||||
|
return cur;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
LDM_hashEntry *HASH_getEntryFromHash(const LDM_hashTable *table,
|
LDM_hashEntry *HASH_getEntryFromHash(const LDM_hashTable *table,
|
||||||
const hash_t hash,
|
const hash_t hash,
|
||||||
const U32 checksum) {
|
const U32 checksum) {
|
|
@ -5,7 +5,7 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
// Insert every (HASH_ONLY_EVERY + 1)-th input position into the hash table.
|
// Insert every (HASH_ONLY_EVERY + 1)-th input position into the hash table.
|
||||||
#define HASH_ONLY_EVERY 0
|
#define HASH_ONLY_EVERY 31
|
||||||
|
|
||||||
#define LDM_HASHLOG (LDM_MEMORY_USAGE-2)
|
#define LDM_HASHLOG (LDM_MEMORY_USAGE-2)
|
||||||
#define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE))
|
#define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE))
|
||||||
|
@ -38,8 +38,6 @@ struct LDM_compressStats {
|
||||||
U32 numCollisions;
|
U32 numCollisions;
|
||||||
U32 numHashInserts;
|
U32 numHashInserts;
|
||||||
|
|
||||||
// U64 numInvalidHashes, numValidHashes; // tmp
|
|
||||||
|
|
||||||
U32 offsetHistogram[32];
|
U32 offsetHistogram[32];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -153,45 +151,25 @@ void LDM_printCompressStats(const LDM_compressStats *stats) {
|
||||||
(double) stats->numMatches);
|
(double) stats->numMatches);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|
||||||
/*
|
|
||||||
printf("Num invalid hashes, num valid hashes, %llu %llu\n",
|
|
||||||
stats->numInvalidHashes, stats->numValidHashes);
|
|
||||||
*/
|
|
||||||
/*
|
|
||||||
printf("num collisions, num hash inserts, %% collisions: %u, %u, %.3f\n",
|
|
||||||
stats->numCollisions, stats->numHashInserts,
|
|
||||||
stats->numHashInserts == 0 ?
|
|
||||||
1.0 : (100.0 * (double)stats->numCollisions) /
|
|
||||||
(double)stats->numHashInserts);
|
|
||||||
*/
|
|
||||||
printf("=====================\n");
|
printf("=====================\n");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int LDM_isValidMatch(const BYTE *pIn, const BYTE *pMatch) {
|
int LDM_isValidMatch(const BYTE *pIn, const BYTE *pMatch) {
|
||||||
/*
|
|
||||||
if (memcmp(pIn, pMatch, LDM_MIN_MATCH_LENGTH) == 0) {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
*/
|
|
||||||
|
|
||||||
//TODO: This seems to be faster for some reason?
|
|
||||||
|
|
||||||
U32 lengthLeft = LDM_MIN_MATCH_LENGTH;
|
U32 lengthLeft = LDM_MIN_MATCH_LENGTH;
|
||||||
const BYTE *curIn = pIn;
|
const BYTE *curIn = pIn;
|
||||||
const BYTE *curMatch = pMatch;
|
const BYTE *curMatch = pMatch;
|
||||||
|
|
||||||
for (; lengthLeft >= 8; lengthLeft -= 8) {
|
if (pIn - pMatch > LDM_WINDOW_SIZE) {
|
||||||
if (MEM_read64(curIn) != MEM_read64(curMatch)) {
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
curIn += 8;
|
|
||||||
curMatch += 8;
|
for (; lengthLeft >= 4; lengthLeft -= 4) {
|
||||||
|
if (MEM_read32(curIn) != MEM_read32(curMatch)) {
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
if (lengthLeft > 0) {
|
curIn += 4;
|
||||||
return (MEM_read32(curIn) == MEM_read32(curMatch));
|
curMatch += 4;
|
||||||
}
|
}
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -307,8 +285,11 @@ static void putHashOfCurrentPositionFromHash(
|
||||||
// Hash only every HASH_ONLY_EVERY times, based on cctx->ip.
|
// Hash only every HASH_ONLY_EVERY times, based on cctx->ip.
|
||||||
// Note: this works only when cctx->step is 1.
|
// Note: this works only when cctx->step is 1.
|
||||||
if (((cctx->ip - cctx->ibase) & HASH_ONLY_EVERY) == HASH_ONLY_EVERY) {
|
if (((cctx->ip - cctx->ibase) & HASH_ONLY_EVERY) == HASH_ONLY_EVERY) {
|
||||||
|
/**
|
||||||
const LDM_hashEntry entry = { cctx->ip - cctx->ibase ,
|
const LDM_hashEntry entry = { cctx->ip - cctx->ibase ,
|
||||||
MEM_read32(cctx->ip) };
|
MEM_read32(cctx->ip) };
|
||||||
|
*/
|
||||||
|
const LDM_hashEntry entry = { cctx->ip - cctx->ibase, sum };
|
||||||
HASH_insert(cctx->hashTable, hash, entry);
|
HASH_insert(cctx->hashTable, hash, entry);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -438,7 +419,7 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) {
|
||||||
LDM_hashEntry *entry = NULL;
|
LDM_hashEntry *entry = NULL;
|
||||||
cctx->nextIp = cctx->ip + cctx->step;
|
cctx->nextIp = cctx->ip + cctx->step;
|
||||||
|
|
||||||
do {
|
while (entry == NULL) {
|
||||||
hash_t h;
|
hash_t h;
|
||||||
U32 sum;
|
U32 sum;
|
||||||
setNextHash(cctx);
|
setNextHash(cctx);
|
||||||
|
@ -451,17 +432,14 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
entry = HASH_getEntryFromHash(cctx->hashTable, h, MEM_read32(cctx->ip));
|
entry = HASH_getValidEntry(cctx->hashTable, h, sum, cctx->ip,
|
||||||
|
&LDM_isValidMatch);
|
||||||
|
|
||||||
if (entry != NULL) {
|
if (entry != NULL) {
|
||||||
*match = entry->offset + cctx->ibase;
|
*match = entry->offset + cctx->ibase;
|
||||||
}
|
}
|
||||||
|
|
||||||
putHashOfCurrentPositionFromHash(cctx, h, sum);
|
putHashOfCurrentPositionFromHash(cctx, h, sum);
|
||||||
|
}
|
||||||
} while (entry == NULL ||
|
|
||||||
(cctx->ip - *match > LDM_WINDOW_SIZE ||
|
|
||||||
!LDM_isValidMatch(cctx->ip, *match)));
|
|
||||||
setNextHash(cctx);
|
setNextHash(cctx);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,14 +11,14 @@
|
||||||
#define LDM_OFFSET_SIZE 4
|
#define LDM_OFFSET_SIZE 4
|
||||||
|
|
||||||
// Defines the size of the hash table.
|
// Defines the size of the hash table.
|
||||||
#define LDM_MEMORY_USAGE 16
|
#define LDM_MEMORY_USAGE 20
|
||||||
|
|
||||||
#define LDM_WINDOW_SIZE_LOG 25
|
#define LDM_WINDOW_SIZE_LOG 30
|
||||||
#define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG))
|
#define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG))
|
||||||
|
|
||||||
//These should be multiples of four.
|
//These should be multiples of four.
|
||||||
#define LDM_MIN_MATCH_LENGTH 1024
|
#define LDM_MIN_MATCH_LENGTH 64
|
||||||
#define LDM_HASH_LENGTH 1024
|
#define LDM_HASH_LENGTH 64
|
||||||
|
|
||||||
typedef struct LDM_compressStats LDM_compressStats;
|
typedef struct LDM_compressStats LDM_compressStats;
|
||||||
typedef struct LDM_CCtx LDM_CCtx;
|
typedef struct LDM_CCtx LDM_CCtx;
|
||||||
|
@ -82,7 +82,8 @@ void LDM_outputHashTableOffsetHistogram(const LDM_CCtx *cctx);
|
||||||
void LDM_printCompressStats(const LDM_compressStats *stats);
|
void LDM_printCompressStats(const LDM_compressStats *stats);
|
||||||
/**
|
/**
|
||||||
* Checks whether the LDM_MIN_MATCH_LENGTH bytes from p are the same as the
|
* Checks whether the LDM_MIN_MATCH_LENGTH bytes from p are the same as the
|
||||||
* LDM_MIN_MATCH_LENGTH bytes from match.
|
* LDM_MIN_MATCH_LENGTH bytes from match, and also whether
|
||||||
|
* pIn - pMatch <= LDM_WINDOW_SIZE.
|
||||||
*
|
*
|
||||||
* This assumes LDM_MIN_MATCH_LENGTH is a multiple of four.
|
* This assumes LDM_MIN_MATCH_LENGTH is a multiple of four.
|
||||||
*
|
*
|
||||||
|
|
|
@ -7,7 +7,7 @@ typedef U32 hash_t;
|
||||||
|
|
||||||
typedef struct LDM_hashEntry {
|
typedef struct LDM_hashEntry {
|
||||||
U32 offset;
|
U32 offset;
|
||||||
U32 checksum; // Not needed?
|
U32 checksum;
|
||||||
} LDM_hashEntry;
|
} LDM_hashEntry;
|
||||||
|
|
||||||
typedef struct LDM_hashTable LDM_hashTable;
|
typedef struct LDM_hashTable LDM_hashTable;
|
||||||
|
@ -17,10 +17,17 @@ typedef struct LDM_hashTable LDM_hashTable;
|
||||||
|
|
||||||
LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase);
|
LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase);
|
||||||
|
|
||||||
|
//TODO: unneeded?
|
||||||
LDM_hashEntry *HASH_getEntryFromHash(const LDM_hashTable *table,
|
LDM_hashEntry *HASH_getEntryFromHash(const LDM_hashTable *table,
|
||||||
const hash_t hash,
|
const hash_t hash,
|
||||||
const U32 checksum);
|
const U32 checksum);
|
||||||
|
|
||||||
|
LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
|
||||||
|
const hash_t hash,
|
||||||
|
const U32 checksum,
|
||||||
|
const BYTE *pIn,
|
||||||
|
int (*isValid)(const BYTE *pIn, const BYTE *pMatch));
|
||||||
|
|
||||||
void HASH_insert(LDM_hashTable *table, const hash_t hash,
|
void HASH_insert(LDM_hashTable *table, const hash_t hash,
|
||||||
const LDM_hashEntry entry);
|
const LDM_hashEntry entry);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue