Add function to get valid entries only from table

dev
Stella Lau 2017-07-17 15:16:58 -07:00
parent 4bb42b02c1
commit 15a041adbf
6 changed files with 70 additions and 48 deletions

View File

@ -25,17 +25,17 @@ LDFLAGS += -lzstd
default: all
all: main-basic main-chaining
all: main-basic main-circular-buffer
main-basic : basic_table.c ldm.c main-ldm.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
main-chaining : chaining_table.c ldm.c main-ldm.c
main-circular-buffer: circular_buffer_table.c ldm.c main-ldm.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
clean:
@rm -f core *.o tmp* result* *.ldm *.ldm.dec \
main-basic main-chaining
main-basic main-circular-buffer
@echo Cleaning completed

View File

@ -27,12 +27,29 @@ LDM_hashEntry *getBucket(const LDM_hashTable *table, const hash_t hash) {
return table->entries + hash;
}
/**
 * Looks up the table slot associated with hash.
 *
 * The checksum argument is accepted for interface compatibility but is not
 * consulted here: the slot returned by getBucket() is handed back as is.
 */
LDM_hashEntry *HASH_getEntryFromHash(
    const LDM_hashTable *table, const hash_t hash, const U32 checksum) {
  LDM_hashEntry *const slot = getBucket(table, hash);
  (void)checksum;  /* Unused in this implementation. */
  return slot;
}
/**
 * Returns the entry stored for hash if the caller-supplied predicate accepts
 * it, and NULL otherwise.
 *
 * Only the entry returned by getBucket() is examined. The candidate match
 * position handed to isValid is table->offsetBase advanced by the entry's
 * offset. The checksum argument is not consulted in this implementation.
 */
LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
                                  const hash_t hash,
                                  const U32 checksum,
                                  const BYTE *pIn,
                                  int (*isValid)(const BYTE *pIn, const BYTE *pMatch)) {
  LDM_hashEntry *const candidate = getBucket(table, hash);
  (void)checksum;  /* Unused in this implementation. */
  return isValid(pIn, candidate->offset + table->offsetBase) ? candidate : NULL;
}
void HASH_insert(LDM_hashTable *table,
const hash_t hash, const LDM_hashEntry entry) {
*getBucket(table, hash) = entry;

View File

@ -9,7 +9,7 @@
// refactor code to scale the number of elements appropriately.
// Number of elements per hash bucket.
#define HASH_BUCKET_SIZE_LOG 2 // MAX is 4 for now
#define HASH_BUCKET_SIZE_LOG 1 // MAX is 4 for now
#define HASH_BUCKET_SIZE (1 << (HASH_BUCKET_SIZE_LOG))
struct LDM_hashTable {
@ -44,6 +44,25 @@ static LDM_hashEntry *getLastInsertFromHash(const LDM_hashTable *table,
}
*/
/**
 * Scans the HASH_BUCKET_SIZE entries of the bucket selected by hash and
 * returns the first one whose stored checksum equals checksum and which the
 * caller-supplied predicate accepts. Returns NULL if no entry qualifies.
 *
 * The candidate match position handed to isValid is table->offsetBase
 * advanced by the entry's offset.
 */
LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
                                  const hash_t hash,
                                  const U32 checksum,
                                  const BYTE *pIn,
                                  int (*isValid)(const BYTE *pIn, const BYTE *pMatch)) {
  LDM_hashEntry *const first = getBucket(table, hash);
  LDM_hashEntry *const end = first + HASH_BUCKET_SIZE;
  LDM_hashEntry *slot = first;

  // TODO: in order of recency?
  while (slot < end) {
    // Compare checksums first: it is a cheap filter that avoids calling
    // isValid on entries that cannot match.
    if (slot->checksum == checksum) {
      if (isValid(pIn, slot->offset + table->offsetBase)) {
        return slot;
      }
    }
    ++slot;
  }
  return NULL;
}
LDM_hashEntry *HASH_getEntryFromHash(const LDM_hashTable *table,
const hash_t hash,
const U32 checksum) {

View File

@ -5,7 +5,7 @@
#include <string.h>
// Insert every (HASH_ONLY_EVERY + 1) into the hash table.
#define HASH_ONLY_EVERY 0
#define HASH_ONLY_EVERY 31
#define LDM_HASHLOG (LDM_MEMORY_USAGE-2)
#define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE))
@ -38,8 +38,6 @@ struct LDM_compressStats {
U32 numCollisions;
U32 numHashInserts;
// U64 numInvalidHashes, numValidHashes; // tmp
U32 offsetHistogram[32];
};
@ -153,45 +151,25 @@ void LDM_printCompressStats(const LDM_compressStats *stats) {
(double) stats->numMatches);
}
printf("\n");
/*
printf("Num invalid hashes, num valid hashes, %llu %llu\n",
stats->numInvalidHashes, stats->numValidHashes);
*/
/*
printf("num collisions, num hash inserts, %% collisions: %u, %u, %.3f\n",
stats->numCollisions, stats->numHashInserts,
stats->numHashInserts == 0 ?
1.0 : (100.0 * (double)stats->numCollisions) /
(double)stats->numHashInserts);
*/
printf("=====================\n");
}
int LDM_isValidMatch(const BYTE *pIn, const BYTE *pMatch) {
/*
if (memcmp(pIn, pMatch, LDM_MIN_MATCH_LENGTH) == 0) {
return 1;
}
return 0;
*/
//TODO: This seems to be faster for some reason?
U32 lengthLeft = LDM_MIN_MATCH_LENGTH;
const BYTE *curIn = pIn;
const BYTE *curMatch = pMatch;
for (; lengthLeft >= 8; lengthLeft -= 8) {
if (MEM_read64(curIn) != MEM_read64(curMatch)) {
if (pIn - pMatch > LDM_WINDOW_SIZE) {
return 0;
}
for (; lengthLeft >= 4; lengthLeft -= 4) {
if (MEM_read32(curIn) != MEM_read32(curMatch)) {
return 0;
}
curIn += 8;
curMatch += 8;
}
if (lengthLeft > 0) {
return (MEM_read32(curIn) == MEM_read32(curMatch));
curIn += 4;
curMatch += 4;
}
return 1;
}
@ -307,8 +285,11 @@ static void putHashOfCurrentPositionFromHash(
// Hash only every HASH_ONLY_EVERY times, based on cctx->ip.
// Note: this works only when cctx->step is 1.
if (((cctx->ip - cctx->ibase) & HASH_ONLY_EVERY) == HASH_ONLY_EVERY) {
/**
const LDM_hashEntry entry = { cctx->ip - cctx->ibase ,
MEM_read32(cctx->ip) };
*/
const LDM_hashEntry entry = { cctx->ip - cctx->ibase, sum };
HASH_insert(cctx->hashTable, hash, entry);
}
@ -438,7 +419,7 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) {
LDM_hashEntry *entry = NULL;
cctx->nextIp = cctx->ip + cctx->step;
do {
while (entry == NULL) {
hash_t h;
U32 sum;
setNextHash(cctx);
@ -451,17 +432,14 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) {
return 1;
}
entry = HASH_getEntryFromHash(cctx->hashTable, h, MEM_read32(cctx->ip));
entry = HASH_getValidEntry(cctx->hashTable, h, sum, cctx->ip,
&LDM_isValidMatch);
if (entry != NULL) {
*match = entry->offset + cctx->ibase;
}
putHashOfCurrentPositionFromHash(cctx, h, sum);
} while (entry == NULL ||
(cctx->ip - *match > LDM_WINDOW_SIZE ||
!LDM_isValidMatch(cctx->ip, *match)));
}
setNextHash(cctx);
return 0;
}

View File

@ -11,14 +11,14 @@
#define LDM_OFFSET_SIZE 4
// Defines the size of the hash table.
#define LDM_MEMORY_USAGE 16
#define LDM_MEMORY_USAGE 20
#define LDM_WINDOW_SIZE_LOG 25
#define LDM_WINDOW_SIZE_LOG 30
#define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG))
//These should be multiples of four.
#define LDM_MIN_MATCH_LENGTH 1024
#define LDM_HASH_LENGTH 1024
#define LDM_MIN_MATCH_LENGTH 64
#define LDM_HASH_LENGTH 64
typedef struct LDM_compressStats LDM_compressStats;
typedef struct LDM_CCtx LDM_CCtx;
@ -82,7 +82,8 @@ void LDM_outputHashTableOffsetHistogram(const LDM_CCtx *cctx);
void LDM_printCompressStats(const LDM_compressStats *stats);
/**
* Checks whether the LDM_MIN_MATCH_LENGTH bytes from p are the same as the
* LDM_MIN_MATCH_LENGTH bytes from match.
* LDM_MIN_MATCH_LENGTH bytes from match and also if
* pIn - pMatch <= LDM_WINDOW_SIZE.
*
* This assumes LDM_MIN_MATCH_LENGTH is a multiple of four.
*

View File

@ -7,7 +7,7 @@ typedef U32 hash_t;
typedef struct LDM_hashEntry {
U32 offset;
U32 checksum; // Not needed?
U32 checksum;
} LDM_hashEntry;
typedef struct LDM_hashTable LDM_hashTable;
@ -17,10 +17,17 @@ typedef struct LDM_hashTable LDM_hashTable;
LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase);
//TODO: unneeded?
LDM_hashEntry *HASH_getEntryFromHash(const LDM_hashTable *table,
const hash_t hash,
const U32 checksum);
/**
 * Returns a pointer to an entry stored under hash for which
 * (*isValid)(pIn, pMatch) returns nonzero, where pMatch is the table's
 * offset base advanced by the entry's offset. Returns NULL if no such entry
 * exists.
 *
 * checksum may be used by an implementation to reject entries cheaply before
 * calling isValid; an implementation is also free to ignore it.
 */
LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
const hash_t hash,
const U32 checksum,
const BYTE *pIn,
int (*isValid)(const BYTE *pIn, const BYTE *pMatch));
void HASH_insert(LDM_hashTable *table, const hash_t hash,
const LDM_hashEntry entry);