Add basic chaining table
parent
ca300ce6e0
commit
4bb42b02c1
|
@ -25,13 +25,17 @@ LDFLAGS += -lzstd
|
||||||
|
|
||||||
default: all
|
default: all
|
||||||
|
|
||||||
all: main-ldm
|
all: main-basic main-chaining
|
||||||
|
|
||||||
main-ldm : basic_table.c ldm.c main-ldm.c
|
main-basic : basic_table.c ldm.c main-ldm.c
|
||||||
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
|
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
|
||||||
|
|
||||||
|
main-chaining : chaining_table.c ldm.c main-ldm.c
|
||||||
|
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
|
||||||
|
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
@rm -f core *.o tmp* result* *.ldm *.ldm.dec \
|
@rm -f core *.o tmp* result* *.ldm *.ldm.dec \
|
||||||
main main-ldm
|
main-basic main-chaining
|
||||||
@echo Cleaning completed
|
@echo Cleaning completed
|
||||||
|
|
||||||
|
|
|
@ -2,16 +2,19 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
#include "ldm_hashtable.h"
|
#include "ldm_hashtable.h"
|
||||||
|
#include "mem.h"
|
||||||
|
|
||||||
struct LDM_hashTable {
|
struct LDM_hashTable {
|
||||||
U32 size;
|
U32 size;
|
||||||
LDM_hashEntry *entries;
|
LDM_hashEntry *entries;
|
||||||
|
const BYTE *offsetBase;
|
||||||
};
|
};
|
||||||
|
|
||||||
LDM_hashTable *HASH_createTable(U32 size) {
|
LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase) {
|
||||||
LDM_hashTable *table = malloc(sizeof(LDM_hashTable));
|
LDM_hashTable *table = malloc(sizeof(LDM_hashTable));
|
||||||
table->size = size;
|
table->size = size;
|
||||||
table->entries = calloc(size, sizeof(LDM_hashEntry));
|
table->entries = calloc(size, sizeof(LDM_hashEntry));
|
||||||
|
table->offsetBase = offsetBase;
|
||||||
return table;
|
return table;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -20,15 +23,19 @@ void HASH_initializeTable(LDM_hashTable *table, U32 size) {
|
||||||
table->entries = calloc(size, sizeof(LDM_hashEntry));
|
table->entries = calloc(size, sizeof(LDM_hashEntry));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LDM_hashEntry *getBucket(const LDM_hashTable *table, const hash_t hash) {
|
||||||
|
return table->entries + hash;
|
||||||
|
}
|
||||||
|
|
||||||
LDM_hashEntry *HASH_getEntryFromHash(
|
LDM_hashEntry *HASH_getEntryFromHash(
|
||||||
const LDM_hashTable *table, const hash_t hash) {
|
const LDM_hashTable *table, const hash_t hash, const U32 checksum) {
|
||||||
return &(table->entries[hash]);
|
(void)checksum;
|
||||||
|
return getBucket(table, hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HASH_insert(LDM_hashTable *table,
|
void HASH_insert(LDM_hashTable *table,
|
||||||
const hash_t hash, const LDM_hashEntry entry) {
|
const hash_t hash, const LDM_hashEntry entry) {
|
||||||
*HASH_getEntryFromHash(table, hash) = entry;
|
*getBucket(table, hash) = entry;
|
||||||
}
|
}
|
||||||
|
|
||||||
U32 HASH_getSize(const LDM_hashTable *table) {
|
U32 HASH_getSize(const LDM_hashTable *table) {
|
||||||
|
@ -44,7 +51,7 @@ void HASH_outputTableOccupancy(const LDM_hashTable *hashTable) {
|
||||||
U32 i = 0;
|
U32 i = 0;
|
||||||
U32 ctr = 0;
|
U32 ctr = 0;
|
||||||
for (; i < HASH_getSize(hashTable); i++) {
|
for (; i < HASH_getSize(hashTable); i++) {
|
||||||
if (HASH_getEntryFromHash(hashTable, i)->offset == 0) {
|
if (getBucket(hashTable, i)->offset == 0) {
|
||||||
ctr++;
|
ctr++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -52,5 +59,3 @@ void HASH_outputTableOccupancy(const LDM_hashTable *hashTable) {
|
||||||
HASH_getSize(hashTable), ctr,
|
HASH_getSize(hashTable), ctr,
|
||||||
100.0 * (double)(ctr) / (double)HASH_getSize(hashTable));
|
100.0 * (double)(ctr) / (double)HASH_getSize(hashTable));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,92 @@
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "ldm_hashtable.h"
|
||||||
|
#include "mem.h"
|
||||||
|
|
||||||
|
//TODO: move def somewhere else.
|
||||||
|
//TODO: memory usage is currently no longer LDM_MEMORY_USAGE.
|
||||||
|
// refactor code to scale the number of elements appropriately.
|
||||||
|
|
||||||
|
// Number of elements per hash bucket.
|
||||||
|
#define HASH_BUCKET_SIZE_LOG 2 // MAX is 4 for now
|
||||||
|
#define HASH_BUCKET_SIZE (1 << (HASH_BUCKET_SIZE_LOG))
|
||||||
|
|
||||||
|
struct LDM_hashTable {
|
||||||
|
U32 size;
|
||||||
|
LDM_hashEntry *entries; // 1-D array for now.
|
||||||
|
|
||||||
|
// Position corresponding to offset=0 in LDM_hashEntry.
|
||||||
|
const BYTE *offsetBase;
|
||||||
|
BYTE *bucketOffsets; // Pointer to current insert position.
|
||||||
|
// Last insert was at bucketOffsets - 1?
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Allocates a chaining hash table with `size` buckets, each holding
 * HASH_BUCKET_SIZE zero-initialized entries, plus one insert cursor per
 * bucket. `offsetBase` is stored as the position that entry offset 0
 * refers to; the table does not take ownership of it.
 *
 * Returns the new table, or NULL if any allocation fails (nothing is
 * leaked in that case). Release the result with HASH_destroyTable().
 */
LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase) {
  LDM_hashTable *table = malloc(sizeof *table);
  if (table == NULL) {
    return NULL;
  }

  table->size = size;
  table->offsetBase = offsetBase;

  /* Multiply in size_t rather than U32 so the entry count does not wrap
   * at 32 bits on platforms where size_t is wider. */
  table->entries =
      calloc((size_t)size * HASH_BUCKET_SIZE, sizeof *table->entries);
  table->bucketOffsets = calloc(size, sizeof *table->bucketOffsets);

  /* calloc(0, ...) may legitimately return NULL, so only treat NULL as a
   * failure when something was actually requested. */
  if (size != 0 &&
      (table->entries == NULL || table->bucketOffsets == NULL)) {
    free(table->entries);
    free(table->bucketOffsets);
    free(table);
    return NULL;
  }
  return table;
}
|
||||||
|
|
||||||
|
static LDM_hashEntry *getBucket(const LDM_hashTable *table, const hash_t hash) {
|
||||||
|
return table->entries + (hash << HASH_BUCKET_SIZE_LOG);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
static LDM_hashEntry *getLastInsertFromHash(const LDM_hashTable *table,
|
||||||
|
const hash_t hash) {
|
||||||
|
LDM_hashEntry *bucket = getBucket(table, hash);
|
||||||
|
BYTE offset = (table->bucketOffsets[hash] - 1) & (HASH_BUCKET_SIZE - 1);
|
||||||
|
return bucket + offset;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
LDM_hashEntry *HASH_getEntryFromHash(const LDM_hashTable *table,
|
||||||
|
const hash_t hash,
|
||||||
|
const U32 checksum) {
|
||||||
|
// Loop through bucket.
|
||||||
|
// TODO: in order of recency???
|
||||||
|
LDM_hashEntry *bucket = getBucket(table, hash);
|
||||||
|
LDM_hashEntry *cur = bucket;
|
||||||
|
for(; cur < bucket + HASH_BUCKET_SIZE; ++cur) {
|
||||||
|
if (cur->checksum == checksum) {
|
||||||
|
return cur;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
void HASH_insert(LDM_hashTable *table,
|
||||||
|
const hash_t hash, const LDM_hashEntry entry) {
|
||||||
|
*(getBucket(table, hash) + table->bucketOffsets[hash]) = entry;
|
||||||
|
table->bucketOffsets[hash]++;
|
||||||
|
table->bucketOffsets[hash] &= HASH_BUCKET_SIZE - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
U32 HASH_getSize(const LDM_hashTable *table) {
|
||||||
|
return table->size * HASH_BUCKET_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Frees a table created by HASH_createTable(), including its entry array
 * and per-bucket insert cursors. Passing NULL is a no-op, mirroring
 * free(), so callers can use it unconditionally on cleanup paths.
 */
void HASH_destroyTable(LDM_hashTable *table) {
  if (table == NULL) {
    return;
  }
  free(table->entries);
  free(table->bucketOffsets);
  free(table);
}
|
||||||
|
|
||||||
|
void HASH_outputTableOccupancy(const LDM_hashTable *table) {
|
||||||
|
U32 ctr = 0;
|
||||||
|
LDM_hashEntry *cur = table->entries;
|
||||||
|
LDM_hashEntry *end = table->entries + (table->size * HASH_BUCKET_SIZE);
|
||||||
|
for (; cur < end; ++cur) {
|
||||||
|
if (cur->offset == 0) {
|
||||||
|
ctr++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Hash table size, empty slots, %% empty: %u, %u, %.3f\n",
|
||||||
|
HASH_getSize(table), ctr,
|
||||||
|
100.0 * (double)(ctr) / (double)HASH_getSize(table));
|
||||||
|
}
|
|
@ -20,9 +20,8 @@
|
||||||
#define CHECKSUM_CHAR_OFFSET 10
|
#define CHECKSUM_CHAR_OFFSET 10
|
||||||
//#define RUN_CHECKS
|
//#define RUN_CHECKS
|
||||||
//#define LDM_DEBUG
|
//#define LDM_DEBUG
|
||||||
//
|
|
||||||
#include "ldm.h"
|
|
||||||
|
|
||||||
|
#include "ldm.h"
|
||||||
#include "ldm_hashtable.h"
|
#include "ldm_hashtable.h"
|
||||||
|
|
||||||
// TODO: Scanning speed
|
// TODO: Scanning speed
|
||||||
|
@ -98,6 +97,7 @@ static int intLog2(U32 x) {
|
||||||
|
|
||||||
// TODO: Maybe we would eventually prefer to have linear rather than
|
// TODO: Maybe we would eventually prefer to have linear rather than
|
||||||
// exponential buckets.
|
// exponential buckets.
|
||||||
|
/**
|
||||||
void HASH_outputTableOffsetHistogram(const LDM_CCtx *cctx) {
|
void HASH_outputTableOffsetHistogram(const LDM_CCtx *cctx) {
|
||||||
U32 i = 0;
|
U32 i = 0;
|
||||||
int buckets[32] = { 0 };
|
int buckets[32] = { 0 };
|
||||||
|
@ -119,6 +119,7 @@ void HASH_outputTableOffsetHistogram(const LDM_CCtx *cctx) {
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
void LDM_printCompressStats(const LDM_compressStats *stats) {
|
void LDM_printCompressStats(const LDM_compressStats *stats) {
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
@ -127,9 +128,11 @@ void LDM_printCompressStats(const LDM_compressStats *stats) {
|
||||||
//TODO: compute percentage matched?
|
//TODO: compute percentage matched?
|
||||||
printf("Window size, hash table size (bytes): 2^%u, 2^%u\n",
|
printf("Window size, hash table size (bytes): 2^%u, 2^%u\n",
|
||||||
stats->windowSizeLog, stats->hashTableSizeLog);
|
stats->windowSizeLog, stats->hashTableSizeLog);
|
||||||
printf("num matches, total match length: %u, %llu\n",
|
printf("num matches, total match length, %% matched: %u, %llu, %.3f\n",
|
||||||
stats->numMatches,
|
stats->numMatches,
|
||||||
stats->totalMatchLength);
|
stats->totalMatchLength,
|
||||||
|
100.0 * (double)stats->totalMatchLength /
|
||||||
|
(double)(stats->totalMatchLength + stats->totalLiteralLength));
|
||||||
printf("avg match length: %.1f\n", ((double)stats->totalMatchLength) /
|
printf("avg match length: %.1f\n", ((double)stats->totalMatchLength) /
|
||||||
(double)stats->numMatches);
|
(double)stats->numMatches);
|
||||||
printf("avg literal length, total literalLength: %.1f, %llu\n",
|
printf("avg literal length, total literalLength: %.1f, %llu\n",
|
||||||
|
@ -155,11 +158,13 @@ void LDM_printCompressStats(const LDM_compressStats *stats) {
|
||||||
printf("Num invalid hashes, num valid hashes, %llu %llu\n",
|
printf("Num invalid hashes, num valid hashes, %llu %llu\n",
|
||||||
stats->numInvalidHashes, stats->numValidHashes);
|
stats->numInvalidHashes, stats->numValidHashes);
|
||||||
*/
|
*/
|
||||||
|
/*
|
||||||
printf("num collisions, num hash inserts, %% collisions: %u, %u, %.3f\n",
|
printf("num collisions, num hash inserts, %% collisions: %u, %u, %.3f\n",
|
||||||
stats->numCollisions, stats->numHashInserts,
|
stats->numCollisions, stats->numHashInserts,
|
||||||
stats->numHashInserts == 0 ?
|
stats->numHashInserts == 0 ?
|
||||||
1.0 : (100.0 * (double)stats->numCollisions) /
|
1.0 : (100.0 * (double)stats->numCollisions) /
|
||||||
(double)stats->numHashInserts);
|
(double)stats->numHashInserts);
|
||||||
|
*/
|
||||||
printf("=====================\n");
|
printf("=====================\n");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -173,6 +178,7 @@ int LDM_isValidMatch(const BYTE *pIn, const BYTE *pMatch) {
|
||||||
*/
|
*/
|
||||||
|
|
||||||
//TODO: This seems to be faster for some reason?
|
//TODO: This seems to be faster for some reason?
|
||||||
|
|
||||||
U32 lengthLeft = LDM_MIN_MATCH_LENGTH;
|
U32 lengthLeft = LDM_MIN_MATCH_LENGTH;
|
||||||
const BYTE *curIn = pIn;
|
const BYTE *curIn = pIn;
|
||||||
const BYTE *curMatch = pMatch;
|
const BYTE *curMatch = pMatch;
|
||||||
|
@ -286,8 +292,9 @@ static void setNextHash(LDM_CCtx *cctx) {
|
||||||
|
|
||||||
static void putHashOfCurrentPositionFromHash(
|
static void putHashOfCurrentPositionFromHash(
|
||||||
LDM_CCtx *cctx, hash_t hash, U32 sum) {
|
LDM_CCtx *cctx, hash_t hash, U32 sum) {
|
||||||
|
/*
|
||||||
#ifdef COMPUTE_STATS
|
#ifdef COMPUTE_STATS
|
||||||
if (cctx->stats.numHashInserts < LDM_HASHTABLESIZE_U32) {
|
if (cctx->stats.numHashInserts < HASH_getSize(cctx->hashTable)) {
|
||||||
U32 offset = HASH_getEntryFromHash(cctx->hashTable, hash)->offset;
|
U32 offset = HASH_getEntryFromHash(cctx->hashTable, hash)->offset;
|
||||||
cctx->stats.numHashInserts++;
|
cctx->stats.numHashInserts++;
|
||||||
if (offset != 0 && !LDM_isValidMatch(cctx->ip, offset + cctx->ibase)) {
|
if (offset != 0 && !LDM_isValidMatch(cctx->ip, offset + cctx->ibase)) {
|
||||||
|
@ -295,11 +302,13 @@ static void putHashOfCurrentPositionFromHash(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
*/
|
||||||
|
|
||||||
// Hash only every HASH_ONLY_EVERY times, based on cctx->ip.
|
// Hash only every HASH_ONLY_EVERY times, based on cctx->ip.
|
||||||
// Note: this works only when cctx->step is 1.
|
// Note: this works only when cctx->step is 1.
|
||||||
if (((cctx->ip - cctx->ibase) & HASH_ONLY_EVERY) == HASH_ONLY_EVERY) {
|
if (((cctx->ip - cctx->ibase) & HASH_ONLY_EVERY) == HASH_ONLY_EVERY) {
|
||||||
const LDM_hashEntry entry = { cctx->ip - cctx->ibase };
|
const LDM_hashEntry entry = { cctx->ip - cctx->ibase ,
|
||||||
|
MEM_read32(cctx->ip) };
|
||||||
HASH_insert(cctx->hashTable, hash, entry);
|
HASH_insert(cctx->hashTable, hash, entry);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -393,7 +402,7 @@ void LDM_initializeCCtx(LDM_CCtx *cctx,
|
||||||
cctx->anchor = cctx->ibase;
|
cctx->anchor = cctx->ibase;
|
||||||
|
|
||||||
memset(&(cctx->stats), 0, sizeof(cctx->stats));
|
memset(&(cctx->stats), 0, sizeof(cctx->stats));
|
||||||
cctx->hashTable = HASH_createTable(LDM_HASHTABLESIZE_U32);
|
cctx->hashTable = HASH_createTable(LDM_HASHTABLESIZE_U32, cctx->ibase);
|
||||||
|
|
||||||
//HASH_initializeTable(cctx->hashTable, LDM_HASHTABLESIZE_U32);
|
//HASH_initializeTable(cctx->hashTable, LDM_HASHTABLESIZE_U32);
|
||||||
|
|
||||||
|
@ -425,12 +434,13 @@ void LDM_destroyCCtx(LDM_CCtx *cctx) {
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) {
|
static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) {
|
||||||
|
|
||||||
|
LDM_hashEntry *entry = NULL;
|
||||||
cctx->nextIp = cctx->ip + cctx->step;
|
cctx->nextIp = cctx->ip + cctx->step;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
hash_t h;
|
hash_t h;
|
||||||
U32 sum;
|
U32 sum;
|
||||||
LDM_hashEntry *entry;
|
|
||||||
setNextHash(cctx);
|
setNextHash(cctx);
|
||||||
h = cctx->nextHash;
|
h = cctx->nextHash;
|
||||||
sum = cctx->nextSum;
|
sum = cctx->nextSum;
|
||||||
|
@ -441,13 +451,17 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
entry = HASH_getEntryFromHash(cctx->hashTable, h);
|
entry = HASH_getEntryFromHash(cctx->hashTable, h, MEM_read32(cctx->ip));
|
||||||
*match = entry->offset + cctx->ibase;
|
|
||||||
|
if (entry != NULL) {
|
||||||
|
*match = entry->offset + cctx->ibase;
|
||||||
|
}
|
||||||
|
|
||||||
putHashOfCurrentPositionFromHash(cctx, h, sum);
|
putHashOfCurrentPositionFromHash(cctx, h, sum);
|
||||||
|
|
||||||
} while (cctx->ip - *match > LDM_WINDOW_SIZE ||
|
} while (entry == NULL ||
|
||||||
!LDM_isValidMatch(cctx->ip, *match));
|
(cctx->ip - *match > LDM_WINDOW_SIZE ||
|
||||||
|
!LDM_isValidMatch(cctx->ip, *match)));
|
||||||
setNextHash(cctx);
|
setNextHash(cctx);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -510,7 +524,7 @@ void LDM_outputBlock(LDM_CCtx *cctx,
|
||||||
size_t LDM_compress(const void *src, size_t srcSize,
|
size_t LDM_compress(const void *src, size_t srcSize,
|
||||||
void *dst, size_t maxDstSize) {
|
void *dst, size_t maxDstSize) {
|
||||||
LDM_CCtx cctx;
|
LDM_CCtx cctx;
|
||||||
const BYTE *match;
|
const BYTE *match = NULL;
|
||||||
LDM_initializeCCtx(&cctx, src, srcSize, dst, maxDstSize);
|
LDM_initializeCCtx(&cctx, src, srcSize, dst, maxDstSize);
|
||||||
|
|
||||||
/* Hash the first position and put it into the hash table. */
|
/* Hash the first position and put it into the hash table. */
|
||||||
|
|
|
@ -17,8 +17,8 @@
|
||||||
#define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG))
|
#define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG))
|
||||||
|
|
||||||
//These should be multiples of four.
|
//These should be multiples of four.
|
||||||
#define LDM_MIN_MATCH_LENGTH 4
|
#define LDM_MIN_MATCH_LENGTH 1024
|
||||||
#define LDM_HASH_LENGTH 4
|
#define LDM_HASH_LENGTH 1024
|
||||||
|
|
||||||
typedef struct LDM_compressStats LDM_compressStats;
|
typedef struct LDM_compressStats LDM_compressStats;
|
||||||
typedef struct LDM_CCtx LDM_CCtx;
|
typedef struct LDM_CCtx LDM_CCtx;
|
||||||
|
|
|
@ -7,6 +7,7 @@ typedef U32 hash_t;
|
||||||
|
|
||||||
typedef struct LDM_hashEntry {
|
typedef struct LDM_hashEntry {
|
||||||
U32 offset;
|
U32 offset;
|
||||||
|
U32 checksum; // Not needed?
|
||||||
} LDM_hashEntry;
|
} LDM_hashEntry;
|
||||||
|
|
||||||
typedef struct LDM_hashTable LDM_hashTable;
|
typedef struct LDM_hashTable LDM_hashTable;
|
||||||
|
@ -14,10 +15,11 @@ typedef struct LDM_hashTable LDM_hashTable;
|
||||||
// TODO: rename functions
|
// TODO: rename functions
|
||||||
// TODO: comments
|
// TODO: comments
|
||||||
|
|
||||||
LDM_hashTable *HASH_createTable(U32 size);
|
LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase);
|
||||||
|
|
||||||
LDM_hashEntry *HASH_getEntryFromHash(const LDM_hashTable *table,
|
LDM_hashEntry *HASH_getEntryFromHash(const LDM_hashTable *table,
|
||||||
const hash_t hash);
|
const hash_t hash,
|
||||||
|
const U32 checksum);
|
||||||
|
|
||||||
void HASH_insert(LDM_hashTable *table, const hash_t hash,
|
void HASH_insert(LDM_hashTable *table, const hash_t hash,
|
||||||
const LDM_hashEntry entry);
|
const LDM_hashEntry entry);
|
||||||
|
|
Loading…
Reference in New Issue