Make the meaning of LDM_MEMORY_USAGE consistent across tables
parent
fc41a87964
commit
19258f51c1
|
@ -25,7 +25,7 @@ LDFLAGS += -lzstd
|
|||
|
||||
default: all
|
||||
|
||||
all: main-basic main-circular-buffer main-lag
|
||||
all: main-basic main-circular-buffer
|
||||
|
||||
main-basic : basic_table.c ldm.c main-ldm.c
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
@ -33,11 +33,8 @@ main-basic : basic_table.c ldm.c main-ldm.c
|
|||
main-circular-buffer: circular_buffer_table.c ldm.c main-ldm.c
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
main-lag: lag_table.c ldm.c main-ldm.c
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
clean:
|
||||
@rm -f core *.o tmp* result* *.ldm *.ldm.dec \
|
||||
main-basic main-circular-buffer main-lag
|
||||
main-basic main-circular-buffer
|
||||
@echo Cleaning completed
|
||||
|
||||
|
|
|
@ -1,9 +1,12 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "ldm.h"
|
||||
#include "ldm_hashtable.h"
|
||||
#include "mem.h"
|
||||
|
||||
#define LDM_HASHLOG ((LDM_MEMORY_USAGE) - 4)
|
||||
|
||||
struct LDM_hashTable {
|
||||
U32 size;
|
||||
LDM_hashEntry *entries;
|
||||
|
@ -46,6 +49,10 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/**
 * Map a U32 value to a hash.
 *
 * The value is multiplied by the constant 2654435761U and the product is
 * shifted right by (32 - LDM_HASHLOG), so only the upper part of the product
 * is kept as the hash.
 */
hash_t HASH_hashU32(U32 value) {
  const U32 product = value * 2654435761U;
  const int shift = 32 - LDM_HASHLOG;

  return (product >> shift);
}
|
||||
|
||||
void HASH_insert(LDM_hashTable *table,
|
||||
const hash_t hash, const LDM_hashEntry entry) {
|
||||
*getBucket(table, hash) = entry;
|
||||
|
|
|
@ -1,33 +1,36 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "ldm.h"
|
||||
#include "ldm_hashtable.h"
|
||||
#include "mem.h"
|
||||
|
||||
//TODO: move def somewhere else.
|
||||
//TODO: memory usage is currently no longer LDM_MEMORY_USAGE.
|
||||
// refactor code to scale the number of elements appropriately.
|
||||
|
||||
// Number of elements per hash bucket.
|
||||
// HASH_BUCKET_SIZE_LOG defined in ldm.h
|
||||
#define HASH_BUCKET_SIZE_LOG 0 // MAX is 4 for now
|
||||
#define HASH_BUCKET_SIZE (1 << (HASH_BUCKET_SIZE_LOG))
|
||||
|
||||
#define LDM_HASHLOG ((LDM_MEMORY_USAGE)-4-HASH_BUCKET_SIZE_LOG)
|
||||
|
||||
struct LDM_hashTable {
|
||||
U32 size;
|
||||
U32 size; // Number of buckets
|
||||
U32 maxEntries; // Rename...
|
||||
LDM_hashEntry *entries; // 1-D array for now.
|
||||
|
||||
// Position corresponding to offset=0 in LDM_hashEntry.
|
||||
const BYTE *offsetBase;
|
||||
BYTE *bucketOffsets; // Pointer to current insert position.
|
||||
|
||||
// Last insert was at bucketOffsets - 1?
|
||||
};
|
||||
|
||||
LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase) {
|
||||
LDM_hashTable *table = malloc(sizeof(LDM_hashTable));
|
||||
table->size = size;
|
||||
table->entries = calloc(size * HASH_BUCKET_SIZE, sizeof(LDM_hashEntry));
|
||||
table->bucketOffsets = calloc(size, sizeof(BYTE));
|
||||
table->size = size >> HASH_BUCKET_SIZE_LOG;
|
||||
table->maxEntries = size;
|
||||
table->entries = calloc(size, sizeof(LDM_hashEntry));
|
||||
table->bucketOffsets = calloc(size >> HASH_BUCKET_SIZE_LOG, sizeof(BYTE));
|
||||
table->offsetBase = offsetBase;
|
||||
return table;
|
||||
}
|
||||
|
@ -45,11 +48,6 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
|
|||
LDM_hashEntry *cur = bucket;
|
||||
// TODO: in order of recency?
|
||||
for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) {
|
||||
/*
|
||||
if (cur->checksum == 0 && cur->offset == 0) {
|
||||
return NULL;
|
||||
}
|
||||
*/
|
||||
// Check checksum for faster check.
|
||||
if (cur->checksum == checksum &&
|
||||
(*isValid)(pIn, cur->offset + table->offsetBase)) {
|
||||
|
@ -59,6 +57,11 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/**
 * Compute the hash of a U32 value.
 *
 * Multiplies the value by the constant 2654435761U, then drops the low
 * (32 - LDM_HASHLOG) bits of the product with a right shift; what remains is
 * returned as the hash.
 */
hash_t HASH_hashU32(U32 value) {
  const int droppedBits = 32 - LDM_HASHLOG;
  const U32 mixed = value * 2654435761U;

  return (mixed >> droppedBits);
}
|
||||
|
||||
|
||||
LDM_hashEntry *HASH_getEntryFromHash(const LDM_hashTable *table,
|
||||
const hash_t hash,
|
||||
const U32 checksum) {
|
||||
|
@ -82,7 +85,7 @@ void HASH_insert(LDM_hashTable *table,
|
|||
}
|
||||
|
||||
U32 HASH_getSize(const LDM_hashTable *table) {
|
||||
return table->size * HASH_BUCKET_SIZE;
|
||||
return table->size;
|
||||
}
|
||||
|
||||
void HASH_destroyTable(LDM_hashTable *table) {
|
||||
|
@ -101,7 +104,8 @@ void HASH_outputTableOccupancy(const LDM_hashTable *table) {
|
|||
}
|
||||
}
|
||||
|
||||
printf("Num buckets, bucket size: %d, %d\n", table->size, HASH_BUCKET_SIZE);
|
||||
printf("Hash table size, empty slots, %% empty: %u, %u, %.3f\n",
|
||||
HASH_getSize(table), ctr,
|
||||
100.0 * (double)(ctr) / (double)HASH_getSize(table));
|
||||
table->maxEntries, ctr,
|
||||
100.0 * (double)(ctr) / table->maxEntries);
|
||||
}
|
||||
|
|
|
@ -4,12 +4,16 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// Insert every (HASH_ONLY_EVERY + 1) into the hash table.
|
||||
#define HASH_ONLY_EVERY 15
|
||||
|
||||
#define LDM_HASHLOG (LDM_MEMORY_USAGE-2)
|
||||
#define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE))
|
||||
//#define LDM_HASH_ENTRY_SIZE 4
|
||||
#define LDM_HASHTABLESIZE_U32 ((LDM_HASHTABLESIZE) >> 2)
|
||||
#define LDM_HASHTABLESIZE_U64 ((LDM_HASHTABLESIZE) >> 4)
|
||||
|
||||
// Insert every (HASH_ONLY_EVERY + 1) into the hash table.
|
||||
#define HASH_ONLY_EVERY_LOG (LDM_WINDOW_SIZE_LOG-((LDM_MEMORY_USAGE) - 4))
|
||||
#define HASH_ONLY_EVERY ((1 << HASH_ONLY_EVERY_LOG) - 1)
|
||||
|
||||
|
||||
#define ML_BITS 4
|
||||
#define ML_MASK ((1U<<ML_BITS)-1)
|
||||
|
@ -17,13 +21,13 @@
|
|||
#define RUN_MASK ((1U<<RUN_BITS)-1)
|
||||
|
||||
#define COMPUTE_STATS
|
||||
#define OUTPUT_CONFIGURATION
|
||||
#define CHECKSUM_CHAR_OFFSET 10
|
||||
|
||||
#define LAG 0
|
||||
//#define LDM_LAG 0
|
||||
|
||||
//#define HASH_CHECK
|
||||
//#define RUN_CHECKS
|
||||
//#define LDM_DEBUG
|
||||
|
||||
#include "ldm.h"
|
||||
#include "ldm_hashtable.h"
|
||||
|
@ -187,7 +191,8 @@ int LDM_isValidMatch(const BYTE *pIn, const BYTE *pMatch) {
|
|||
* of the hash table.
|
||||
*/
|
||||
static hash_t checksumToHash(U32 sum) {
|
||||
return ((sum * 2654435761U) >> (32 - LDM_HASHLOG));
|
||||
return HASH_hashU32(sum);
|
||||
// return ((sum * 2654435761U) >> (32 - LDM_HASHLOG));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -261,9 +266,9 @@ static void setNextHash(LDM_CCtx *cctx) {
|
|||
cctx->nextPosHashed = cctx->nextIp;
|
||||
cctx->nextHash = checksumToHash(cctx->nextSum);
|
||||
|
||||
#if LAG
|
||||
if (cctx->ip - cctx->ibase > LAG) {
|
||||
// printf("LAG %zu\n", cctx->ip - cctx->lagIp);
|
||||
#if LDM_LAG
|
||||
// printf("LDM_LAG %zu\n", cctx->ip - cctx->lagIp);
|
||||
if (cctx->ip - cctx->ibase > LDM_LAG) {
|
||||
cctx->lagSum = updateChecksum(
|
||||
cctx->lagSum, LDM_HASH_LENGTH,
|
||||
cctx->lagIp[0], cctx->lagIp[LDM_HASH_LENGTH]);
|
||||
|
@ -296,7 +301,7 @@ static void putHashOfCurrentPositionFromHash(
|
|||
const LDM_hashEntry entry = { cctx->ip - cctx->ibase ,
|
||||
MEM_read32(cctx->ip) };
|
||||
*/
|
||||
#if LAG
|
||||
#if LDM_LAG
|
||||
// TODO: off by 1, but whatever
|
||||
if (cctx->lagIp - cctx->ibase > 0) {
|
||||
const LDM_hashEntry entry = { cctx->lagIp - cctx->ibase, cctx->lagSum };
|
||||
|
@ -364,6 +369,18 @@ U32 LDM_countMatchLength(const BYTE *pIn, const BYTE *pMatch,
|
|||
return (U32)(pIn - pStart);
|
||||
}
|
||||
|
||||
void LDM_outputConfiguration(void) {
|
||||
printf("=====================\n");
|
||||
printf("Configuration\n");
|
||||
printf("Window size log: %d\n", LDM_WINDOW_SIZE_LOG);
|
||||
printf("Min match, hash length: %d, %d\n",
|
||||
LDM_MIN_MATCH_LENGTH, LDM_HASH_LENGTH);
|
||||
printf("LDM_MEMORY_USAGE: %d\n", LDM_MEMORY_USAGE);
|
||||
printf("HASH_ONLY_EVERY: %d\n", HASH_ONLY_EVERY);
|
||||
printf("LDM_LAG %d\n", LDM_LAG);
|
||||
printf("=====================\n");
|
||||
}
|
||||
|
||||
void LDM_readHeader(const void *src, U64 *compressedSize,
|
||||
U64 *decompressedSize) {
|
||||
const BYTE *ip = (const BYTE *)src;
|
||||
|
@ -392,12 +409,8 @@ void LDM_initializeCCtx(LDM_CCtx *cctx,
|
|||
cctx->anchor = cctx->ibase;
|
||||
|
||||
memset(&(cctx->stats), 0, sizeof(cctx->stats));
|
||||
cctx->hashTable = HASH_createTable(LDM_HASHTABLESIZE_U32, cctx->ibase);
|
||||
cctx->hashTable = HASH_createTable(LDM_HASHTABLESIZE_U64, cctx->ibase);
|
||||
|
||||
//HASH_initializeTable(cctx->hashTable, LDM_HASHTABLESIZE_U32);
|
||||
|
||||
// calloc(LDM_HASHTABLESIZE_U32, sizeof(LDM_hashEntry));
|
||||
// memset(cctx->hashTable, 0, sizeof(cctx->hashTable));
|
||||
cctx->stats.minOffset = UINT_MAX;
|
||||
cctx->stats.windowSizeLog = LDM_WINDOW_SIZE_LOG;
|
||||
cctx->stats.hashTableSizeLog = LDM_MEMORY_USAGE;
|
||||
|
@ -520,17 +533,19 @@ size_t LDM_compress(const void *src, size_t srcSize,
|
|||
void *dst, size_t maxDstSize) {
|
||||
LDM_CCtx cctx;
|
||||
const BYTE *match = NULL;
|
||||
// printf("TST: %d\n", LDM_WINDOW_SIZE / LDM_HASHTABLESIZE_U64);
|
||||
printf("HASH LOG: %d\n", HASH_ONLY_EVERY_LOG);
|
||||
|
||||
LDM_initializeCCtx(&cctx, src, srcSize, dst, maxDstSize);
|
||||
|
||||
/* Hash the first position and put it into the hash table. */
|
||||
LDM_putHashOfCurrentPosition(&cctx);
|
||||
|
||||
#if LAG
|
||||
#if LDM_LAG
|
||||
cctx.lagIp = cctx.ip;
|
||||
cctx.lagHash = cctx.lastHash;
|
||||
cctx.lagSum = cctx.lastSum;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Find a match.
|
||||
* If no more matches can be found (i.e. the length of the remaining input
|
||||
|
@ -542,6 +557,7 @@ size_t LDM_compress(const void *src, size_t srcSize,
|
|||
cctx.stats.numMatches++;
|
||||
#endif
|
||||
|
||||
// printf("HERE %zu\n", cctx.ip - cctx.ibase);
|
||||
/**
|
||||
* Catch up: look back to extend the match backwards from the found match.
|
||||
*/
|
||||
|
|
|
@ -10,15 +10,20 @@
|
|||
#define LDM_HEADER_SIZE ((LDM_COMPRESS_SIZE)+(LDM_DECOMPRESS_SIZE))
|
||||
#define LDM_OFFSET_SIZE 4
|
||||
|
||||
// Defines the size of the hash table (currently the number of elements).
|
||||
#define LDM_MEMORY_USAGE 12
|
||||
// Defines the size of the hash table.
|
||||
// Currently this should be less than WINDOW_SIZE_LOG + 4?
|
||||
#define LDM_MEMORY_USAGE 24
|
||||
|
||||
#define LDM_WINDOW_SIZE_LOG 30
|
||||
//#define LDM_LAG (1 << 23)
|
||||
//#define LDM_LAG (1 << 20)
|
||||
#define LDM_LAG 0
|
||||
|
||||
#define LDM_WINDOW_SIZE_LOG 28
|
||||
#define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG))
|
||||
|
||||
//These should be multiples of four.
|
||||
#define LDM_MIN_MATCH_LENGTH 64
|
||||
#define LDM_HASH_LENGTH 64
|
||||
//These should be multiples of four (and perhaps set to the same values?).
|
||||
#define LDM_MIN_MATCH_LENGTH 512
|
||||
#define LDM_HASH_LENGTH 512
|
||||
|
||||
typedef struct LDM_compressStats LDM_compressStats;
|
||||
typedef struct LDM_CCtx LDM_CCtx;
|
||||
|
@ -48,7 +53,7 @@ typedef struct LDM_DCtx LDM_DCtx;
|
|||
* The lower four bits of the token encode the match length. With additional
|
||||
* bytes added similarly to the additional literal length bytes after the offset.
|
||||
*
|
||||
* The last sequence is incomplete and stops right after the lieterals.
|
||||
* The last sequence is incomplete and stops right after the literals.
|
||||
*
|
||||
*/
|
||||
size_t LDM_compress(const void *src, size_t srcSize,
|
||||
|
@ -142,6 +147,8 @@ void LDM_initializeDCtx(LDM_DCtx *dctx,
|
|||
void LDM_readHeader(const void *src, U64 *compressedSize,
|
||||
U64 *decompressedSize);
|
||||
|
||||
void LDM_outputConfiguration(void);
|
||||
|
||||
void LDM_test(void);
|
||||
|
||||
#endif /* LDM_H */
|
||||
|
|
|
@ -42,6 +42,8 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
|
|||
const BYTE *pIn,
|
||||
int (*isValid)(const BYTE *pIn, const BYTE *pMatch));
|
||||
|
||||
hash_t HASH_hashU32(U32 value);
|
||||
|
||||
/**
|
||||
* Insert an LDM_hashEntry into the bucket corresponding to hash.
|
||||
*/
|
||||
|
@ -61,5 +63,4 @@ void HASH_destroyTable(LDM_hashTable *table);
|
|||
*/
|
||||
void HASH_outputTableOccupancy(const LDM_hashTable *hashTable);
|
||||
|
||||
|
||||
#endif /* LDM_HASHTABLE_H */
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
/* Compress file given by fname and output to oname.
|
||||
* Returns 0 if successful, error code otherwise.
|
||||
*
|
||||
* TODO: This currently seg faults if the compressed size is > the decompress
|
||||
* TODO: This might seg fault if the compressed size is > the decompress
|
||||
* size due to the mmapping and output file size allocated to be the input size.
|
||||
* The compress function should check before writing or buffer writes.
|
||||
*/
|
||||
|
@ -28,6 +28,8 @@ static int compress(const char *fname, const char *oname) {
|
|||
char *src, *dst;
|
||||
size_t maxCompressedSize, compressedSize;
|
||||
|
||||
struct timeval tv1, tv2;
|
||||
|
||||
/* Open the input file. */
|
||||
if ((fdin = open(fname, O_RDONLY)) < 0) {
|
||||
perror("Error in file opening");
|
||||
|
@ -46,7 +48,10 @@ static int compress(const char *fname, const char *oname) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
maxCompressedSize = statbuf.st_size + LDM_HEADER_SIZE;
|
||||
maxCompressedSize = (statbuf.st_size + LDM_HEADER_SIZE);
|
||||
// Handle case where compressed size is > decompressed size.
|
||||
// The compress function should check before writing or buffer writes.
|
||||
maxCompressedSize += statbuf.st_size / 255;
|
||||
|
||||
/* Go to the location corresponding to the last byte. */
|
||||
/* TODO: fallocate? */
|
||||
|
@ -74,10 +79,12 @@ static int compress(const char *fname, const char *oname) {
|
|||
perror("mmap error for output");
|
||||
return 1;
|
||||
}
|
||||
gettimeofday(&tv1, NULL);
|
||||
|
||||
compressedSize = LDM_HEADER_SIZE +
|
||||
LDM_compress(src, statbuf.st_size,
|
||||
dst + LDM_HEADER_SIZE, maxCompressedSize);
|
||||
gettimeofday(&tv2, NULL);
|
||||
|
||||
// Write compress and decompress size to header
|
||||
// TODO: should depend on LDM_DECOMPRESS_SIZE write32
|
||||
|
@ -96,6 +103,14 @@ static int compress(const char *fname, const char *oname) {
|
|||
(unsigned)statbuf.st_size, (unsigned)compressedSize, oname,
|
||||
(double)compressedSize / (statbuf.st_size) * 100);
|
||||
|
||||
printf("Total compress time = %.3f seconds, Average compression speed: %.3f MB/s\n",
|
||||
(double) (tv2.tv_usec - tv1.tv_usec) / 1000000 +
|
||||
(double) (tv2.tv_sec - tv1.tv_sec),
|
||||
((double)statbuf.st_size / (double) (1 << 20)) /
|
||||
((double) (tv2.tv_usec - tv1.tv_usec) / 1000000 +
|
||||
(double) (tv2.tv_sec - tv1.tv_sec)));
|
||||
|
||||
|
||||
// Close files.
|
||||
close(fdin);
|
||||
close(fdout);
|
||||
|
@ -234,16 +249,10 @@ int main(int argc, const char *argv[]) {
|
|||
|
||||
/* Compress */
|
||||
{
|
||||
struct timeval tv1, tv2;
|
||||
gettimeofday(&tv1, NULL);
|
||||
if (compress(inpFilename, ldmFilename)) {
|
||||
printf("Compress error");
|
||||
return 1;
|
||||
}
|
||||
gettimeofday(&tv2, NULL);
|
||||
printf("Total compress time = %f seconds\n",
|
||||
(double) (tv2.tv_usec - tv1.tv_usec) / 1000000 +
|
||||
(double) (tv2.tv_sec - tv1.tv_sec));
|
||||
}
|
||||
|
||||
/* Decompress */
|
||||
|
|
Loading…
Reference in New Issue