Make the meaning of LDM_MEMORY_USAGE consistent across tables

dev
Stella Lau 2017-07-18 14:25:39 -07:00
parent fc41a87964
commit 19258f51c1
7 changed files with 94 additions and 53 deletions

View File

@ -25,7 +25,7 @@ LDFLAGS += -lzstd
default: all
all: main-basic main-circular-buffer main-lag
all: main-basic main-circular-buffer
main-basic : basic_table.c ldm.c main-ldm.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
@ -33,11 +33,8 @@ main-basic : basic_table.c ldm.c main-ldm.c
main-circular-buffer: circular_buffer_table.c ldm.c main-ldm.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
main-lag: lag_table.c ldm.c main-ldm.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
clean:
@rm -f core *.o tmp* result* *.ldm *.ldm.dec \
main-basic main-circular-buffer main-lag
main-basic main-circular-buffer
@echo Cleaning completed

View File

@ -1,9 +1,12 @@
#include <stdlib.h>
#include <stdio.h>
#include "ldm.h"
#include "ldm_hashtable.h"
#include "mem.h"
#define LDM_HASHLOG ((LDM_MEMORY_USAGE) - 4)
struct LDM_hashTable {
U32 size;
LDM_hashEntry *entries;
@ -46,6 +49,10 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
return NULL;
}
/**
 * Hash a U32 by multiplying it by the constant 2654435761 and discarding
 * the low (32 - LDM_HASHLOG) bits of the product.
 *
 * Assumes U32 is 32 bits wide, so the result keeps LDM_HASHLOG bits.
 */
hash_t HASH_hashU32(U32 value) {
  const U32 product = value * 2654435761U;
  return product >> (32 - LDM_HASHLOG);
}
void HASH_insert(LDM_hashTable *table,
const hash_t hash, const LDM_hashEntry entry) {
*getBucket(table, hash) = entry;

View File

@ -1,33 +1,36 @@
#include <stdlib.h>
#include <stdio.h>
#include "ldm.h"
#include "ldm_hashtable.h"
#include "mem.h"
//TODO: move def somewhere else.
//TODO: memory usage is currently no longer LDM_MEMORY_USAGE.
// refactor code to scale the number of elements appropriately.
// Number of elements per hash bucket.
// HASH_BUCKET_SIZE_LOG defined in ldm.h
#define HASH_BUCKET_SIZE_LOG 0 // MAX is 4 for now
#define HASH_BUCKET_SIZE (1 << (HASH_BUCKET_SIZE_LOG))
#define LDM_HASHLOG ((LDM_MEMORY_USAGE)-4-HASH_BUCKET_SIZE_LOG)
// Hash table whose entries are stored in one flat array and grouped into
// buckets of HASH_BUCKET_SIZE entries each.
// NOTE(review): `size` is declared twice in this span because the pre-change
// and post-change lines are shown together; only one declaration can remain.
struct LDM_hashTable {
U32 size;
U32 size; // Number of buckets (maxEntries >> HASH_BUCKET_SIZE_LOG).
U32 maxEntries; // Total number of entries across all buckets. TODO: rename.
LDM_hashEntry *entries; // 1-D array of maxEntries entries for now.
// Position corresponding to offset=0 in LDM_hashEntry.
const BYTE *offsetBase;
BYTE *bucketOffsets; // One BYTE per bucket; per the original note, the
// current insert position within each bucket.
// Last insert was at bucketOffsets - 1? (unconfirmed)
};
// Allocate a hash table together with its entry array and per-bucket offset
// array. offsetBase is stored as the position that entry offsets are
// relative to.
// NOTE(review): the results of malloc and calloc are used without a NULL
// check.
// NOTE(review): this span shows the pre-change and post-change lines
// together, so size, entries and bucketOffsets are each assigned twice; as
// written, the later assignments are the ones left in effect and the earlier
// calloc results are overwritten.
LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase) {
LDM_hashTable *table = malloc(sizeof(LDM_hashTable));
table->size = size;
table->entries = calloc(size * HASH_BUCKET_SIZE, sizeof(LDM_hashEntry));
table->bucketOffsets = calloc(size, sizeof(BYTE));
// `size` is the total entry count; the bucket count is derived from it.
table->size = size >> HASH_BUCKET_SIZE_LOG;
table->maxEntries = size;
table->entries = calloc(size, sizeof(LDM_hashEntry));
// One zero-initialized BYTE per bucket.
table->bucketOffsets = calloc(size >> HASH_BUCKET_SIZE_LOG, sizeof(BYTE));
table->offsetBase = offsetBase;
return table;
}
@ -45,11 +48,6 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
LDM_hashEntry *cur = bucket;
// TODO: in order of recency?
for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) {
/*
if (cur->checksum == 0 && cur->offset == 0) {
return NULL;
}
*/
// Check checksum for faster check.
if (cur->checksum == checksum &&
(*isValid)(pIn, cur->offset + table->offsetBase)) {
@ -59,6 +57,11 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
return NULL;
}
/**
 * Hash a U32 for this table: multiply by the constant 2654435761 and shift
 * the product right by (32 - LDM_HASHLOG).
 *
 * Assumes U32 is 32 bits wide, so LDM_HASHLOG bits of the product are kept.
 */
hash_t HASH_hashU32(U32 value) {
  const int discardedBits = 32 - LDM_HASHLOG;
  U32 mixed = value;

  mixed *= 2654435761U;
  return mixed >> discardedBits;
}
LDM_hashEntry *HASH_getEntryFromHash(const LDM_hashTable *table,
const hash_t hash,
const U32 checksum) {
@ -82,7 +85,7 @@ void HASH_insert(LDM_hashTable *table,
}
// Return the size of the table.
// NOTE(review): two return statements are present because the pre-change and
// post-change lines are shown together. As written, the first one executes
// (bucket count times HASH_BUCKET_SIZE); the second returns the bucket count
// stored in table->size and is unreachable.
U32 HASH_getSize(const LDM_hashTable *table) {
return table->size * HASH_BUCKET_SIZE;
return table->size;
}
void HASH_destroyTable(LDM_hashTable *table) {
@ -101,7 +104,8 @@ void HASH_outputTableOccupancy(const LDM_hashTable *table) {
}
}
printf("Num buckets, bucket size: %d, %d\n", table->size, HASH_BUCKET_SIZE);
printf("Hash table size, empty slots, %% empty: %u, %u, %.3f\n",
HASH_getSize(table), ctr,
100.0 * (double)(ctr) / (double)HASH_getSize(table));
table->maxEntries, ctr,
100.0 * (double)(ctr) / table->maxEntries);
}

View File

@ -4,12 +4,16 @@
#include <stdlib.h>
#include <string.h>
// Insert every (HASH_ONLY_EVERY + 1) into the hash table.
#define HASH_ONLY_EVERY 15
#define LDM_HASHLOG (LDM_MEMORY_USAGE-2)
#define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE))
//#define LDM_HASH_ENTRY_SIZE 4
#define LDM_HASHTABLESIZE_U32 ((LDM_HASHTABLESIZE) >> 2)
#define LDM_HASHTABLESIZE_U64 ((LDM_HASHTABLESIZE) >> 4)
// Insert every (HASH_ONLY_EVERY + 1)-th position into the hash table.
#define HASH_ONLY_EVERY_LOG (LDM_WINDOW_SIZE_LOG-((LDM_MEMORY_USAGE) - 4))
#define HASH_ONLY_EVERY ((1 << HASH_ONLY_EVERY_LOG) - 1)
#define ML_BITS 4
#define ML_MASK ((1U<<ML_BITS)-1)
@ -17,13 +21,13 @@
#define RUN_MASK ((1U<<RUN_BITS)-1)
#define COMPUTE_STATS
#define OUTPUT_CONFIGURATION
#define CHECKSUM_CHAR_OFFSET 10
#define LAG 0
//#define LDM_LAG 0
//#define HASH_CHECK
//#define RUN_CHECKS
//#define LDM_DEBUG
#include "ldm.h"
#include "ldm_hashtable.h"
@ -187,7 +191,8 @@ int LDM_isValidMatch(const BYTE *pIn, const BYTE *pMatch) {
* of the hash table.
*/
static hash_t checksumToHash(U32 sum) {
// NOTE(review): two return statements are present because the pre-change and
// post-change lines are shown together; as written, the first one executes.
// Inline form: multiply by 2654435761 and shift right by (32 - LDM_HASHLOG).
return ((sum * 2654435761U) >> (32 - LDM_HASHLOG));
// Delegating form: use the hash function exported by the hash table.
return HASH_hashU32(sum);
// return ((sum * 2654435761U) >> (32 - LDM_HASHLOG));
}
/**
@ -261,9 +266,9 @@ static void setNextHash(LDM_CCtx *cctx) {
cctx->nextPosHashed = cctx->nextIp;
cctx->nextHash = checksumToHash(cctx->nextSum);
#if LAG
if (cctx->ip - cctx->ibase > LAG) {
// printf("LAG %zu\n", cctx->ip - cctx->lagIp);
#if LDM_LAG
// printf("LDM_LAG %zu\n", cctx->ip - cctx->lagIp);
if (cctx->ip - cctx->ibase > LDM_LAG) {
cctx->lagSum = updateChecksum(
cctx->lagSum, LDM_HASH_LENGTH,
cctx->lagIp[0], cctx->lagIp[LDM_HASH_LENGTH]);
@ -296,7 +301,7 @@ static void putHashOfCurrentPositionFromHash(
const LDM_hashEntry entry = { cctx->ip - cctx->ibase ,
MEM_read32(cctx->ip) };
*/
#if LAG
#if LDM_LAG
// TODO: off by 1, but whatever
if (cctx->lagIp - cctx->ibase > 0) {
const LDM_hashEntry entry = { cctx->lagIp - cctx->ibase, cctx->lagSum };
@ -364,6 +369,18 @@ U32 LDM_countMatchLength(const BYTE *pIn, const BYTE *pMatch,
return (U32)(pIn - pStart);
}
/**
 * Print the compile-time configuration to stdout, framed by separator lines:
 * window size log, minimum match length and hash length, LDM_MEMORY_USAGE,
 * HASH_ONLY_EVERY and LDM_LAG.
 */
void LDM_outputConfiguration(void) {
  /* Banner. */
  fputs("=====================\n", stdout);
  fputs("Configuration\n", stdout);

  /* One line per setting. */
  printf("Window size log: %d\n", LDM_WINDOW_SIZE_LOG);
  printf("Min match, hash length: %d, %d\n",
         LDM_MIN_MATCH_LENGTH,
         LDM_HASH_LENGTH);
  printf("LDM_MEMORY_USAGE: %d\n", LDM_MEMORY_USAGE);
  printf("HASH_ONLY_EVERY: %d\n", HASH_ONLY_EVERY);
  printf("LDM_LAG %d\n", LDM_LAG);

  /* Closing separator. */
  fputs("=====================\n", stdout);
}
void LDM_readHeader(const void *src, U64 *compressedSize,
U64 *decompressedSize) {
const BYTE *ip = (const BYTE *)src;
@ -392,12 +409,8 @@ void LDM_initializeCCtx(LDM_CCtx *cctx,
cctx->anchor = cctx->ibase;
memset(&(cctx->stats), 0, sizeof(cctx->stats));
cctx->hashTable = HASH_createTable(LDM_HASHTABLESIZE_U32, cctx->ibase);
cctx->hashTable = HASH_createTable(LDM_HASHTABLESIZE_U64, cctx->ibase);
//HASH_initializeTable(cctx->hashTable, LDM_HASHTABLESIZE_U32);
// calloc(LDM_HASHTABLESIZE_U32, sizeof(LDM_hashEntry));
// memset(cctx->hashTable, 0, sizeof(cctx->hashTable));
cctx->stats.minOffset = UINT_MAX;
cctx->stats.windowSizeLog = LDM_WINDOW_SIZE_LOG;
cctx->stats.hashTableSizeLog = LDM_MEMORY_USAGE;
@ -520,17 +533,19 @@ size_t LDM_compress(const void *src, size_t srcSize,
void *dst, size_t maxDstSize) {
LDM_CCtx cctx;
const BYTE *match = NULL;
// printf("TST: %d\n", LDM_WINDOW_SIZE / LDM_HASHTABLESIZE_U64);
printf("HASH LOG: %d\n", HASH_ONLY_EVERY_LOG);
LDM_initializeCCtx(&cctx, src, srcSize, dst, maxDstSize);
/* Hash the first position and put it into the hash table. */
LDM_putHashOfCurrentPosition(&cctx);
#if LAG
#if LDM_LAG
cctx.lagIp = cctx.ip;
cctx.lagHash = cctx.lastHash;
cctx.lagSum = cctx.lastSum;
#endif
/**
* Find a match.
* If no more matches can be found (i.e. the length of the remaining input
@ -542,6 +557,7 @@ size_t LDM_compress(const void *src, size_t srcSize,
cctx.stats.numMatches++;
#endif
// printf("HERE %zu\n", cctx.ip - cctx.ibase);
/**
* Catch up: look back to extend the match backwards from the found match.
*/

View File

@ -10,15 +10,20 @@
#define LDM_HEADER_SIZE ((LDM_COMPRESS_SIZE)+(LDM_DECOMPRESS_SIZE))
#define LDM_OFFSET_SIZE 4
// Defines the size of the hash table (currently the number of elements).
#define LDM_MEMORY_USAGE 12
// Defines the size of the hash table.
// Currently this should be at most LDM_WINDOW_SIZE_LOG + 4; otherwise
// HASH_ONLY_EVERY_LOG (LDM_WINDOW_SIZE_LOG - (LDM_MEMORY_USAGE - 4)) is negative.
#define LDM_MEMORY_USAGE 24
#define LDM_WINDOW_SIZE_LOG 30
//#define LDM_LAG (1 << 23)
//#define LDM_LAG (1 << 20)
#define LDM_LAG 0
#define LDM_WINDOW_SIZE_LOG 28
#define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG))
//These should be multiples of four.
#define LDM_MIN_MATCH_LENGTH 64
#define LDM_HASH_LENGTH 64
//These should be multiples of four (and perhaps set to the same values?).
#define LDM_MIN_MATCH_LENGTH 512
#define LDM_HASH_LENGTH 512
typedef struct LDM_compressStats LDM_compressStats;
typedef struct LDM_CCtx LDM_CCtx;
@ -48,7 +53,7 @@ typedef struct LDM_DCtx LDM_DCtx;
* The lower four bits of the token encode the match length. With additional
* bytes added similarly to the additional literal length bytes after the offset.
*
* The last sequence is incomplete and stops right after the lieterals.
* The last sequence is incomplete and stops right after the literals.
*
*/
size_t LDM_compress(const void *src, size_t srcSize,
@ -142,6 +147,8 @@ void LDM_initializeDCtx(LDM_DCtx *dctx,
void LDM_readHeader(const void *src, U64 *compressedSize,
U64 *decompressedSize);
/**
 * Prints the compile-time configuration to stdout: window size log, minimum
 * match length and hash length, LDM_MEMORY_USAGE, HASH_ONLY_EVERY and LDM_LAG.
 */
void LDM_outputConfiguration(void);
void LDM_test(void);
#endif /* LDM_H */

View File

@ -42,6 +42,8 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
const BYTE *pIn,
int (*isValid)(const BYTE *pIn, const BYTE *pMatch));
/**
 * Hash a 32-bit value by multiplying it by a fixed constant and shifting the
 * product right by (32 - LDM_HASHLOG), where LDM_HASHLOG is defined by the
 * hash table implementation that is compiled in.
 */
hash_t HASH_hashU32(U32 value);
/**
* Insert an LDM_hashEntry into the bucket corresponding to hash.
*/
@ -61,5 +63,4 @@ void HASH_destroyTable(LDM_hashTable *table);
*/
void HASH_outputTableOccupancy(const LDM_hashTable *hashTable);
#endif /* LDM_HASHTABLE_H */

View File

@ -18,7 +18,7 @@
/* Compress file given by fname and output to oname.
* Returns 0 if successful, error code otherwise.
*
* TODO: This currently seg faults if the compressed size is > the decompress
* TODO: This might seg fault if the compressed size is > the decompress
* size due to the mmapping and output file size allocated to be the input size.
* The compress function should check before writing or buffer writes.
*/
@ -28,6 +28,8 @@ static int compress(const char *fname, const char *oname) {
char *src, *dst;
size_t maxCompressedSize, compressedSize;
struct timeval tv1, tv2;
/* Open the input file. */
if ((fdin = open(fname, O_RDONLY)) < 0) {
perror("Error in file opening");
@ -46,7 +48,10 @@ static int compress(const char *fname, const char *oname) {
return 1;
}
maxCompressedSize = statbuf.st_size + LDM_HEADER_SIZE;
maxCompressedSize = (statbuf.st_size + LDM_HEADER_SIZE);
// Leave headroom for the case where the compressed size exceeds the input size.
// TODO: the compress function should check bounds before writing, or buffer its writes.
maxCompressedSize += statbuf.st_size / 255;
/* Go to the location corresponding to the last byte. */
/* TODO: fallocate? */
@ -74,10 +79,12 @@ static int compress(const char *fname, const char *oname) {
perror("mmap error for output");
return 1;
}
gettimeofday(&tv1, NULL);
compressedSize = LDM_HEADER_SIZE +
LDM_compress(src, statbuf.st_size,
dst + LDM_HEADER_SIZE, maxCompressedSize);
gettimeofday(&tv2, NULL);
// Write compress and decompress size to header
// TODO: should depend on LDM_DECOMPRESS_SIZE write32
@ -96,6 +103,14 @@ static int compress(const char *fname, const char *oname) {
(unsigned)statbuf.st_size, (unsigned)compressedSize, oname,
(double)compressedSize / (statbuf.st_size) * 100);
printf("Total compress time = %.3f seconds, Average compression speed: %.3f MB/s\n",
(double) (tv2.tv_usec - tv1.tv_usec) / 1000000 +
(double) (tv2.tv_sec - tv1.tv_sec),
((double)statbuf.st_size / (double) (1 << 20)) /
((double) (tv2.tv_usec - tv1.tv_usec) / 1000000 +
(double) (tv2.tv_sec - tv1.tv_sec)));
// Close files.
close(fdin);
close(fdout);
@ -234,16 +249,10 @@ int main(int argc, const char *argv[]) {
/* Compress */
{
struct timeval tv1, tv2;
gettimeofday(&tv1, NULL);
if (compress(inpFilename, ldmFilename)) {
printf("Compress error");
return 1;
}
gettimeofday(&tv2, NULL);
printf("Total compress time = %f seconds\n",
(double) (tv2.tv_usec - tv1.tv_usec) / 1000000 +
(double) (tv2.tv_sec - tv1.tv_sec));
}
/* Decompress */