Minor refactoring

dev
Stella Lau 2017-07-19 16:56:28 -07:00
parent 030264ca51
commit 2427a154cb
7 changed files with 102 additions and 354 deletions

View File

@ -1,109 +0,0 @@
#include <stdlib.h>
#include <stdio.h>
#include "ldm.h"
#include "ldm_hashtable.h"
#include "mem.h"
#define LDM_HASHLOG ((LDM_MEMORY_USAGE) - 4)
/**
 * State of the hash table implemented in this file: a flat array of
 * LDM_hashEntry slots that getBucket() indexes directly by hash value.
 */
struct LDM_hashTable {
// Number of slots allocated in `entries` (the count passed to calloc).
U32 size;
// Slot array; zero-initialized by calloc when the table is created.
LDM_hashEntry *entries;
// Base pointer added to an entry's `offset` to obtain the candidate match
// position (see HASH_getValidEntry).
const BYTE *offsetBase;
};
/**
 * Allocates a hash table holding `size` zero-initialized entries.
 *
 * @param size        Number of LDM_hashEntry slots to allocate.
 * @param offsetBase  Pointer stored in the table; entry offsets are added to
 *                    it to compute match positions. It is not copied or owned.
 *
 * @return The new table, or NULL if either allocation fails. The caller
 *         releases it with HASH_destroyTable().
 */
LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase) {
  LDM_hashTable *table = malloc(sizeof *table);
  if (table == NULL) {
    return NULL;
  }

  table->entries = calloc(size, sizeof *table->entries);
  // calloc(0, ...) may legitimately return NULL, so only treat NULL as a
  // failure when slots were actually requested.
  if (table->entries == NULL && size != 0) {
    free(table);
    return NULL;
  }

  table->size = size;
  table->offsetBase = offsetBase;
  return table;
}
/**
 * (Re)initializes `table` with a freshly allocated, zero-initialized array of
 * `size` entries.
 *
 * Any array previously referenced by table->entries is NOT freed here.
 * If the allocation fails, table->entries is NULL and table->size is set to 0
 * so that code iterating up to the table size never dereferences NULL.
 *
 * @param table  Table to initialize; must not be NULL.
 * @param size   Number of LDM_hashEntry slots to allocate.
 */
void HASH_initializeTable(LDM_hashTable *table, U32 size) {
  table->entries = calloc(size, sizeof *table->entries);
  // Only advertise slots that were really allocated.
  table->size = (table->entries != NULL) ? size : 0;
}
/**
 * Returns the address of the slot at index `hash` in the table's entry array.
 * No bounds check is performed on `hash`.
 */
LDM_hashEntry *getBucket(const LDM_hashTable *table, const hash_t hash) {
  return &table->entries[hash];
}
/**
 * Looks up the slot for `hash` and returns it unconditionally.
 *
 * `checksum` is accepted for interface compatibility but is not consulted:
 * this implementation keeps a single entry per hash value.
 */
LDM_hashEntry *HASH_getEntryFromHash(
    const LDM_hashTable *table, const hash_t hash, const U32 checksum) {
  LDM_hashEntry *const slot = getBucket(table, hash);
  (void)checksum;
  return slot;
}
/**
 * Checks whether pMatch is an acceptable match candidate for pIn.
 *
 * A candidate is valid when it lies at or before pIn, no more than
 * maxWindowSize bytes back, and the first minMatchLength bytes at pIn and
 * pMatch are identical.
 *
 * Differences from the previous version:
 *  - The distance test no longer compares a signed pointer difference against
 *    an unsigned 32-bit value, so a candidate located after pIn is rejected
 *    on every platform instead of depending on the width of ptrdiff_t.
 *  - When minMatchLength is not a multiple of four, the remaining 1-3 bytes
 *    are now compared as well instead of being silently ignored.
 *
 * NOTE(review): assumes at least minMatchLength bytes are readable at both
 * pIn and pMatch -- confirm against callers.
 *
 * @return 1 if the candidate is valid, 0 otherwise.
 */
static int isValidMatch(const BYTE *pIn, const BYTE *pMatch,
                        U32 minMatchLength, U32 maxWindowSize) {
  U32 lengthLeft = minMatchLength;
  const BYTE *curIn = pIn;
  const BYTE *curMatch = pMatch;

  if (pMatch > pIn || (size_t)(pIn - pMatch) > maxWindowSize) {
    return 0;
  }

  // Compare four bytes at a time while possible.
  for (; lengthLeft >= 4; lengthLeft -= 4) {
    if (MEM_read32(curIn) != MEM_read32(curMatch)) {
      return 0;
    }
    curIn += 4;
    curMatch += 4;
  }

  // Compare the tail (fewer than four bytes) one byte at a time.
  for (; lengthLeft > 0; lengthLeft--) {
    if (*curIn != *curMatch) {
      return 0;
    }
    curIn++;
    curMatch++;
  }
  return 1;
}
/**
 * Returns the entry stored for `hash` if it passes isValidMatch() against
 * pIn, or NULL otherwise.
 *
 * The candidate match position is table->offsetBase plus the entry's offset.
 * `checksum`, `pEnd` and `matchLength` are currently unused.
 */
LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
                                  const hash_t hash,
                                  const U32 checksum,
                                  const BYTE *pIn,
                                  const BYTE *pEnd,
                                  U32 minMatchLength,
                                  U32 maxWindowSize,
                                  U32 *matchLength) {
  LDM_hashEntry *const candidate = getBucket(table, hash);
  const BYTE *const pMatch = table->offsetBase + candidate->offset;

  (void)checksum;
  (void)pEnd;
  (void)matchLength;

  // TODO: Count the entire forward match length rather than check if valid.
  return isValidMatch(pIn, pMatch, minMatchLength, maxWindowSize)
             ? candidate
             : NULL;
}
/**
 * Multiplicative hash of a 32-bit value: multiplies by the constant
 * 2654435761 and keeps the top LDM_HASHLOG bits of the 32-bit product.
 */
hash_t HASH_hashU32(U32 value) {
  const unsigned discardedBits = 32 - LDM_HASHLOG;
  return (value * 2654435761U) >> discardedBits;
}
/**
 * Stores `entry` in the slot for `hash`, overwriting whatever was there.
 */
void HASH_insert(LDM_hashTable *table,
                 const hash_t hash, const LDM_hashEntry entry) {
  LDM_hashEntry *const slot = getBucket(table, hash);
  *slot = entry;
}
/**
 * Returns the number of slots recorded in the table's `size` field.
 */
U32 HASH_getSize(const LDM_hashTable *table) {
  const U32 numSlots = table->size;
  return numSlots;
}
/**
 * Frees the entry array and the table itself.
 *
 * Passing NULL is a no-op, mirroring free(NULL), so callers can destroy a
 * table unconditionally on cleanup paths.
 */
void HASH_destroyTable(LDM_hashTable *table) {
  if (table == NULL) {
    return;
  }
  free(table->entries);
  free(table);
}
/**
 * Prints the table size, the number of empty slots and the percentage of
 * empty slots to stdout.
 *
 * A slot is counted as empty when its `offset` field is 0.
 * For a table with zero slots the percentage is reported as 0.000 rather
 * than dividing zero by zero (which would print NaN).
 */
void HASH_outputTableOccupancy(const LDM_hashTable *hashTable) {
  // The size does not change while counting, so read it once.
  const U32 numSlots = HASH_getSize(hashTable);
  U32 numEmpty = 0;
  U32 i;
  double percentEmpty = 0.0;

  for (i = 0; i < numSlots; i++) {
    if (getBucket(hashTable, i)->offset == 0) {
      numEmpty++;
    }
  }

  if (numSlots != 0) {
    percentEmpty = 100.0 * (double)(numEmpty) / (double)numSlots;
  }

  printf("Hash table size, empty slots, %% empty: %u, %u, %.3f\n",
         numSlots, numEmpty, percentEmpty);
}

View File

@ -5,22 +5,19 @@
#include "ldm_hashtable.h"
#include "mem.h"
//TODO: move def somewhere else.
// Number of elements per hash bucket.
// HASH_BUCKET_SIZE_LOG defined in ldm.h
#define HASH_BUCKET_SIZE_LOG 2 // MAX is 4 for now
#define HASH_BUCKET_SIZE (1 << (HASH_BUCKET_SIZE_LOG))
// TODO: rename. Number of hash buckets.
#define LDM_HASHLOG ((LDM_MEMORY_USAGE)-4-HASH_BUCKET_SIZE_LOG)
#define ZSTD_SKIP
//#define TMP_TST
//#define ZSTD_SKIP
struct LDM_hashTable {
U32 size; // Number of buckets
U32 maxEntries; // Rename...
LDM_hashEntry *entries; // 1-D array for now.
U32 numBuckets;
U32 numEntries;
LDM_hashEntry *entries;
BYTE *bucketOffsets; // Pointer to current insert position.
// Position corresponding to offset=0 in LDM_hashEntry.
@ -32,8 +29,8 @@ struct LDM_hashTable {
LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase,
U32 minMatchLength, U32 maxWindowSize) {
LDM_hashTable *table = malloc(sizeof(LDM_hashTable));
table->size = size >> HASH_BUCKET_SIZE_LOG;
table->maxEntries = size;
table->numBuckets = size >> HASH_BUCKET_SIZE_LOG;
table->numEntries = size;
table->entries = calloc(size, sizeof(LDM_hashEntry));
table->bucketOffsets = calloc(size >> HASH_BUCKET_SIZE_LOG, sizeof(BYTE));
table->offsetBase = offsetBase;
@ -46,7 +43,6 @@ static LDM_hashEntry *getBucket(const LDM_hashTable *table, const hash_t hash) {
return table->entries + (hash << HASH_BUCKET_SIZE_LOG);
}
#if TMP_ZSTDTOGGLE
static unsigned ZSTD_NbCommonBytes (register size_t val)
{
if (MEM_isLittleEndian()) {
@ -159,26 +155,22 @@ U32 countBackwardsMatch(const BYTE *pIn, const BYTE *pAnchor,
return matchLength;
}
LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
const hash_t hash,
const U32 checksum,
const BYTE *pIn,
const BYTE *pEnd,
U32 *matchLength,
U32 *backwardsMatchLength,
const BYTE *pAnchor) {
LDM_hashEntry *HASH_getBestEntry(const LDM_hashTable *table,
const hash_t hash,
const U32 checksum,
const BYTE *pIn,
const BYTE *pEnd,
const BYTE *pAnchor,
U32 *pForwardMatchLength,
U32 *pBackwardMatchLength) {
LDM_hashEntry *bucket = getBucket(table, hash);
LDM_hashEntry *cur = bucket;
LDM_hashEntry *bestEntry = NULL;
U32 bestMatchLength = 0;
U32 forwardMatch = 0;
U32 backwardMatch = 0;
#ifdef TMP_TST
U32 numBetter = 0;
#endif
for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) {
// Check checksum for faster check.
const BYTE *pMatch = cur->offset + table->offsetBase;
// Check checksum for faster check.
if (cur->checksum == checksum && pIn - pMatch <= table->maxWindowSize) {
U32 forwardMatchLength = ZSTD_count(pIn, pMatch, pEnd);
U32 backwardMatchLength, totalMatchLength;
@ -193,105 +185,27 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
if (totalMatchLength >= bestMatchLength) {
bestMatchLength = totalMatchLength;
forwardMatch = forwardMatchLength;
backwardMatch = backwardMatchLength;
*pForwardMatchLength = forwardMatchLength;
*pBackwardMatchLength = backwardMatchLength;
bestEntry = cur;
#ifdef TMP_TST
numBetter++;
#endif
#ifdef ZSTD_SKIP
*matchLength = forwardMatchLength;
*backwardsMatchLength = backwardMatchLength;
return cur;
#endif
// *matchLength = forwardMatchLength;
// return cur;
}
}
}
if (bestEntry != NULL && bestMatchLength > table->minMatchLength) {
#ifdef TMP_TST
printf("Num better %u\n", numBetter - 1);
#endif
*matchLength = forwardMatch;
*backwardsMatchLength = backwardMatch;
if (bestEntry != NULL) {
return bestEntry;
}
return NULL;
}
#else
static int isValidMatch(const BYTE *pIn, const BYTE *pMatch,
U32 minMatchLength, U32 maxWindowSize) {
printf("HERE\n");
U32 lengthLeft = minMatchLength;
const BYTE *curIn = pIn;
const BYTE *curMatch = pMatch;
if (pIn - pMatch > maxWindowSize) {
return 0;
}
for (; lengthLeft >= 4; lengthLeft -= 4) {
if (MEM_read32(curIn) != MEM_read32(curMatch)) {
return 0;
}
curIn += 4;
curMatch += 4;
}
return 1;
}
//TODO: clean up function call. This is not at all decoupled from LDM.
LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
const hash_t hash,
const U32 checksum,
const BYTE *pIn,
const BYTE *pEnd,
U32 *matchLength,
U32 *backwardsMatchLength,
const BYTE *pAnchor) {
LDM_hashEntry *bucket = getBucket(table, hash);
LDM_hashEntry *cur = bucket;
(void)matchLength;
(void)backwardsMatchLength;
(void)pAnchor; for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) {
// Check checksum for faster check.
const BYTE *pMatch = cur->offset + table->offsetBase;
(void)pEnd;
if (cur->checksum == checksum &&
isValidMatch(pIn, pMatch, table->minMatchLength, table->maxWindowSize)) {
return cur;
}
}
return NULL;
}
#endif
hash_t HASH_hashU32(U32 value) {
return ((value * 2654435761U) >> (32 - LDM_HASHLOG));
}
LDM_hashEntry *HASH_getEntryFromHash(const LDM_hashTable *table,
const hash_t hash,
const U32 checksum) {
// Loop through bucket.
// TODO: in order of recency???
LDM_hashEntry *bucket = getBucket(table, hash);
LDM_hashEntry *cur = bucket;
for(; cur < bucket + HASH_BUCKET_SIZE; ++cur) {
if (cur->checksum == checksum) {
return cur;
}
}
return NULL;
}
void HASH_insert(LDM_hashTable *table,
const hash_t hash, const LDM_hashEntry entry) {
*(getBucket(table, hash) + table->bucketOffsets[hash]) = entry;
@ -300,7 +214,7 @@ void HASH_insert(LDM_hashTable *table,
}
U32 HASH_getSize(const LDM_hashTable *table) {
return table->size;
return table->numBuckets;
}
void HASH_destroyTable(LDM_hashTable *table) {
@ -312,15 +226,16 @@ void HASH_destroyTable(LDM_hashTable *table) {
void HASH_outputTableOccupancy(const LDM_hashTable *table) {
U32 ctr = 0;
LDM_hashEntry *cur = table->entries;
LDM_hashEntry *end = table->entries + (table->size * HASH_BUCKET_SIZE);
LDM_hashEntry *end = table->entries + (table->numBuckets * HASH_BUCKET_SIZE);
for (; cur < end; ++cur) {
if (cur->offset == 0) {
ctr++;
}
}
printf("Num buckets, bucket size: %d, %d\n", table->size, HASH_BUCKET_SIZE);
printf("Num buckets, bucket size: %d, %d\n",
table->numBuckets, HASH_BUCKET_SIZE);
printf("Hash table size, empty slots, %% empty: %u, %u, %.3f\n",
table->maxEntries, ctr,
100.0 * (double)(ctr) / table->maxEntries);
table->numEntries, ctr,
100.0 * (double)(ctr) / table->numEntries);
}

View File

@ -14,7 +14,6 @@
#define HASH_ONLY_EVERY_LOG (LDM_WINDOW_SIZE_LOG-((LDM_MEMORY_USAGE) - 4))
#define HASH_ONLY_EVERY ((1 << HASH_ONLY_EVERY_LOG) - 1)
#define ML_BITS 4
#define ML_MASK ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
@ -24,7 +23,6 @@
#define OUTPUT_CONFIGURATION
#define CHECKSUM_CHAR_OFFSET 10
//#define HASH_CHECK
//#define RUN_CHECKS
//#define TMP_RECOMPUTE_LENGTHS
@ -135,7 +133,6 @@ void LDM_printCompressStats(const LDM_compressStats *stats) {
int i = 0;
printf("=====================\n");
printf("Compression statistics\n");
//TODO: compute percentage matched?
printf("Window size, hash table size (bytes): 2^%u, 2^%u\n",
stats->windowSizeLog, stats->hashTableSizeLog);
printf("num matches, total match length, %% matched: %u, %llu, %.3f\n",
@ -191,7 +188,6 @@ int LDM_isValidMatch(const BYTE *pIn, const BYTE *pMatch) {
*/
static hash_t checksumToHash(U32 sum) {
return HASH_hashU32(sum);
// return ((sum * 2654435761U) >> (32 - LDM_HASHLOG));
}
/**
@ -455,22 +451,14 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match,
if (cctx->ip > cctx->imatchLimit) {
return 1;
}
#ifdef HASH_CHECK
entry = HASH_getEntryFromHash(cctx->hashTable, h, sum);
#else
entry = HASH_getValidEntry(cctx->hashTable, h, sum,
cctx->ip, cctx->iend,
matchLength, backwardMatchLength,
cctx->anchor);
#endif
entry = HASH_getBestEntry(cctx->hashTable, h, sum,
cctx->ip, cctx->iend,
cctx->anchor,
matchLength, backwardMatchLength);
if (entry != NULL) {
*match = entry->offset + cctx->ibase;
#ifdef HASH_CHECK
if (!LDM_isValidMatch(cctx->ip, *match)) {
entry = NULL;
}
#endif
}
putHashOfCurrentPositionFromHash(cctx, h, sum);
}
@ -563,21 +551,8 @@ size_t LDM_compress(const void *src, size_t srcSize,
cctx.stats.numMatches++;
#endif
#if TMP_RECOMPUTE_LENGTHS
backwardsMatchLength = 0;
/**
* Catch up: look back to extend the match backwards from the found match.
*/
while (cctx.ip > cctx.anchor && match > cctx.ibase &&
cctx.ip[-1] == match[-1]) {
cctx.ip--;
match--;
backwardsMatchLength++;
}
#else
cctx.ip -= backwardsMatchLength;
match -= backwardsMatchLength;
#endif
/**
* Write current block (literals, literal length, match offset, match
@ -586,16 +561,9 @@ size_t LDM_compress(const void *src, size_t srcSize,
{
const U32 literalLength = cctx.ip - cctx.anchor;
const U32 offset = cctx.ip - match;
#if TMP_RECOMPUTE_LENGTHS
const U32 matchLength = LDM_countMatchLength(
cctx.ip + LDM_MIN_MATCH_LENGTH + backwardsMatchLength,
match + LDM_MIN_MATCH_LENGTH + backwardsMatchLength,
cctx.ihashLimit) + backwardsMatchLength;
#else
const U32 matchLength = forwardMatchLength +
backwardsMatchLength -
LDM_MIN_MATCH_LENGTH;
#endif
LDM_outputBlock(&cctx, literalLength, offset, matchLength);

View File

@ -11,21 +11,21 @@
#define LDM_OFFSET_SIZE 4
// Defines the size of the hash table.
// Note that this is not the number of buckets.
// Currently this should be less than WINDOW_SIZE_LOG + 4?
#define LDM_MEMORY_USAGE 23
#define HASH_BUCKET_SIZE_LOG 3 // MAX is 4 for now
//#define LDM_LAG (1 << 20)
#define LDM_LAG (0)
// Defines the lag in inserting elements into the hash table.
#define LDM_LAG 0
#define LDM_WINDOW_SIZE_LOG 28
#define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG))
//These should be multiples of four (and perhaps set to the same value?).
#define LDM_MIN_MATCH_LENGTH 1024
#define LDM_HASH_LENGTH 1024
#define LDM_MIN_MATCH_LENGTH 64
#define LDM_HASH_LENGTH 64
#define TMP_ZSTDTOGGLE 1
#define TMP_RECOMPUTE_LENGTHS (!(TMP_ZSTDTOGGLE))
typedef struct LDM_compressStats LDM_compressStats;
typedef struct LDM_CCtx LDM_CCtx;

View File

@ -14,37 +14,17 @@ typedef struct LDM_hashEntry {
typedef struct LDM_hashTable LDM_hashTable;
/**
* Create a hash table with size hash buckets.
* LDM_hashEntry.offset is added to offsetBase to calculate pMatch in
* HASH_getValidEntry.
*/
LDM_hashTable *HASH_createTable(U32 size, const BYTE *offsetBase,
U32 minMatchLength, U32 maxWindowSize);
/**
* Returns an LDM_hashEntry from the table that matches the checksum.
* Returns NULL if one does not exist.
*/
LDM_hashEntry *HASH_getEntryFromHash(const LDM_hashTable *table,
const hash_t hash,
const U32 checksum);
/**
* Gets a valid entry that matches the checksum. A valid entry is defined by
* *isValid.
*
* The function finds an entry matching the checksum, computes pMatch as
* offset + table.offsetBase, and calls isValid.
*/
LDM_hashEntry *HASH_getValidEntry(const LDM_hashTable *table,
const hash_t hash,
const U32 checksum,
const BYTE *pIn,
const BYTE *pEnd,
U32 *matchLength,
U32 *backwardsMatchLength,
const BYTE *pAnchor);
LDM_hashEntry *HASH_getBestEntry(const LDM_hashTable *table,
const hash_t hash,
const U32 checksum,
const BYTE *pIn,
const BYTE *pEnd,
const BYTE *pAnchor,
U32 *matchLength,
U32 *backwardsMatchLength);
hash_t HASH_hashU32(U32 value);

View File

@ -4,6 +4,8 @@
#include <stdlib.h>
#include <string.h>
#include "ldm.h"
#define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE))
//#define LDM_HASH_ENTRY_SIZE 4
#define LDM_HASHTABLESIZE_U32 ((LDM_HASHTABLESIZE) >> 2)
@ -14,7 +16,6 @@
#define HASH_ONLY_EVERY ((1 << HASH_ONLY_EVERY_LOG) - 1)
/* Hash table stuff. */
#define HASH_BUCKET_SIZE_LOG 3 // MAX is 4 for now
#define HASH_BUCKET_SIZE (1 << (HASH_BUCKET_SIZE_LOG))
#define LDM_HASHLOG ((LDM_MEMORY_USAGE)-4-HASH_BUCKET_SIZE_LOG)
@ -32,18 +33,15 @@
//#define RUN_CHECKS
#include "ldm.h"
/* Hash table stuff */
typedef U32 hash_t;
typedef struct LDM_hashEntry {
U32 offset; // TODO: Replace with pointer?
U32 offset;
U32 checksum;
} LDM_hashEntry;
// TODO: Scanning speed
// TODO: Memory usage
struct LDM_compressStats {
U32 windowSizeLog, hashTableSizeLog;
@ -110,18 +108,22 @@ struct LDM_CCtx {
};
struct LDM_hashTable {
U32 size; // Number of buckets
U32 maxEntries; // Rename...
LDM_hashEntry *entries; // 1-D array for now.
U32 numBuckets; // Number of buckets
U32 numEntries; // Rename...
LDM_hashEntry *entries;
BYTE *bucketOffsets;
// Position corresponding to offset=0 in LDM_hashEntry.
};
/**
* Create a hash table that can contain size elements.
* The number of buckets is determined by size >> HASH_BUCKET_SIZE_LOG.
*/
LDM_hashTable *HASH_createTable(U32 size) {
LDM_hashTable *table = malloc(sizeof(LDM_hashTable));
table->size = size >> HASH_BUCKET_SIZE_LOG;
table->maxEntries = size;
table->numBuckets = size >> HASH_BUCKET_SIZE_LOG;
table->numEntries = size;
table->entries = calloc(size, sizeof(LDM_hashEntry));
table->bucketOffsets = calloc(size >> HASH_BUCKET_SIZE_LOG, sizeof(BYTE));
return table;
@ -131,10 +133,7 @@ static LDM_hashEntry *getBucket(const LDM_hashTable *table, const hash_t hash) {
return table->entries + (hash << HASH_BUCKET_SIZE_LOG);
}
static unsigned ZSTD_NbCommonBytes (register size_t val)
{
static unsigned ZSTD_NbCommonBytes (register size_t val) {
if (MEM_isLittleEndian()) {
if (MEM_64bits()) {
# if defined(_MSC_VER) && defined(_WIN64)
@ -234,6 +233,11 @@ static size_t ZSTD_count(const BYTE *pIn, const BYTE *pMatch,
return (size_t)(pIn - pStart);
}
/**
* Count number of bytes that match backwards before pIn and pMatch.
*
* We count only bytes where pMatch > pBase and pIn > pAnchor.
*/
U32 countBackwardsMatch(const BYTE *pIn, const BYTE *pAnchor,
const BYTE *pMatch, const BYTE *pBase) {
U32 matchLength = 0;
@ -245,20 +249,32 @@ U32 countBackwardsMatch(const BYTE *pIn, const BYTE *pAnchor,
return matchLength;
}
LDM_hashEntry *HASH_getValidEntry(const LDM_CCtx *cctx,
const hash_t hash,
const U32 checksum,
U32 *matchLength,
U32 *backwardsMatchLength) {
/**
* Returns a pointer to the entry in the hash table matching the hash and
* checksum with the "longest match length" as defined below. The forward and
* backward match lengths are written to *pForwardMatchLength and
* *pBackwardMatchLength.
*
* The match length is defined based on cctx->ip and the entry's offset.
* The forward match is computed from cctx->ip and entry->offset + cctx->ibase.
* The backward match is computed backwards from cctx->ip and
* cctx->ibase only if the forward match is longer than LDM_MIN_MATCH_LENGTH.
*
*/
LDM_hashEntry *HASH_getBestEntry(const LDM_CCtx *cctx,
const hash_t hash,
const U32 checksum,
U32 *pForwardMatchLength,
U32 *pBackwardMatchLength) {
LDM_hashTable *table = cctx->hashTable;
LDM_hashEntry *bucket = getBucket(table, hash);
LDM_hashEntry *cur = bucket;
LDM_hashEntry *bestEntry = NULL;
U32 bestMatchLength = 0;
for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) {
// Check checksum for faster check.
const BYTE *pMatch = cur->offset + cctx->ibase;
// Check checksum for faster check.
if (cur->checksum == checksum &&
cctx->ip - pMatch <= LDM_WINDOW_SIZE) {
U32 forwardMatchLength = ZSTD_count(cctx->ip, pMatch, cctx->iend);
@ -279,8 +295,8 @@ LDM_hashEntry *HASH_getValidEntry(const LDM_CCtx *cctx,
if (totalMatchLength >= bestMatchLength &&
totalMatchLength >= LDM_MIN_MATCH_LENGTH) {
bestMatchLength = totalMatchLength;
*matchLength = forwardMatchLength;
*backwardsMatchLength = backwardMatchLength;
*pForwardMatchLength = forwardMatchLength;
*pBackwardMatchLength = backwardMatchLength;
bestEntry = cur;
#ifdef ZSTD_SKIP
@ -303,7 +319,7 @@ void HASH_insert(LDM_hashTable *table,
}
U32 HASH_getSize(const LDM_hashTable *table) {
return table->size;
return table->numBuckets;
}
void HASH_destroyTable(LDM_hashTable *table) {
@ -315,20 +331,20 @@ void HASH_destroyTable(LDM_hashTable *table) {
void HASH_outputTableOccupancy(const LDM_hashTable *table) {
U32 ctr = 0;
LDM_hashEntry *cur = table->entries;
LDM_hashEntry *end = table->entries + (table->size * HASH_BUCKET_SIZE);
LDM_hashEntry *end = table->entries + (table->numBuckets * HASH_BUCKET_SIZE);
for (; cur < end; ++cur) {
if (cur->offset == 0) {
ctr++;
}
}
printf("Num buckets, bucket size: %d, %d\n", table->size, HASH_BUCKET_SIZE);
printf("Num buckets, bucket size: %d, %d\n",
table->numBuckets, HASH_BUCKET_SIZE);
printf("Hash table size, empty slots, %% empty: %u, %u, %.3f\n",
table->maxEntries, ctr,
100.0 * (double)(ctr) / table->maxEntries);
table->numEntries, ctr,
100.0 * (double)(ctr) / table->numEntries);
}
// TODO: This can be done more efficiently (but it is not that important as it
// is only used for computing stats).
static int intLog2(U32 x) {
@ -339,7 +355,7 @@ static int intLog2(U32 x) {
return ret;
}
// TODO: Maybe we would eventually prefer to have linear rather than
// Maybe we would eventually prefer to have linear rather than
// exponential buckets.
/**
void HASH_outputTableOffsetHistogram(const LDM_CCtx *cctx) {
@ -369,7 +385,6 @@ void LDM_printCompressStats(const LDM_compressStats *stats) {
int i = 0;
printf("=====================\n");
printf("Compression statistics\n");
//TODO: compute percentage matched?
printf("Window size, hash table size (bytes): 2^%u, 2^%u\n",
stats->windowSizeLog, stats->hashTableSizeLog);
printf("num matches, total match length, %% matched: %u, %llu, %.3f\n",
@ -429,7 +444,6 @@ hash_t HASH_hashU32(U32 value) {
*/
static hash_t checksumToHash(U32 sum) {
return HASH_hashU32(sum);
// return ((sum * 2654435761U) >> (32 - LDM_HASHLOG));
}
/**
@ -672,10 +686,10 @@ void LDM_destroyCCtx(LDM_CCtx *cctx) {
* Returns 0 if successful and 1 otherwise (i.e. no match can be found
* in the remaining input that is long enough).
*
* matchLength contains the forward length of the match.
* forwardMatchLength contains the forward length of the match.
*/
static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match,
U32 *matchLength, U32 *backwardMatchLength) {
U32 *forwardMatchLength, U32 *backwardMatchLength) {
LDM_hashEntry *entry = NULL;
cctx->nextIp = cctx->ip + cctx->step;
@ -693,8 +707,8 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match,
return 1;
}
entry = HASH_getValidEntry(cctx, h, sum,
matchLength, backwardMatchLength);
entry = HASH_getBestEntry(cctx, h, sum,
forwardMatchLength, backwardMatchLength);
if (entry != NULL) {
*match = entry->offset + cctx->ibase;

View File

@ -29,6 +29,7 @@ static int compress(const char *fname, const char *oname) {
size_t maxCompressedSize, compressedSize;
struct timeval tv1, tv2;
double timeTaken;
/* Open the input file. */
if ((fdin = open(fname, O_RDONLY)) < 0) {
@ -53,18 +54,7 @@ static int compress(const char *fname, const char *oname) {
// The compress function should check before writing or buffer writes.
maxCompressedSize += statbuf.st_size / 255;
/* Go to the location corresponding to the last byte. */
/* TODO: fallocate? */
if (lseek(fdout, maxCompressedSize - 1, SEEK_SET) == -1) {
perror("lseek error");
return 1;
}
/* Write a dummy byte at the last location. */
if (write(fdout, "", 1) != 1) {
perror("write error");
return 1;
}
ftruncate(fdout, maxCompressedSize);
/* mmap the input file. */
if ((src = mmap(0, statbuf.st_size, PROT_READ, MAP_SHARED, fdin, 0))
@ -103,12 +93,12 @@ static int compress(const char *fname, const char *oname) {
(unsigned)statbuf.st_size, (unsigned)compressedSize, oname,
(double)compressedSize / (statbuf.st_size) * 100);
timeTaken = (double) (tv2.tv_usec - tv1.tv_usec) / 1000000 +
(double) (tv2.tv_sec - tv1.tv_sec),
printf("Total compress time = %.3f seconds, Average compression speed: %.3f MB/s\n",
(double) (tv2.tv_usec - tv1.tv_usec) / 1000000 +
(double) (tv2.tv_sec - tv1.tv_sec),
((double)statbuf.st_size / (double) (1 << 20)) /
((double) (tv2.tv_usec - tv1.tv_usec) / 1000000 +
(double) (tv2.tv_sec - tv1.tv_sec)));
timeTaken,
((double)statbuf.st_size / (double) (1 << 20)) / timeTaken);
// Close files.
@ -156,17 +146,7 @@ static int decompress(const char *fname, const char *oname) {
/* Read the header. */
LDM_readHeader(src, &compressedSize, &decompressedSize);
/* Go to the location corresponding to the last byte. */
if (lseek(fdout, decompressedSize - 1, SEEK_SET) == -1) {
perror("lseek error");
return 1;
}
/* write a dummy byte at the last location */
if (write(fdout, "", 1) != 1) {
perror("write error");
return 1;
}
ftruncate(fdout, decompressedSize);
/* mmap the output file */
if ((dst = mmap(0, decompressedSize, PROT_READ | PROT_WRITE,