Experiment with not using a checksum
parent
08a6e9a141
commit
0295a27133
|
@ -10,6 +10,14 @@
|
||||||
#define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE))
|
#define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE))
|
||||||
#define LDM_HASHTABLESIZE_U64 ((LDM_HASHTABLESIZE) >> 3)
|
#define LDM_HASHTABLESIZE_U64 ((LDM_HASHTABLESIZE) >> 3)
|
||||||
|
|
||||||
|
#define LDM_HASH_ENTRY_SIZE_LOG 3
|
||||||
|
|
||||||
|
//#define HASH_ONLY_EVERY_LOG 7
|
||||||
|
#define HASH_ONLY_EVERY_LOG (LDM_WINDOW_SIZE_LOG-((LDM_MEMORY_USAGE)-(LDM_HASH_ENTRY_SIZE_LOG)))
|
||||||
|
|
||||||
|
#define HASH_ONLY_EVERY ((1 << HASH_ONLY_EVERY_LOG) - 1)
|
||||||
|
|
||||||
|
|
||||||
#define COMPUTE_STATS
|
#define COMPUTE_STATS
|
||||||
#define OUTPUT_CONFIGURATION
|
#define OUTPUT_CONFIGURATION
|
||||||
#define CHECKSUM_CHAR_OFFSET 10
|
#define CHECKSUM_CHAR_OFFSET 10
|
||||||
|
@ -510,6 +518,21 @@ size_t LDM_compress(const void *src, size_t srcSize,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void LDM_outputConfiguration(void) {
|
||||||
|
printf("=====================\n");
|
||||||
|
printf("Configuration\n");
|
||||||
|
printf("LDM_WINDOW_SIZE_LOG: %d\n", LDM_WINDOW_SIZE_LOG);
|
||||||
|
printf("LDM_MIN_MATCH_LENGTH, LDM_HASH_LENGTH: %d, %d\n",
|
||||||
|
LDM_MIN_MATCH_LENGTH, LDM_HASH_LENGTH);
|
||||||
|
printf("LDM_MEMORY_USAGE: %d\n", LDM_MEMORY_USAGE);
|
||||||
|
printf("HASH_ONLY_EVERY_LOG: %d\n", HASH_ONLY_EVERY_LOG);
|
||||||
|
printf("HASH_BUCKET_SIZE_LOG: %d\n", HASH_BUCKET_SIZE_LOG);
|
||||||
|
printf("LDM_LAG %d\n", LDM_LAG);
|
||||||
|
printf("=====================\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void LDM_test(const BYTE *src) {
|
void LDM_test(const BYTE *src) {
|
||||||
(void)src;
|
(void)src;
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,23 +32,15 @@
|
||||||
#define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG))
|
#define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG))
|
||||||
|
|
||||||
//These should be multiples of four (and perhaps set to the same value?).
|
//These should be multiples of four (and perhaps set to the same value?).
|
||||||
#define LDM_MIN_MATCH_LENGTH 16
|
#define LDM_MIN_MATCH_LENGTH 64
|
||||||
#define LDM_HASH_LENGTH 16
|
#define LDM_HASH_LENGTH 64
|
||||||
|
|
||||||
// Experimental.
|
// Experimental.
|
||||||
//#define TMP_EVICTION
|
//#define TMP_EVICTION // Experiment with eviction policies.
|
||||||
#define TMP_TAG_INSERT
|
#define TMP_TAG_INSERT // Insertion policy based on hash.
|
||||||
//#define TMP_FORCE_HASH_ONLY
|
|
||||||
|
|
||||||
#define LDM_HASH_ENTRY_SIZE_LOG 3
|
#define USE_CHECKSUM 1
|
||||||
|
//#define USE_CHECKSUM (HASH_BUCKET_SIZE_LOG)
|
||||||
// Insert every (HASH_ONLY_EVERY + 1) into the hash table.
|
|
||||||
#ifdef TMP_FORCE_HASH_ONLY
|
|
||||||
#define HASH_ONLY_EVERY_LOG 7
|
|
||||||
#else
|
|
||||||
#define HASH_ONLY_EVERY_LOG (LDM_WINDOW_SIZE_LOG-((LDM_MEMORY_USAGE)-(LDM_HASH_ENTRY_SIZE_LOG)))
|
|
||||||
#endif
|
|
||||||
#define HASH_ONLY_EVERY ((1 << HASH_ONLY_EVERY_LOG) - 1)
|
|
||||||
|
|
||||||
typedef struct LDM_compressStats LDM_compressStats;
|
typedef struct LDM_compressStats LDM_compressStats;
|
||||||
typedef struct LDM_CCtx LDM_CCtx;
|
typedef struct LDM_CCtx LDM_CCtx;
|
||||||
|
|
|
@ -7,9 +7,20 @@
|
||||||
#include "ldm.h"
|
#include "ldm.h"
|
||||||
|
|
||||||
#define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE))
|
#define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE))
|
||||||
|
#define LDM_HASHTABLESIZE_U32 ((LDM_HASHTABLESIZE) >> 2)
|
||||||
#define LDM_HASHTABLESIZE_U64 ((LDM_HASHTABLESIZE) >> 3)
|
#define LDM_HASHTABLESIZE_U64 ((LDM_HASHTABLESIZE) >> 3)
|
||||||
|
|
||||||
/* Hash table stuff. */
|
#if USE_CHECKSUM
|
||||||
|
#define LDM_HASH_ENTRY_SIZE_LOG 3
|
||||||
|
#else
|
||||||
|
#define LDM_HASH_ENTRY_SIZE_LOG 2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//#define HASH_ONLY_EVERY_LOG 7
|
||||||
|
#define HASH_ONLY_EVERY_LOG (LDM_WINDOW_SIZE_LOG-((LDM_MEMORY_USAGE)-(LDM_HASH_ENTRY_SIZE_LOG)))
|
||||||
|
|
||||||
|
#define HASH_ONLY_EVERY ((1 << (HASH_ONLY_EVERY_LOG)) - 1)
|
||||||
|
|
||||||
#define HASH_BUCKET_SIZE (1 << (HASH_BUCKET_SIZE_LOG))
|
#define HASH_BUCKET_SIZE (1 << (HASH_BUCKET_SIZE_LOG))
|
||||||
#define LDM_HASHLOG ((LDM_MEMORY_USAGE)-(LDM_HASH_ENTRY_SIZE_LOG)-(HASH_BUCKET_SIZE_LOG))
|
#define LDM_HASHLOG ((LDM_MEMORY_USAGE)-(LDM_HASH_ENTRY_SIZE_LOG)-(HASH_BUCKET_SIZE_LOG))
|
||||||
|
|
||||||
|
@ -27,10 +38,16 @@ static const U64 prime8bytes = 11400714785074694791ULL;
|
||||||
// Type of the small hash used to index into the hash table.
|
// Type of the small hash used to index into the hash table.
|
||||||
typedef U32 hash_t;
|
typedef U32 hash_t;
|
||||||
|
|
||||||
|
#if USE_CHECKSUM
|
||||||
typedef struct LDM_hashEntry {
|
typedef struct LDM_hashEntry {
|
||||||
U32 offset;
|
U32 offset;
|
||||||
U32 checksum;
|
U32 checksum;
|
||||||
} LDM_hashEntry;
|
} LDM_hashEntry;
|
||||||
|
#else
|
||||||
|
typedef struct LDM_hashEntry {
|
||||||
|
U32 offset;
|
||||||
|
} LDM_hashEntry;
|
||||||
|
#endif
|
||||||
|
|
||||||
struct LDM_compressStats {
|
struct LDM_compressStats {
|
||||||
U32 windowSizeLog, hashTableSizeLog;
|
U32 windowSizeLog, hashTableSizeLog;
|
||||||
|
@ -39,6 +56,8 @@ struct LDM_compressStats {
|
||||||
U64 totalLiteralLength;
|
U64 totalLiteralLength;
|
||||||
U64 totalOffset;
|
U64 totalOffset;
|
||||||
|
|
||||||
|
U32 matchLengthHistogram[32];
|
||||||
|
|
||||||
U32 minOffset, maxOffset;
|
U32 minOffset, maxOffset;
|
||||||
U32 offsetHistogram[32];
|
U32 offsetHistogram[32];
|
||||||
|
|
||||||
|
@ -262,12 +281,19 @@ LDM_hashEntry *HASH_getBestEntry(const LDM_CCtx *cctx,
|
||||||
LDM_hashEntry *cur = bucket;
|
LDM_hashEntry *cur = bucket;
|
||||||
LDM_hashEntry *bestEntry = NULL;
|
LDM_hashEntry *bestEntry = NULL;
|
||||||
U64 bestMatchLength = 0;
|
U64 bestMatchLength = 0;
|
||||||
|
#if !(USE_CHECKSUM)
|
||||||
|
(void)checksum;
|
||||||
|
#endif
|
||||||
for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) {
|
for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) {
|
||||||
const BYTE *pMatch = cur->offset + cctx->ibase;
|
const BYTE *pMatch = cur->offset + cctx->ibase;
|
||||||
|
|
||||||
// Check checksum for faster check.
|
// Check checksum for faster check.
|
||||||
|
#if USE_CHECKSUM
|
||||||
if (cur->checksum == checksum &&
|
if (cur->checksum == checksum &&
|
||||||
cctx->ip - pMatch <= LDM_WINDOW_SIZE) {
|
cctx->ip - pMatch <= LDM_WINDOW_SIZE) {
|
||||||
|
#else
|
||||||
|
if (cctx->ip - pMatch <= LDM_WINDOW_SIZE) {
|
||||||
|
#endif
|
||||||
U64 forwardMatchLength = ZSTD_count(cctx->ip, pMatch, cctx->iend);
|
U64 forwardMatchLength = ZSTD_count(cctx->ip, pMatch, cctx->iend);
|
||||||
U64 backwardMatchLength, totalMatchLength;
|
U64 backwardMatchLength, totalMatchLength;
|
||||||
|
|
||||||
|
@ -448,12 +474,18 @@ void LDM_printCompressStats(const LDM_compressStats *stats) {
|
||||||
stats->minOffset, stats->maxOffset);
|
stats->minOffset, stats->maxOffset);
|
||||||
|
|
||||||
printf("\n");
|
printf("\n");
|
||||||
printf("offset histogram: offset, num matches, %% of matches\n");
|
printf("offset histogram | match length histogram\n");
|
||||||
|
printf("offset/ML, num matches, %% of matches | num matches, %% of matches\n");
|
||||||
|
|
||||||
for (; i <= intLog2(stats->maxOffset); i++) {
|
for (; i <= intLog2(stats->maxOffset); i++) {
|
||||||
printf("2^%*d: %10u %6.3f%%\n", 2, i,
|
printf("2^%*d: %10u %6.3f%% |2^%*d: %10u %6.3f \n",
|
||||||
|
2, i,
|
||||||
stats->offsetHistogram[i],
|
stats->offsetHistogram[i],
|
||||||
100.0 * (double) stats->offsetHistogram[i] /
|
100.0 * (double) stats->offsetHistogram[i] /
|
||||||
|
(double) stats->numMatches,
|
||||||
|
2, i,
|
||||||
|
stats->matchLengthHistogram[i],
|
||||||
|
100.0 * (double) stats->matchLengthHistogram[i] /
|
||||||
(double) stats->numMatches);
|
(double) stats->numMatches);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
@ -619,23 +651,32 @@ static void putHashOfCurrentPositionFromHash(LDM_CCtx *cctx, U64 hash) {
|
||||||
// TODO: Off by one, but not important.
|
// TODO: Off by one, but not important.
|
||||||
if (cctx->lagIp - cctx->ibase > 0) {
|
if (cctx->lagIp - cctx->ibase > 0) {
|
||||||
U32 smallHash = getSmallHash(cctx->lagHash);
|
U32 smallHash = getSmallHash(cctx->lagHash);
|
||||||
|
|
||||||
|
# if USE_CHECKSUM
|
||||||
U32 checksum = getChecksum(cctx->lagHash);
|
U32 checksum = getChecksum(cctx->lagHash);
|
||||||
const LDM_hashEntry entry = { cctx->lagIp - cctx->ibase, checksum };
|
const LDM_hashEntry entry = { cctx->lagIp - cctx->ibase, checksum };
|
||||||
#ifdef TMP_EVICTION
|
# else
|
||||||
HASH_insert(cctx->hashTable, smallHash, entry, cctx);
|
const LDM_hashEntry entry = { cctx->lagIp - cctx->ibase };
|
||||||
#else
|
# endif
|
||||||
HASH_insert(cctx->hashTable, smallHash, entry);
|
|
||||||
#endif
|
|
||||||
} else {
|
|
||||||
U32 smallHash = getSmallHash(hash);
|
|
||||||
U32 checksum = getChecksum(hash);
|
|
||||||
|
|
||||||
const LDM_hashEntry entry = { cctx->ip - cctx->ibase, checksum };
|
# ifdef TMP_EVICTION
|
||||||
#ifdef TMP_EVICTION
|
|
||||||
HASH_insert(cctx->hashTable, smallHash, entry, cctx);
|
HASH_insert(cctx->hashTable, smallHash, entry, cctx);
|
||||||
#else
|
# else
|
||||||
HASH_insert(cctx->hashTable, smallHash, entry);
|
HASH_insert(cctx->hashTable, smallHash, entry);
|
||||||
#endif
|
# endif
|
||||||
|
} else {
|
||||||
|
# if USE_CHECKSUM
|
||||||
|
U32 checksum = getChecksum(hash);
|
||||||
|
const LDM_hashEntry entry = { cctx->lagIp - cctx->ibase, checksum };
|
||||||
|
# else
|
||||||
|
const LDM_hashEntry entry = { cctx->lagIp - cctx->ibase };
|
||||||
|
# endif
|
||||||
|
|
||||||
|
# ifdef TMP_EVICTION
|
||||||
|
HASH_insert(cctx->hashTable, smallHash, entry, cctx);
|
||||||
|
# else
|
||||||
|
HASH_insert(cctx->hashTable, smallHash, entry);
|
||||||
|
# endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
@ -646,8 +687,12 @@ static void putHashOfCurrentPositionFromHash(LDM_CCtx *cctx, U64 hash) {
|
||||||
if (((cctx->ip - cctx->ibase) & HASH_ONLY_EVERY) == HASH_ONLY_EVERY) {
|
if (((cctx->ip - cctx->ibase) & HASH_ONLY_EVERY) == HASH_ONLY_EVERY) {
|
||||||
#endif
|
#endif
|
||||||
U32 smallHash = getSmallHash(hash);
|
U32 smallHash = getSmallHash(hash);
|
||||||
|
#if USE_CHECKSUM
|
||||||
U32 checksum = getChecksum(hash);
|
U32 checksum = getChecksum(hash);
|
||||||
const LDM_hashEntry entry = { cctx->ip - cctx->ibase, checksum };
|
const LDM_hashEntry entry = { cctx->ip - cctx->ibase, checksum };
|
||||||
|
#else
|
||||||
|
const LDM_hashEntry entry = { cctx->ip - cctx->ibase };
|
||||||
|
#endif
|
||||||
#ifdef TMP_EVICTION
|
#ifdef TMP_EVICTION
|
||||||
HASH_insert(cctx->hashTable, smallHash, entry, cctx);
|
HASH_insert(cctx->hashTable, smallHash, entry, cctx);
|
||||||
#else
|
#else
|
||||||
|
@ -711,8 +756,11 @@ void LDM_initializeCCtx(LDM_CCtx *cctx,
|
||||||
cctx->anchor = cctx->ibase;
|
cctx->anchor = cctx->ibase;
|
||||||
|
|
||||||
memset(&(cctx->stats), 0, sizeof(cctx->stats));
|
memset(&(cctx->stats), 0, sizeof(cctx->stats));
|
||||||
|
#if USE_CHECKSUM
|
||||||
cctx->hashTable = HASH_createTable(LDM_HASHTABLESIZE_U64);
|
cctx->hashTable = HASH_createTable(LDM_HASHTABLESIZE_U64);
|
||||||
|
#else
|
||||||
|
cctx->hashTable = HASH_createTable(LDM_HASHTABLESIZE_U32);
|
||||||
|
#endif
|
||||||
cctx->stats.minOffset = UINT_MAX;
|
cctx->stats.minOffset = UINT_MAX;
|
||||||
cctx->stats.windowSizeLog = LDM_WINDOW_SIZE_LOG;
|
cctx->stats.windowSizeLog = LDM_WINDOW_SIZE_LOG;
|
||||||
cctx->stats.hashTableSizeLog = LDM_MEMORY_USAGE;
|
cctx->stats.hashTableSizeLog = LDM_MEMORY_USAGE;
|
||||||
|
@ -755,6 +803,7 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match,
|
||||||
U32 hashEveryMask;
|
U32 hashEveryMask;
|
||||||
#endif
|
#endif
|
||||||
setNextHash(cctx);
|
setNextHash(cctx);
|
||||||
|
|
||||||
hash = cctx->nextHash;
|
hash = cctx->nextHash;
|
||||||
smallHash = getSmallHash(hash);
|
smallHash = getSmallHash(hash);
|
||||||
checksum = getChecksum(hash);
|
checksum = getChecksum(hash);
|
||||||
|
@ -770,6 +819,7 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match,
|
||||||
}
|
}
|
||||||
#ifdef TMP_TAG_INSERT
|
#ifdef TMP_TAG_INSERT
|
||||||
if (hashEveryMask == HASH_ONLY_EVERY) {
|
if (hashEveryMask == HASH_ONLY_EVERY) {
|
||||||
|
|
||||||
entry = HASH_getBestEntry(cctx, smallHash, checksum,
|
entry = HASH_getBestEntry(cctx, smallHash, checksum,
|
||||||
forwardMatchLength, backwardMatchLength);
|
forwardMatchLength, backwardMatchLength);
|
||||||
}
|
}
|
||||||
|
@ -781,7 +831,9 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match,
|
||||||
if (entry != NULL) {
|
if (entry != NULL) {
|
||||||
*match = entry->offset + cctx->ibase;
|
*match = entry->offset + cctx->ibase;
|
||||||
}
|
}
|
||||||
|
|
||||||
putHashOfCurrentPositionFromHash(cctx, hash);
|
putHashOfCurrentPositionFromHash(cctx, hash);
|
||||||
|
|
||||||
}
|
}
|
||||||
setNextHash(cctx);
|
setNextHash(cctx);
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -850,6 +902,7 @@ size_t LDM_compress(const void *src, size_t srcSize,
|
||||||
U64 backwardsMatchLength = 0;
|
U64 backwardsMatchLength = 0;
|
||||||
|
|
||||||
LDM_initializeCCtx(&cctx, src, srcSize, dst, maxDstSize);
|
LDM_initializeCCtx(&cctx, src, srcSize, dst, maxDstSize);
|
||||||
|
|
||||||
#ifdef OUTPUT_CONFIGURATION
|
#ifdef OUTPUT_CONFIGURATION
|
||||||
LDM_outputConfiguration();
|
LDM_outputConfiguration();
|
||||||
#endif
|
#endif
|
||||||
|
@ -869,6 +922,7 @@ size_t LDM_compress(const void *src, size_t srcSize,
|
||||||
*/
|
*/
|
||||||
while (LDM_findBestMatch(&cctx, &match, &forwardMatchLength,
|
while (LDM_findBestMatch(&cctx, &match, &forwardMatchLength,
|
||||||
&backwardsMatchLength) == 0) {
|
&backwardsMatchLength) == 0) {
|
||||||
|
|
||||||
#ifdef COMPUTE_STATS
|
#ifdef COMPUTE_STATS
|
||||||
cctx.stats.numMatches++;
|
cctx.stats.numMatches++;
|
||||||
#endif
|
#endif
|
||||||
|
@ -898,6 +952,8 @@ size_t LDM_compress(const void *src, size_t srcSize,
|
||||||
cctx.stats.maxOffset =
|
cctx.stats.maxOffset =
|
||||||
offset > cctx.stats.maxOffset ? offset : cctx.stats.maxOffset;
|
offset > cctx.stats.maxOffset ? offset : cctx.stats.maxOffset;
|
||||||
cctx.stats.offsetHistogram[(U32)intLog2(offset)]++;
|
cctx.stats.offsetHistogram[(U32)intLog2(offset)]++;
|
||||||
|
cctx.stats.matchLengthHistogram[
|
||||||
|
(U32)intLog2(matchLength + LDM_MIN_MATCH_LENGTH)]++;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Move ip to end of block, inserting hashes at each position.
|
// Move ip to end of block, inserting hashes at each position.
|
||||||
|
@ -938,6 +994,22 @@ size_t LDM_compress(const void *src, size_t srcSize,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void LDM_outputConfiguration(void) {
|
||||||
|
printf("=====================\n");
|
||||||
|
printf("Configuration\n");
|
||||||
|
printf("LDM_WINDOW_SIZE_LOG: %d\n", LDM_WINDOW_SIZE_LOG);
|
||||||
|
printf("LDM_MIN_MATCH_LENGTH, LDM_HASH_LENGTH: %d, %d\n",
|
||||||
|
LDM_MIN_MATCH_LENGTH, LDM_HASH_LENGTH);
|
||||||
|
printf("LDM_MEMORY_USAGE: %d\n", LDM_MEMORY_USAGE);
|
||||||
|
printf("HASH_ONLY_EVERY_LOG: %d\n", HASH_ONLY_EVERY_LOG);
|
||||||
|
printf("HASH_BUCKET_SIZE_LOG: %d\n", HASH_BUCKET_SIZE_LOG);
|
||||||
|
printf("LDM_LAG %d\n", LDM_LAG);
|
||||||
|
printf("USE_CHECKSUM %d\n", USE_CHECKSUM);
|
||||||
|
printf("=====================\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// TODO: implement and test hash function
|
// TODO: implement and test hash function
|
||||||
void LDM_test(const BYTE *src) {
|
void LDM_test(const BYTE *src) {
|
||||||
const U32 diff = 100;
|
const U32 diff = 100;
|
||||||
|
|
|
@ -2,19 +2,6 @@
|
||||||
|
|
||||||
#include "ldm.h"
|
#include "ldm.h"
|
||||||
|
|
||||||
void LDM_outputConfiguration(void) {
|
|
||||||
printf("=====================\n");
|
|
||||||
printf("Configuration\n");
|
|
||||||
printf("LDM_WINDOW_SIZE_LOG: %d\n", LDM_WINDOW_SIZE_LOG);
|
|
||||||
printf("LDM_MIN_MATCH_LENGTH, LDM_HASH_LENGTH: %d, %d\n",
|
|
||||||
LDM_MIN_MATCH_LENGTH, LDM_HASH_LENGTH);
|
|
||||||
printf("LDM_MEMORY_USAGE: %d\n", LDM_MEMORY_USAGE);
|
|
||||||
printf("HASH_ONLY_EVERY_LOG: %d\n", HASH_ONLY_EVERY_LOG);
|
|
||||||
printf("HASH_BUCKET_SIZE_LOG: %d\n", HASH_BUCKET_SIZE_LOG);
|
|
||||||
printf("LDM_LAG %d\n", LDM_LAG);
|
|
||||||
printf("=====================\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
void LDM_readHeader(const void *src, U64 *compressedSize,
|
void LDM_readHeader(const void *src, U64 *compressedSize,
|
||||||
U64 *decompressedSize) {
|
U64 *decompressedSize) {
|
||||||
const BYTE *ip = (const BYTE *)src;
|
const BYTE *ip = (const BYTE *)src;
|
||||||
|
|
|
@ -7,10 +7,16 @@
|
||||||
#include "ldm.h"
|
#include "ldm.h"
|
||||||
|
|
||||||
#define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE))
|
#define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE))
|
||||||
#define LDM_HASH_ENTRY_SIZE_LOG 3
|
|
||||||
#define LDM_HASHTABLESIZE_U32 ((LDM_HASHTABLESIZE) >> 2)
|
#define LDM_HASHTABLESIZE_U32 ((LDM_HASHTABLESIZE) >> 2)
|
||||||
#define LDM_HASHTABLESIZE_U64 ((LDM_HASHTABLESIZE) >> 3)
|
#define LDM_HASHTABLESIZE_U64 ((LDM_HASHTABLESIZE) >> 3)
|
||||||
|
|
||||||
|
#define LDM_HASH_ENTRY_SIZE_LOG 3
|
||||||
|
//#define HASH_ONLY_EVERY_LOG 7
|
||||||
|
#define HASH_ONLY_EVERY_LOG (LDM_WINDOW_SIZE_LOG-((LDM_MEMORY_USAGE)-(LDM_HASH_ENTRY_SIZE_LOG)))
|
||||||
|
|
||||||
|
#define HASH_ONLY_EVERY ((1 << HASH_ONLY_EVERY_LOG) - 1)
|
||||||
|
|
||||||
|
|
||||||
/* Hash table stuff. */
|
/* Hash table stuff. */
|
||||||
#define HASH_BUCKET_SIZE (1 << (HASH_BUCKET_SIZE_LOG))
|
#define HASH_BUCKET_SIZE (1 << (HASH_BUCKET_SIZE_LOG))
|
||||||
#define LDM_HASHLOG ((LDM_MEMORY_USAGE)-(LDM_HASH_ENTRY_SIZE_LOG)-(HASH_BUCKET_SIZE_LOG))
|
#define LDM_HASHLOG ((LDM_MEMORY_USAGE)-(LDM_HASH_ENTRY_SIZE_LOG)-(HASH_BUCKET_SIZE_LOG))
|
||||||
|
@ -38,6 +44,8 @@ struct LDM_compressStats {
|
||||||
U64 totalLiteralLength;
|
U64 totalLiteralLength;
|
||||||
U64 totalOffset;
|
U64 totalOffset;
|
||||||
|
|
||||||
|
U32 matchLengthHistogram[32];
|
||||||
|
|
||||||
U32 minOffset, maxOffset;
|
U32 minOffset, maxOffset;
|
||||||
|
|
||||||
U32 offsetHistogram[32];
|
U32 offsetHistogram[32];
|
||||||
|
@ -358,12 +366,18 @@ void LDM_printCompressStats(const LDM_compressStats *stats) {
|
||||||
stats->minOffset, stats->maxOffset);
|
stats->minOffset, stats->maxOffset);
|
||||||
|
|
||||||
printf("\n");
|
printf("\n");
|
||||||
printf("offset histogram: offset, num matches, %% of matches\n");
|
printf("offset histogram | match length histogram\n");
|
||||||
|
printf("offset/ML, num matches, %% of matches | num matches, %% of matches\n");
|
||||||
|
|
||||||
for (; i <= intLog2(stats->maxOffset); i++) {
|
for (; i <= intLog2(stats->maxOffset); i++) {
|
||||||
printf("2^%*d: %10u %6.3f%%\n", 2, i,
|
printf("2^%*d: %10u %6.3f%% |2^%*d: %10u %6.3f \n",
|
||||||
|
2, i,
|
||||||
stats->offsetHistogram[i],
|
stats->offsetHistogram[i],
|
||||||
100.0 * (double) stats->offsetHistogram[i] /
|
100.0 * (double) stats->offsetHistogram[i] /
|
||||||
|
(double) stats->numMatches,
|
||||||
|
2, i,
|
||||||
|
stats->matchLengthHistogram[i],
|
||||||
|
100.0 * (double) stats->matchLengthHistogram[i] /
|
||||||
(double) stats->numMatches);
|
(double) stats->numMatches);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
@ -742,6 +756,8 @@ size_t LDM_compress(const void *src, size_t srcSize,
|
||||||
cctx.stats.maxOffset =
|
cctx.stats.maxOffset =
|
||||||
offset > cctx.stats.maxOffset ? offset : cctx.stats.maxOffset;
|
offset > cctx.stats.maxOffset ? offset : cctx.stats.maxOffset;
|
||||||
cctx.stats.offsetHistogram[(U32)intLog2(offset)]++;
|
cctx.stats.offsetHistogram[(U32)intLog2(offset)]++;
|
||||||
|
cctx.stats.matchLengthHistogram[
|
||||||
|
(U32)intLog2(matchLength + LDM_MIN_MATCH_LENGTH)]++;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Move ip to end of block, inserting hashes at each position.
|
// Move ip to end of block, inserting hashes at each position.
|
||||||
|
@ -784,6 +800,21 @@ size_t LDM_compress(const void *src, size_t srcSize,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void LDM_outputConfiguration(void) {
|
||||||
|
printf("=====================\n");
|
||||||
|
printf("Configuration\n");
|
||||||
|
printf("LDM_WINDOW_SIZE_LOG: %d\n", LDM_WINDOW_SIZE_LOG);
|
||||||
|
printf("LDM_MIN_MATCH_LENGTH, LDM_HASH_LENGTH: %d, %d\n",
|
||||||
|
LDM_MIN_MATCH_LENGTH, LDM_HASH_LENGTH);
|
||||||
|
printf("LDM_MEMORY_USAGE: %d\n", LDM_MEMORY_USAGE);
|
||||||
|
printf("HASH_ONLY_EVERY_LOG: %d\n", HASH_ONLY_EVERY_LOG);
|
||||||
|
printf("HASH_BUCKET_SIZE_LOG: %d\n", HASH_BUCKET_SIZE_LOG);
|
||||||
|
printf("LDM_LAG %d\n", LDM_LAG);
|
||||||
|
printf("=====================\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// TODO: implement and test hash function
|
// TODO: implement and test hash function
|
||||||
void LDM_test(const BYTE *src) {
|
void LDM_test(const BYTE *src) {
|
||||||
(void)src;
|
(void)src;
|
||||||
|
|
Loading…
Reference in New Issue