Add offset histogram

This commit is contained in:
Stella Lau 2017-07-14 10:52:03 -07:00
parent 175a6c6029
commit 4db7f12ef3
4 changed files with 43 additions and 8 deletions

View File

@ -27,7 +27,7 @@ default: all
all: main-ldm all: main-ldm
main-ldm : ldm.c main-ldm.c main-ldm : ldm.h ldm.c main-ldm.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
clean: clean:

View File

@ -15,7 +15,7 @@
#define RUN_MASK ((1U<<RUN_BITS)-1) #define RUN_MASK ((1U<<RUN_BITS)-1)
#define COMPUTE_STATS #define COMPUTE_STATS
#define CHECKSUM_CHAR_OFFSET 0 #define CHECKSUM_CHAR_OFFSET 10
//#define RUN_CHECKS //#define RUN_CHECKS
//#define LDM_DEBUG //#define LDM_DEBUG
@ -23,6 +23,16 @@ struct LDM_hashEntry {
offset_t offset; offset_t offset;
}; };
// Hash table bookkeeping: a pointer to LDM_hashEntry storage plus
// size-related counters.
// NOTE(review): no code in this view reads or writes these fields, so the
// per-field notes below are assumptions to confirm against the users.
typedef struct LDM_hashTable {
// NOTE(review): presumably the number of entries currently stored -- confirm.
U32 numEntries;
U32 minimumTagMask; // TODO: what if tag == offset?
// Maximum number of elements in the table.
U32 limit;
// Entry storage. NOTE(review): allocation and ownership are not visible here.
LDM_hashEntry *entries;
} LDM_hashTable;
// TODO: Add offset histogram by powers of two // TODO: Add offset histogram by powers of two
// TODO: Scanning speed // TODO: Scanning speed
// TODO: Memory usage // TODO: Memory usage
@ -36,6 +46,8 @@ struct LDM_compressStats {
U32 numCollisions; U32 numCollisions;
U32 numHashInserts; U32 numHashInserts;
U32 offsetHistogram[32];
}; };
struct LDM_CCtx { struct LDM_CCtx {
@ -92,7 +104,19 @@ void LDM_outputHashtableOccupancy(
100.0 * (double)(ctr) / (double)hashTableSize); 100.0 * (double)(ctr) / (double)hashTableSize);
} }
// TODO: This can be done more efficiently but is not that important as it
// is only used for computing stats.
//
/**
 * Counts how many times x can be shifted right by one bit before it becomes
 * zero, not counting the final shift that produces zero. For x >= 1 this is
 * floor(log2(x)); for x == 0 the result is 0 (same as for x == 1).
 *
 * NOTE(review): assumes U32 is an unsigned integer type -- confirm.
 */
static int intLog2(U32 x) {
  int exponent = 0;
  /* Drop the lowest bit first, then count each remaining non-zero step. */
  for (x >>= 1; x != 0; x >>= 1) {
    exponent += 1;
  }
  return exponent;
}
void LDM_printCompressStats(const LDM_compressStats *stats) { void LDM_printCompressStats(const LDM_compressStats *stats) {
int i = 0;
printf("=====================\n"); printf("=====================\n");
printf("Compression statistics\n"); printf("Compression statistics\n");
//TODO: compute percentage matched? //TODO: compute percentage matched?
@ -107,11 +131,22 @@ void LDM_printCompressStats(const LDM_compressStats *stats) {
((double)stats->totalOffset) / (double)stats->numMatches); ((double)stats->totalOffset) / (double)stats->numMatches);
printf("min offset, max offset: %u %u\n", printf("min offset, max offset: %u %u\n",
stats->minOffset, stats->maxOffset); stats->minOffset, stats->maxOffset);
printf("\n");
printf("offset histogram\n");
for (; i <= intLog2(stats->maxOffset); i++) {
printf("2^%*d: %10u\n", 2, i, stats->offsetHistogram[i]);
}
printf("\n");
printf("num collisions, num hash inserts, %% collisions: %u, %u, %.3f\n", printf("num collisions, num hash inserts, %% collisions: %u, %u, %.3f\n",
stats->numCollisions, stats->numHashInserts, stats->numCollisions, stats->numHashInserts,
stats->numHashInserts == 0 ? stats->numHashInserts == 0 ?
1.0 : (100.0 * (double)stats->numCollisions) / 1.0 : (100.0 * (double)stats->numCollisions) /
(double)stats->numHashInserts); (double)stats->numHashInserts);
printf("=====================\n");
} }
int LDM_isValidMatch(const BYTE *pIn, const BYTE *pMatch) { int LDM_isValidMatch(const BYTE *pIn, const BYTE *pMatch) {
@ -145,7 +180,7 @@ int LDM_isValidMatch(const BYTE *pIn, const BYTE *pMatch) {
* of the hash table. * of the hash table.
*/ */
static hash_t checksumToHash(U32 sum) { static hash_t checksumToHash(U32 sum) {
return ((sum * 2654435761U) >> ((32)-LDM_HASHLOG)); return ((sum * 2654435761U) >> (32 - LDM_HASHLOG));
} }
/** /**
@ -490,6 +525,7 @@ size_t LDM_compress(const void *src, size_t srcSize,
offset < cctx.stats.minOffset ? offset : cctx.stats.minOffset; offset < cctx.stats.minOffset ? offset : cctx.stats.minOffset;
cctx.stats.maxOffset = cctx.stats.maxOffset =
offset > cctx.stats.maxOffset ? offset : cctx.stats.maxOffset; offset > cctx.stats.maxOffset ? offset : cctx.stats.maxOffset;
cctx.stats.offsetHistogram[(U32)intLog2(offset)]++;
#endif #endif
// Move ip to end of block, inserting hashes at each position. // Move ip to end of block, inserting hashes at each position.
@ -607,7 +643,6 @@ size_t LDM_decompress(const void *src, size_t compressedSize,
// TODO: implement and test hash function // TODO: implement and test hash function
void LDM_test(void) { void LDM_test(void) {
} }
/* /*

View File

@ -11,7 +11,7 @@
#define LDM_OFFSET_SIZE 4 #define LDM_OFFSET_SIZE 4
// Defines the size of the hash table. // Defines the size of the hash table.
#define LDM_MEMORY_USAGE 22 #define LDM_MEMORY_USAGE 20
#define LDM_HASHLOG (LDM_MEMORY_USAGE-2) #define LDM_HASHLOG (LDM_MEMORY_USAGE-2)
#define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE)) #define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE))
#define LDM_HASHTABLESIZE_U32 ((LDM_HASHTABLESIZE) >> 2) #define LDM_HASHTABLESIZE_U32 ((LDM_HASHTABLESIZE) >> 2)
@ -19,8 +19,8 @@
#define WINDOW_SIZE (1 << 25) #define WINDOW_SIZE (1 << 25)
//These should be multiples of four. //These should be multiples of four.
#define LDM_MIN_MATCH_LENGTH 8 #define LDM_MIN_MATCH_LENGTH 4
#define LDM_HASH_LENGTH 8 #define LDM_HASH_LENGTH 4
typedef U32 offset_t; typedef U32 offset_t;
typedef U32 hash_t; typedef U32 hash_t;

View File

@ -13,7 +13,7 @@
#include "zstd.h" #include "zstd.h"
#define DEBUG #define DEBUG
//#define TEST #define TEST
/* Compress file given by fname and output to oname. /* Compress file given by fname and output to oname.
* Returns 0 if successful, error code otherwise. * Returns 0 if successful, error code otherwise.