diff --git a/contrib/long_distance_matching/Makefile b/contrib/long_distance_matching/Makefile index 8ba16d03..cff78644 100644 --- a/contrib/long_distance_matching/Makefile +++ b/contrib/long_distance_matching/Makefile @@ -27,7 +27,7 @@ default: all all: main-ldm -main-ldm : ldm.c main-ldm.c +main-ldm : ldm.h ldm.c main-ldm.c $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ clean: diff --git a/contrib/long_distance_matching/ldm.c b/contrib/long_distance_matching/ldm.c index 437feb1c..186fa08e 100644 --- a/contrib/long_distance_matching/ldm.c +++ b/contrib/long_distance_matching/ldm.c @@ -15,7 +15,7 @@ #define RUN_MASK ((1U<>= 1) { + ret++; + } + return ret; +} + void LDM_printCompressStats(const LDM_compressStats *stats) { + int i = 0; printf("=====================\n"); printf("Compression statistics\n"); //TODO: compute percentage matched? @@ -107,11 +131,22 @@ void LDM_printCompressStats(const LDM_compressStats *stats) { ((double)stats->totalOffset) / (double)stats->numMatches); printf("min offset, max offset: %u %u\n", stats->minOffset, stats->maxOffset); + + printf("\n"); + printf("offset histogram\n"); + for (; i <= intLog2(stats->maxOffset); i++) { + printf("2^%*d: %10u\n", 2, i, stats->offsetHistogram[i]); + } + printf("\n"); + + printf("num collisions, num hash inserts, %% collisions: %u, %u, %.3f\n", stats->numCollisions, stats->numHashInserts, stats->numHashInserts == 0 ? 1.0 : (100.0 * (double)stats->numCollisions) / (double)stats->numHashInserts); + printf("=====================\n"); + } int LDM_isValidMatch(const BYTE *pIn, const BYTE *pMatch) { @@ -145,7 +180,7 @@ int LDM_isValidMatch(const BYTE *pIn, const BYTE *pMatch) { * of the hash table. */ static hash_t checksumToHash(U32 sum) { - return ((sum * 2654435761U) >> ((32)-LDM_HASHLOG)); + return ((sum * 2654435761U) >> (32 - LDM_HASHLOG)); } /** @@ -490,6 +525,7 @@ size_t LDM_compress(const void *src, size_t srcSize, offset < cctx.stats.minOffset ? offset : cctx.stats.minOffset; cctx.stats.maxOffset = offset > cctx.stats.maxOffset ? offset : cctx.stats.maxOffset; + cctx.stats.offsetHistogram[(U32)intLog2(offset)]++; #endif // Move ip to end of block, inserting hashes at each position. @@ -607,7 +643,6 @@ size_t LDM_decompress(const void *src, size_t compressedSize, // TODO: implement and test hash function void LDM_test(void) { - } /* diff --git a/contrib/long_distance_matching/ldm.h b/contrib/long_distance_matching/ldm.h index 5da3c3b9..0e54faa7 100644 --- a/contrib/long_distance_matching/ldm.h +++ b/contrib/long_distance_matching/ldm.h @@ -11,7 +11,7 @@ #define LDM_OFFSET_SIZE 4 // Defines the size of the hash table. -#define LDM_MEMORY_USAGE 22 +#define LDM_MEMORY_USAGE 20 #define LDM_HASHLOG (LDM_MEMORY_USAGE-2) #define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE)) #define LDM_HASHTABLESIZE_U32 ((LDM_HASHTABLESIZE) >> 2) @@ -19,8 +19,8 @@ #define WINDOW_SIZE (1 << 25) //These should be multiples of four. -#define LDM_MIN_MATCH_LENGTH 8 -#define LDM_HASH_LENGTH 8 +#define LDM_MIN_MATCH_LENGTH 4 +#define LDM_HASH_LENGTH 4 typedef U32 offset_t; typedef U32 hash_t; diff --git a/contrib/long_distance_matching/main-ldm.c b/contrib/long_distance_matching/main-ldm.c index 40afef8c..ea6375ba 100644 --- a/contrib/long_distance_matching/main-ldm.c +++ b/contrib/long_distance_matching/main-ldm.c @@ -13,7 +13,7 @@ #include "zstd.h" #define DEBUG -//#define TEST +#define TEST /* Compress file given by fname and output to oname. * Returns 0 if successful, error code otherwise.