From f791fc27e3faae15eef3dfa7ce768a05cd2773cb Mon Sep 17 00:00:00 2001 From: Stella Lau Date: Fri, 7 Jul 2017 12:44:29 -0700 Subject: [PATCH] Add header with compress and decompress size --- contrib/long_distance_matching/Makefile | 6 +-- contrib/long_distance_matching/ldm.c | 16 +++--- contrib/long_distance_matching/ldm.h | 7 +++ contrib/long_distance_matching/main-ldm.c | 61 +++++++++++++++-------- 4 files changed, 57 insertions(+), 33 deletions(-) diff --git a/contrib/long_distance_matching/Makefile b/contrib/long_distance_matching/Makefile index 0efae69b..4e04fd6a 100644 --- a/contrib/long_distance_matching/Makefile +++ b/contrib/long_distance_matching/Makefile @@ -16,11 +16,11 @@ LDFLAGS += -lzstd default: all -all: main main-ldm +all: main-ldm -main : ldm.c main.c - $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ +#main : ldm.c main.c +# $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ main-ldm : ldm.c main-ldm.c $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ diff --git a/contrib/long_distance_matching/ldm.c b/contrib/long_distance_matching/ldm.c index 908ac2ac..cb90efec 100644 --- a/contrib/long_distance_matching/ldm.c +++ b/contrib/long_distance_matching/ldm.c @@ -69,8 +69,6 @@ static void LDM_writeLE16(void *memPtr, U16 value) { } } - - static U32 LDM_read32(const void *ptr) { return *(const U32 *)ptr; } @@ -98,17 +96,13 @@ struct hash_entry { }; static U32 LDM_hash(U32 sequence) { - return ((sequence * 2654435761U) >> ((MINMATCH*8)-LDM_HASHLOG)); + return ((sequence * 2654435761U) >> ((32)-LDM_HASHLOG)); } static U32 LDM_hash_position(const void * const p) { return LDM_hash(LDM_read32(p)); } -static U64 find_best_match(tag t, U64 offset) { - return 0; -} - static void LDM_put_position_on_hash(const BYTE *p, U32 h, void *tableBase, const BYTE *srcBase) { U32 *hashTable = (U32 *) tableBase; @@ -148,6 +142,12 @@ static unsigned LDM_count(const BYTE *pIn, const BYTE *pMatch, return (unsigned)(pIn - pStart); } +void LDM_read_header(void const *source, size_t 
*compressed_size, + size_t *decompressed_size) { + U32 *ip = (U32 *)source; + *compressed_size = *ip++; + *decompressed_size = *ip; +} size_t LDM_compress(void const *source, void *dest, size_t source_size, size_t max_dest_size) { @@ -359,7 +359,7 @@ size_t LDM_decompress(void const *source, void *dest, size_t compressed_size, cpy = op + length; // Inefficient for now - while (match < cpy - offset) { + while (match < cpy - offset && op < oend) { *op++ = *match++; } } diff --git a/contrib/long_distance_matching/ldm.h b/contrib/long_distance_matching/ldm.h index 0aab6aa3..f4ca25a3 100644 --- a/contrib/long_distance_matching/ldm.h +++ b/contrib/long_distance_matching/ldm.h @@ -3,10 +3,17 @@ #include /* size_t */ +#define LDM_COMPRESS_SIZE 4 +#define LDM_DECOMPRESS_SIZE 4 +#define LDM_HEADER_SIZE ((LDM_COMPRESS_SIZE)+(LDM_DECOMPRESS_SIZE)) + size_t LDM_compress(void const *source, void *dest, size_t source_size, size_t max_dest_size); size_t LDM_decompress(void const *source, void *dest, size_t compressed_size, size_t max_decompressed_size); +void LDM_read_header(void const *source, size_t *compressed_size, + size_t *decompressed_size); + #endif /* LDM_H */ diff --git a/contrib/long_distance_matching/main-ldm.c b/contrib/long_distance_matching/main-ldm.c index 7f1abdab..4d54ef6d 100644 --- a/contrib/long_distance_matching/main-ldm.c +++ b/contrib/long_distance_matching/main-ldm.c @@ -11,10 +11,10 @@ #include #include "ldm.h" -#define BUF_SIZE 16*1024 // Block size -#define LDM_HEADER_SIZE 8 +// #define BUF_SIZE 16*1024 // Block size #define DEBUG -// #define ZSTD + +//#define ZSTD #if 0 static size_t compress_file(FILE *in, FILE *out, size_t *size_in, @@ -159,9 +159,10 @@ static size_t compress(const char *fname, const char *oname) { perror("Fstat error"); return 1; } + size_t size_in = statbuf.st_size; /* go to the location corresponding to the last byte */ - if (lseek(fdout, statbuf.st_size - 1, SEEK_SET) == -1) { + if (lseek(fdout, size_in + LDM_HEADER_SIZE - 1, 
SEEK_SET) == -1) { perror("lseek error"); return 1; } @@ -178,24 +179,31 @@ static size_t compress(const char *fname, const char *oname) { perror("mmap error for input"); return 1; } + size_t out_size = statbuf.st_size + LDM_HEADER_SIZE; /* mmap the output file */ - if ((dst = mmap(0, statbuf.st_size, PROT_READ | PROT_WRITE, + if ((dst = mmap(0, out_size, PROT_READ | PROT_WRITE, MAP_SHARED, fdout, 0)) == (caddr_t) - 1) { perror("mmap error for output"); return 1; } - /* Copy input file to output file */ -// memcpy(dst, src, statbuf.st_size); #ifdef ZSTD size_t size_out = ZSTD_compress(dst, statbuf.st_size, src, statbuf.st_size, 1); #else - size_t size_out = LDM_compress(src, dst, statbuf.st_size, + size_t size_out = LDM_compress(src, dst + LDM_HEADER_SIZE, statbuf.st_size, statbuf.st_size); + size_out += LDM_HEADER_SIZE; + + // TODO: should depend on LDM_DECOMPRESS_SIZE write32 + memcpy(dst, &size_out, 4); + memcpy(dst + 4, &(statbuf.st_size), 4); + printf("Compressed size: %zu\n", size_out); + printf("Decompressed size: %zu\n", statbuf.st_size); #endif ftruncate(fdout, size_out); + printf("%25s : %6u -> %7u - %s (%.1f%%)\n", fname, (unsigned)statbuf.st_size, (unsigned)size_out, oname, (double)size_out / (statbuf.st_size) * 100); @@ -228,8 +236,22 @@ static size_t decompress(const char *fname, const char *oname) { return 1; } + /* mmap the input file */ + if ((src = mmap(0, statbuf.st_size, PROT_READ, MAP_SHARED, fdin, 0)) + == (caddr_t) - 1) { + perror("mmap error for input"); + return 1; + } + + /* read header */ + size_t compressed_size, decompressed_size; + LDM_read_header(src, &compressed_size, &decompressed_size); + + printf("Size, compressed_size, decompressed_size: %zu %zu %zu\n", + statbuf.st_size, compressed_size, decompressed_size); + /* go to the location corresponding to the last byte */ - if (lseek(fdout, 2*statbuf.st_size - 1, SEEK_SET) == -1) { + if (lseek(fdout, decompressed_size - 1, SEEK_SET) == -1) { perror("lseek error"); return 1; } @@ -240,15 
+262,8 @@ static size_t decompress(const char *fname, const char *oname) { return 1; } - /* mmap the input file */ - if ((src = mmap(0, statbuf.st_size, PROT_READ, MAP_SHARED, fdin, 0)) - == (caddr_t) - 1) { - perror("mmap error for input"); - return 1; - } - /* mmap the output file */ - if ((dst = mmap(0, statbuf.st_size, PROT_READ | PROT_WRITE, + if ((dst = mmap(0, decompressed_size, PROT_READ | PROT_WRITE, MAP_SHARED, fdout, 0)) == (caddr_t) - 1) { perror("mmap error for output"); return 1; @@ -258,13 +273,15 @@ static size_t decompress(const char *fname, const char *oname) { // memcpy(dst, src, statbuf.st_size); #ifdef ZSTD - size_t size_out = ZSTD_decompress(dst, statbuf.st_size, - src, statbuf.st_size); + size_t size_out = ZSTD_decompress(dst, decompressed_size, + src + LDM_HEADER_SIZE, + statbuf.st_size - LDM_HEADER_SIZE); #else - size_t size_out = LDM_decompress(src, dst, statbuf.st_size, - statbuf.st_size); + size_t size_out = LDM_decompress(src + LDM_HEADER_SIZE, dst, + statbuf.st_size - LDM_HEADER_SIZE, + decompressed_size); #endif - //ftruncate(fdout, size_out); + ftruncate(fdout, size_out); close(fdin); close(fdout);