Add header with compress and decompress size

This commit is contained in:
Stella Lau 2017-07-07 12:44:29 -07:00
parent 3bbfa1249e
commit f791fc27e3
4 changed files with 57 additions and 33 deletions

View File

@ -16,11 +16,11 @@ LDFLAGS += -lzstd
default: all default: all
all: main main-ldm all: main-ldm
main : ldm.c main.c #main : ldm.c main.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ # $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
main-ldm : ldm.c main-ldm.c main-ldm : ldm.c main-ldm.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@

View File

@ -69,8 +69,6 @@ static void LDM_writeLE16(void *memPtr, U16 value) {
} }
} }
static U32 LDM_read32(const void *ptr) { static U32 LDM_read32(const void *ptr) {
return *(const U32 *)ptr; return *(const U32 *)ptr;
} }
@ -98,17 +96,13 @@ struct hash_entry {
}; };
static U32 LDM_hash(U32 sequence) { static U32 LDM_hash(U32 sequence) {
return ((sequence * 2654435761U) >> ((MINMATCH*8)-LDM_HASHLOG)); return ((sequence * 2654435761U) >> ((32)-LDM_HASHLOG));
} }
static U32 LDM_hash_position(const void * const p) { static U32 LDM_hash_position(const void * const p) {
return LDM_hash(LDM_read32(p)); return LDM_hash(LDM_read32(p));
} }
/* Placeholder: ignores both arguments and always returns 0. */
static U64 find_best_match(tag t, U64 offset) {
  (void)t;
  (void)offset;
  return 0;
}
static void LDM_put_position_on_hash(const BYTE *p, U32 h, void *tableBase, static void LDM_put_position_on_hash(const BYTE *p, U32 h, void *tableBase,
const BYTE *srcBase) { const BYTE *srcBase) {
U32 *hashTable = (U32 *) tableBase; U32 *hashTable = (U32 *) tableBase;
@ -148,6 +142,12 @@ static unsigned LDM_count(const BYTE *pIn, const BYTE *pMatch,
return (unsigned)(pIn - pStart); return (unsigned)(pIn - pStart);
} }
/**
 * Reads the two size fields stored at the start of `source`.
 *
 * The fields are two consecutive U32 values, read in host byte order:
 * first the compressed size, then the decompressed size.
 *
 * @param source            start of the header; must reference at least two
 *                          readable U32 values.
 *                          NOTE(review): the direct U32 loads assume `source`
 *                          is suitably aligned for U32 -- confirm for callers.
 * @param compressed_size   receives the first U32 of the header.
 * @param decompressed_size receives the second U32 of the header.
 */
void LDM_read_header(void const *source, size_t *compressed_size,
                     size_t *decompressed_size) {
  /* The header is only read, so keep the const qualifier on the pointer. */
  const U32 *ip = (const U32 *)source;
  *compressed_size = ip[0];
  *decompressed_size = ip[1];
}
size_t LDM_compress(void const *source, void *dest, size_t source_size, size_t LDM_compress(void const *source, void *dest, size_t source_size,
size_t max_dest_size) { size_t max_dest_size) {
@ -359,7 +359,7 @@ size_t LDM_decompress(void const *source, void *dest, size_t compressed_size,
cpy = op + length; cpy = op + length;
// Inefficient for now // Inefficient for now
while (match < cpy - offset) { while (match < cpy - offset && op < oend) {
*op++ = *match++; *op++ = *match++;
} }
} }

View File

@ -3,10 +3,17 @@
#include <stddef.h> /* size_t */ #include <stddef.h> /* size_t */
/* Header placed in front of the compressed data: the compressed size followed
 * by the decompressed size. */
/* Width in bytes of the compressed-size field. */
#define LDM_COMPRESS_SIZE 4
/* Width in bytes of the decompressed-size field. */
#define LDM_DECOMPRESS_SIZE 4
/* Total width in bytes of the header (both fields together). */
#define LDM_HEADER_SIZE ((LDM_COMPRESS_SIZE)+(LDM_DECOMPRESS_SIZE))
size_t LDM_compress(void const *source, void *dest, size_t source_size, size_t LDM_compress(void const *source, void *dest, size_t source_size,
size_t max_dest_size); size_t max_dest_size);
size_t LDM_decompress(void const *source, void *dest, size_t compressed_size, size_t LDM_decompress(void const *source, void *dest, size_t compressed_size,
size_t max_decompressed_size); size_t max_decompressed_size);
/**
 * Reads the header found at the start of `source`.
 *
 * @param source            pointer to the beginning of the header.
 * @param compressed_size   receives the first 32-bit value of the header.
 * @param decompressed_size receives the second 32-bit value of the header.
 */
void LDM_read_header(void const *source, size_t *compressed_size,
size_t *decompressed_size);
#endif /* LDM_H */ #endif /* LDM_H */

View File

@ -11,10 +11,10 @@
#include <fcntl.h> #include <fcntl.h>
#include "ldm.h" #include "ldm.h"
#define BUF_SIZE 16*1024 // Block size // #define BUF_SIZE 16*1024 // Block size
/* ldm.h already provides LDM_HEADER_SIZE; redefining it with a different
 * replacement list is an invalid macro redefinition, so only supply the
 * fallback value when the header did not define it. */
#ifndef LDM_HEADER_SIZE
#define LDM_HEADER_SIZE 8
#endif
#define DEBUG #define DEBUG
// #define ZSTD
//#define ZSTD
#if 0 #if 0
static size_t compress_file(FILE *in, FILE *out, size_t *size_in, static size_t compress_file(FILE *in, FILE *out, size_t *size_in,
@ -159,9 +159,10 @@ static size_t compress(const char *fname, const char *oname) {
perror("Fstat error"); perror("Fstat error");
return 1; return 1;
} }
size_t size_in = statbuf.st_size;
/* go to the location corresponding to the last byte */ /* go to the location corresponding to the last byte */
if (lseek(fdout, statbuf.st_size - 1, SEEK_SET) == -1) { if (lseek(fdout, size_in + LDM_HEADER_SIZE - 1, SEEK_SET) == -1) {
perror("lseek error"); perror("lseek error");
return 1; return 1;
} }
@ -178,24 +179,31 @@ static size_t compress(const char *fname, const char *oname) {
perror("mmap error for input"); perror("mmap error for input");
return 1; return 1;
} }
size_t out_size = statbuf.st_size + LDM_HEADER_SIZE;
/* mmap the output file */ /* mmap the output file */
if ((dst = mmap(0, statbuf.st_size, PROT_READ | PROT_WRITE, if ((dst = mmap(0, out_size, PROT_READ | PROT_WRITE,
MAP_SHARED, fdout, 0)) == (caddr_t) - 1) { MAP_SHARED, fdout, 0)) == (caddr_t) - 1) {
perror("mmap error for output"); perror("mmap error for output");
return 1; return 1;
} }
/* Copy input file to output file */
// memcpy(dst, src, statbuf.st_size);
#ifdef ZSTD #ifdef ZSTD
size_t size_out = ZSTD_compress(dst, statbuf.st_size, size_t size_out = ZSTD_compress(dst, statbuf.st_size,
src, statbuf.st_size, 1); src, statbuf.st_size, 1);
#else #else
size_t size_out = LDM_compress(src, dst, statbuf.st_size, size_t size_out = LDM_compress(src, dst + LDM_HEADER_SIZE, statbuf.st_size,
statbuf.st_size); statbuf.st_size);
size_out += LDM_HEADER_SIZE;
// TODO: should depend on LDM_DECOMPRESS_SIZE write32
memcpy(dst, &size_out, 4);
memcpy(dst + 4, &(statbuf.st_size), 4);
printf("Compressed size: %zu\n", size_out);
printf("Decompressed size: %zu\n", statbuf.st_size);
#endif #endif
ftruncate(fdout, size_out); ftruncate(fdout, size_out);
printf("%25s : %6u -> %7u - %s (%.1f%%)\n", fname, printf("%25s : %6u -> %7u - %s (%.1f%%)\n", fname,
(unsigned)statbuf.st_size, (unsigned)size_out, oname, (unsigned)statbuf.st_size, (unsigned)size_out, oname,
(double)size_out / (statbuf.st_size) * 100); (double)size_out / (statbuf.st_size) * 100);
@ -228,8 +236,22 @@ static size_t decompress(const char *fname, const char *oname) {
return 1; return 1;
} }
/* mmap the input file */
if ((src = mmap(0, statbuf.st_size, PROT_READ, MAP_SHARED, fdin, 0))
== (caddr_t) - 1) {
perror("mmap error for input");
return 1;
}
/* read header */
size_t compressed_size, decompressed_size;
LDM_read_header(src, &compressed_size, &decompressed_size);
printf("Size, compressed_size, decompressed_size: %zu %zu %zu\n",
statbuf.st_size, compressed_size, decompressed_size);
/* go to the location corresponding to the last byte */ /* go to the location corresponding to the last byte */
if (lseek(fdout, 2*statbuf.st_size - 1, SEEK_SET) == -1) { if (lseek(fdout, decompressed_size - 1, SEEK_SET) == -1) {
perror("lseek error"); perror("lseek error");
return 1; return 1;
} }
@ -240,15 +262,8 @@ static size_t decompress(const char *fname, const char *oname) {
return 1; return 1;
} }
/* mmap the input file */
if ((src = mmap(0, statbuf.st_size, PROT_READ, MAP_SHARED, fdin, 0))
== (caddr_t) - 1) {
perror("mmap error for input");
return 1;
}
/* mmap the output file */ /* mmap the output file */
if ((dst = mmap(0, statbuf.st_size, PROT_READ | PROT_WRITE, if ((dst = mmap(0, decompressed_size, PROT_READ | PROT_WRITE,
MAP_SHARED, fdout, 0)) == (caddr_t) - 1) { MAP_SHARED, fdout, 0)) == (caddr_t) - 1) {
perror("mmap error for output"); perror("mmap error for output");
return 1; return 1;
@ -258,13 +273,15 @@ static size_t decompress(const char *fname, const char *oname) {
// memcpy(dst, src, statbuf.st_size); // memcpy(dst, src, statbuf.st_size);
#ifdef ZSTD #ifdef ZSTD
size_t size_out = ZSTD_decompress(dst, statbuf.st_size, size_t size_out = ZSTD_decompress(dst, decompressed_size,
src, statbuf.st_size); src + LDM_HEADER_SIZE,
statbuf.st_size - LDM_HEADER_SIZE);
#else #else
size_t size_out = LDM_decompress(src, dst, statbuf.st_size, size_t size_out = LDM_decompress(src + LDM_HEADER_SIZE, dst,
statbuf.st_size); statbuf.st_size - LDM_HEADER_SIZE,
decompressed_size);
#endif #endif
//ftruncate(fdout, size_out); ftruncate(fdout, size_out);
close(fdin); close(fdin);
close(fdout); close(fdout);