From 0e12919bb3f6fa036e909e373f71759d30619866 Mon Sep 17 00:00:00 2001 From: Pentium44 Date: Tue, 15 Jun 2021 19:48:43 -0700 Subject: [PATCH] Remove LZ77 for LZ78, optimized --- Makefile | 4 +- docs/examples/tar.ss | 8 + src/compression.c | 340 ----------------------- src/inc/util.h | 1 + src/lexer.c | 114 ++++---- src/lz78/bitio.c | 305 +++++++++++++++++++++ src/lz78/bitio.h | 53 ++++ src/lz78/lz78.c | 638 +++++++++++++++++++++++++++++++++++++++++++ src/lz78/lz78.h | 70 +++++ src/lz78/wrapper.c | 291 ++++++++++++++++++++ src/lz78/wrapper.h | 86 ++++++ src/util.c | 10 + 12 files changed, 1526 insertions(+), 394 deletions(-) create mode 100755 docs/examples/tar.ss delete mode 100644 src/compression.c create mode 100644 src/lz78/bitio.c create mode 100644 src/lz78/bitio.h create mode 100644 src/lz78/lz78.c create mode 100644 src/lz78/lz78.h create mode 100644 src/lz78/wrapper.c create mode 100644 src/lz78/wrapper.h diff --git a/Makefile b/Makefile index 3c05cb0..a0497c7 100755 --- a/Makefile +++ b/Makefile @@ -11,13 +11,13 @@ PREFIX ?= /usr CC ?= gcc #CC ?= tcc #CC ?= musl-tcc -CFLAGS += -O2 -pedantic -g -Wall -Wextra +CFLAGS += -O3 -g -Wall -Wextra CPPFLAGS += -DVERSION=$(VERSION) -D_FORTIFY_SOURCE=2 #CPPFLAGS += -DVERSION=$(VERSION) LDFLAGS += -lm BIN ?= slidescript -SRCS=$(wildcard src/*.c) +SRCS=$(wildcard src/lz78/*.c) $(wildcard src/*.c) OBJECTS=$(SRCS:%.c=%.o) diff --git a/docs/examples/tar.ss b/docs/examples/tar.ss new file mode 100755 index 0000000..7bff79c --- /dev/null +++ b/docs/examples/tar.ss @@ -0,0 +1,8 @@ +#!/usr/bin/slidescript + +print "Compressing..." +compress "test" "docs" + +sleep "1" +print "Decompressing..." 
+decompress "test.tar.ss" diff --git a/src/compression.c b/src/compression.c deleted file mode 100644 index 016b050..0000000 --- a/src/compression.c +++ /dev/null @@ -1,340 +0,0 @@ -// LZ77 compression examples, simple and lightweight -// Being quick to process and execute, this will be great -// For internal compression on modern machines -// -// Andy Herbert -// lz1 https://github.com/andyherbert/lz1 -// - -#include "inc/deps.h" -#include "inc/compression.h" - -//-------------------------------------------------------------------- - -#define MY_ERR "Error: lz77" - -#define my_free(dm) \ -{ \ - if (dm == (void *) 0) \ - { fprintf (stderr, "WTH. Mooo.\n"); exit (1); }; \ - free (dm); \ - dm = (void *) 0; \ -} - -//-------------------------------------------------------------------- - -uint32_t lz77_compress (uint8_t *uncompressed_text, uint32_t uncompressed_size, uint8_t *compressed_text, uint8_t pointer_length_width) -{ - uint16_t pointer_pos, temp_pointer_pos, output_pointer, pointer_length, temp_pointer_length; - uint32_t compressed_pointer, output_size, coding_pos, output_lookahead_ref, look_behind, look_ahead; - uint16_t pointer_pos_max, pointer_length_max; - pointer_pos_max = pow(2, 16 - pointer_length_width); - pointer_length_max = pow(2, pointer_length_width); - - *((uint32_t *) compressed_text) = uncompressed_size; - *(compressed_text + 4) = pointer_length_width; - compressed_pointer = output_size = 5; - - for(coding_pos = 0; coding_pos < uncompressed_size; ++coding_pos) - { - pointer_pos = 0; - pointer_length = 0; - for(temp_pointer_pos = 1; (temp_pointer_pos < pointer_pos_max) && (temp_pointer_pos <= coding_pos); ++temp_pointer_pos) - { - look_behind = coding_pos - temp_pointer_pos; - look_ahead = coding_pos; - for(temp_pointer_length = 0; uncompressed_text[look_ahead++] == uncompressed_text[look_behind++]; ++temp_pointer_length) - { - if(temp_pointer_length == pointer_length_max) - break; - } - - if(temp_pointer_length > pointer_length) - { - 
pointer_pos = temp_pointer_pos; - pointer_length = temp_pointer_length; - if(pointer_length == pointer_length_max) - break; - } - } - coding_pos += pointer_length; - if((coding_pos == uncompressed_size) && pointer_length) - { - output_pointer = (pointer_length == 1) ? 0 : ((pointer_pos << pointer_length_width) | (pointer_length - 2)); - output_lookahead_ref = coding_pos - 1; - } - else - { - output_pointer = (pointer_pos << pointer_length_width) | (pointer_length ? (pointer_length - 1) : 0); - output_lookahead_ref = coding_pos; - } - *((uint16_t *) (compressed_text + compressed_pointer)) = output_pointer; - compressed_pointer += 2; - *(compressed_text + compressed_pointer++) = *(uncompressed_text + output_lookahead_ref); - output_size += 3; - } - - return output_size; -} - -uint32_t lz77_decompress (uint8_t *compressed_text, uint8_t *uncompressed_text) -{ - uint8_t pointer_length_width; - uint16_t input_pointer, pointer_length, pointer_pos, pointer_length_mask; - uint32_t compressed_pointer, coding_pos, pointer_offset, uncompressed_size; - - uncompressed_size = *((uint32_t *) compressed_text); - pointer_length_width = *(compressed_text + 4); - compressed_pointer = 5; - - pointer_length_mask = pow(2, pointer_length_width) - 1; - - for(coding_pos = 0; coding_pos < uncompressed_size; ++coding_pos) - { - input_pointer = *((uint16_t *) (compressed_text + compressed_pointer)); - compressed_pointer += 2; - pointer_pos = input_pointer >> pointer_length_width; - pointer_length = pointer_pos ? 
((input_pointer & pointer_length_mask) + 1) : 0; - if(pointer_pos) - for(pointer_offset = coding_pos - pointer_pos; pointer_length > 0; --pointer_length) - uncompressed_text[coding_pos++] = uncompressed_text[pointer_offset++]; - *(uncompressed_text + coding_pos) = *(compressed_text + compressed_pointer++); - } - - return coding_pos; -} - -long fsize (FILE *in) -{ - long pos, length; - pos = ftell(in); - fseek(in, 0L, SEEK_END); - length = ftell(in); - fseek(in, pos, SEEK_SET); - return length; -} - -uint32_t ss_compress (const char *filename_in, char *filename_out, uint8_t pointer_length_width) -{ - FILE *in, *out; - uint8_t *uncompressed_text, *compressed_text; - uint32_t uncompressed_size, compressed_size; - - in = fopen(filename_in, "rb"); - if(in == NULL) - return 0; - uncompressed_size = fsize(in); - uncompressed_text = malloc(uncompressed_size + 20); - // +20 for uncompressed data size sway in algorithm - if((uncompressed_size != fread(uncompressed_text, 1, uncompressed_size, in))) - { - my_free(uncompressed_text); - return 0; - } - fclose(in); - - compressed_text = malloc((int)(uncompressed_size * 1.25)); - // * 2 for uncompressed size climb on first compress pass - compressed_size = lz77_compress(uncompressed_text, uncompressed_size, compressed_text, pointer_length_width); - - out = fopen(filename_out, "wb"); - if(out == NULL) - { - my_free(uncompressed_text); - my_free(compressed_text); - return 0; - } - - if((compressed_size != fwrite(compressed_text, 1, compressed_size, out))) - { - my_free(uncompressed_text); - my_free(compressed_text); - fclose(out); - return 0; - } - fclose(out); - - free(compressed_text); - free(uncompressed_text); - - return compressed_size; -} - -//-------------------------------------------------------------------- - -#ifndef ZERO -#define ZERO 0 -#endif - -#ifndef ONE -#define ONE 1 -#endif - -//-------------------------------------------------------------------- - -uint32_t ss_decompress -( - char *ifname, // Input -file name - 
char *ofname // Output -file name -) -{ - FILE *ifp; // Input -file pointer - FILE *ofp; // Output -file pointer - - uint8_t *dm_comp; // DM: Compressed data - uint8_t *dm_deco; // DM: Uncompressed data - - int orig_size; // Original size - int comp_size; // Compressed size - int deco_size; // Decompressed size - int writ_size; // Written size - -//-------------------------------------------------------------------- -// Open input file. - - if ((ifp = fopen (ifname, "rb")) == NULL) - { // Error - fprintf (stderr, - "%s: File not found: %s\n", - MY_ERR, ifname); - - return ZERO; - } - -//-------------------------------------------------------------------- -// Set up compressed-data buffer. - - // Compressed size - comp_size = (int) fsize (ifp); - // DM: Compressed data - dm_comp = (uint8_t *) malloc (comp_size + 10); - // Add some extra memory at the end of malloc comp_size - // Saves dirty archive size sway, WIP - if (dm_comp == NULL) // Error? - { // Yes - Error exit - fprintf (stderr, - "%s: Error: malloc failed\n", - MY_ERR); - - return ZERO; - } - -//-------------------------------------------------------------------- -// Read compressed data. - - if (fread (dm_comp, ONE, comp_size, ifp) != (size_t) comp_size) - { // Error - my_free (dm_comp); // Release DM - - fprintf (stderr, - "%s: Read of input data failed\n", - MY_ERR); - - return ZERO; - } - - fclose (ifp); // Close input file - -//-------------------------------------------------------------------- -// Set up decompressed-data buffer. - - orig_size = (int) *((uint32_t *) dm_comp); - dm_deco = (uint8_t *) malloc (orig_size + 20); - // +20 to cover byte sway, dirty trick for mem leak - if (dm_comp == NULL) // Error? - { // Yes - my_free (dm_comp); // Release DM - - fprintf (stderr, "%s: malloc failed\n", MY_ERR); - return ZERO; - } - -//-------------------------------------------------------------------- -// Decompress. 
- - deco_size = (int) lz77_decompress (dm_comp, dm_deco); - my_free (dm_comp); // Release DM - - if (deco_size < orig_size) // Error? - { // Yes - my_free (dm_deco); // Release DM - - fprintf (stderr, - "%s: deco size %d < orig size %d\n", - MY_ERR, deco_size, orig_size); - - return ZERO; - } - -//-------------------------------------------------------------------- -// Open output file. - - if ((ofp = fopen (ofname, "wb")) == NULL) - { // Error - my_free (dm_deco); // Release DM - - fprintf (stderr, - "%s: Can't open output file: %s\n", - MY_ERR, ofname); - - return ZERO; - } - -//-------------------------------------------------------------------- -// Write to output file. - - writ_size = (int) fwrite (dm_deco, ONE, deco_size, ofp); - - fclose (ofp); // Close output file - my_free (dm_deco); // Release DM - - if (writ_size != deco_size) // Error? - { // Yes - fprintf (stderr, - "%s: Bytes written %d != Data size %d\n", - MY_ERR, writ_size, deco_size); - - return ZERO; - } - -//-------------------------------------------------------------------- -// Wrap it up. 
- - return deco_size; // == orig_size -} - -//-------------------------------------------------------------------- - -/* -int main (int argc, char const *argv[]) -{ - FILE *in; - - char filename[129]; - char filedecout[141]; - - if(argc < 2) - { - printf("Please enter a filename: ./comp file.txt"); - } - - in = fopen(argv[1], "r"); - if(in == NULL) - return 0; - - if(strlen(argv[1]) > 128) - { - printf("Filename too long"); - return 1; - } - - sprintf(filename, "%s.ss", argv[1]); - sprintf(filedecout, "%s.1", argv[1]); - - printf("Original size: %ld\n", fsize(in)); - fclose(in); - for(uint8_t i = 1; i <= 6; ++i) - printf("Compressed (%i): %u, decompressed: (%u)\n", i, ss_compress(argv[1], filename, 20000000, i), ss_decompress(filename, filedecout)); - return 0; -} -*/ diff --git a/src/inc/util.h b/src/inc/util.h index 428c4b5..521d9c2 100644 --- a/src/inc/util.h +++ b/src/inc/util.h @@ -31,6 +31,7 @@ void syn_error(char *message); void syn_warn(char *message); +long fsize (FILE *in); char *strip_nl(char *string); int file_exists(char *path); int is_dir(char *path); diff --git a/src/lexer.c b/src/lexer.c index 229bf7a..e1abe54 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -18,7 +18,7 @@ // For slidescript compression algorithm #include "inc/tar.h" -#include "inc/compression.h" +#include "lz78/wrapper.h" #define strtok_next(s) strtok_r (NULL, s, &strtok_save) @@ -487,9 +487,14 @@ char *process_line (char *line) { char *filename; struct tar_t *archive = NULL; - int fd; + int fd, lzret; + wrapper *lzwrapper; + int bsize = B_SIZE_DEFAULT; + uint8_t w_mode = WRAPPER_MODE_DECOMPRESS; + uint8_t w_type = LZ78_ALGORITHM; + + retbuf = qmalloc(QM_SS, 8129); - tar_free_pool(); tok_srch = strtok_next ("\""); if (tok_srch == NULL) { @@ -505,27 +510,33 @@ char *process_line (char *line) filename = parse_vars(tok_srch); - FILE *in; - - char origsize[128]; - char filedecout[MAX_FILENAME_LEN+5]; - - in = fopen(filename, "rb"); - if (in == NULL) - { - x_warn("ss:warn:compress, failed 
to open tar for compression"); - return NULL; - } + char filedecout[128]; sprintf(filedecout, "uncompressed.tar"); - sprintf(origsize, "%ld", fsize(in)); + if (bsize <= 0) { + x_warn("ss:warn:compress, default buffer not set?"); + return NULL; + } - uint32_t deco_return = ss_decompress(filename, filedecout); - retbuf = qmalloc(QM_SS, (sizeof(deco_return) + strlen(filename) + strlen(origsize) + 40)); - sprintf(retbuf, "ss: %s: decompressed: %s -> %u", filename, origsize, deco_return); + /* Creates a wrapper instance */ + lzwrapper = wrapper_new(w_mode, w_type, NULL); + if (lzwrapper == NULL) { + x_warn("ss:warn:decompress, failed to open lz78 wrapper socket!"); + return NULL; + } - fclose(in); + /* Executes the wrapper function */ + lzret = wrapper_exec(lzwrapper, filename, filedecout); + + if (lzret != WRAPPER_SUCCESS) + { + x_warn("ss:warn:decompress, failed to decompress tarball: %d", lzret); + return NULL; + } + + /* Destroyes the wrapper instance */ + wrapper_destroy(lzwrapper); // open existing file if ((fd = open(filedecout, O_RDWR)) < 0) { @@ -562,9 +573,14 @@ char *process_line (char *line) else if (strncmp("compress",tok_srch,8) == 0) { char filename[MAX_FILENAME_LEN+1]; // Files to be added into the archive - char comp_size[128]; struct tar_t *archive = NULL; - int fd; + int fd, lzret; + wrapper *lzwrapper; + int bsize = B_SIZE_DEFAULT; + uint8_t w_mode = WRAPPER_MODE_COMPRESS; + uint8_t w_type = LZ78_ALGORITHM; + + retbuf = qmalloc(QM_SS, 8129); tar_free_pool(); tok_srch = strtok_next ("\""); @@ -645,45 +661,39 @@ char *process_line (char *line) close(fd); // don't bother checking for fd < 0 - FILE *in; - - char origsize[128]; char file_comp[MAX_FILENAME_LEN+9]; - char filedecout[MAX_FILENAME_LEN+10]; - in = fopen(filename, "rb"); - if (in == NULL) - { - x_warn("ss:warn:compress, failed to open tar for compression"); + sprintf(file_comp, "%s.tar.ss", filename); + + if (bsize <= 0) { + x_warn("ss:warn:compress, default buffer not set?"); return NULL; } - 
sprintf(file_comp, "%s.tar.ss", filename); - sprintf(filedecout, "%s.tar.1", filename); - - sprintf(origsize, "%ld", fsize(in)); - - sprintf(comp_size, "%s", origsize); - - fclose(in); - for(uint32_t i = 1; i < 7; ++i) - { - uint32_t comp_return = ss_compress(filename, file_comp, i); - uint32_t deco_return = ss_decompress(file_comp, filedecout); - if (atoi(comp_size) < (int)comp_return && comp_return != 0 && deco_return != 0 && i != 1) - { - retbuf = qmalloc(QM_SS, sizeof(deco_return) + sizeof(comp_return) + strlen(file_comp) + 40); - sprintf(retbuf, "ss: %s: compressed: %u -> %u", file_comp, deco_return, comp_return); - break; - } - - sprintf(comp_size, "%u", comp_return); - printf("pass %u decompressed/compressed: %u/%u\n", i, deco_return, comp_return); - fflush(stdout); + /* Creates a wrapper instance */ + lzwrapper = wrapper_new(w_mode, w_type, NULL); + if (lzwrapper == NULL) { + x_warn("ss:warn:compress, failed to open lz78 wrapper socket!"); + return NULL; } + /* Executes the wrapper function */ + lzret = wrapper_exec(lzwrapper, filename, file_comp); + + if (lzret != WRAPPER_SUCCESS) + { + x_warn("ss:warn:compress, error on compress: %d", lzret); + return NULL; + } + else + { + sprintf(retbuf, "Compressed -> %s.tar.ss", filename); + } + + /* Destroyes the wrapper instance */ + wrapper_destroy(lzwrapper); + // Remove the decompressed version for sanity check - remove(filedecout); remove(filename); return retbuf; diff --git a/src/lz78/bitio.c b/src/lz78/bitio.c new file mode 100644 index 0000000..4cdc7c7 --- /dev/null +++ b/src/lz78/bitio.c @@ -0,0 +1,305 @@ +/* +* Basic implementation of LZ78 compression algorithm +* +* Copyright (C) 2010 evilaliv3 +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. 
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "bitio.h"
+
+/* Struct of bitfile */
+struct __bit_file {
+    int fd; /* File descriptor */
+    int mode; /* Mode: ACCESS_READ or ACCESS_WRITE (as validated by bit_open) */
+    UINTMAX_T buff_size; /* Buffer size (bits) */
+    UINTMAX_T w_start; /* Window start (bits) */
+    UINTMAX_T w_len; /* Window length (bits) */
+    char buff[1]; /* Buffer (contiguous memory area) */
+};
+
+/* Return the largest buffer size bit_open() accepts: the max UINTMAX_T value divided by 8 */
+UINTMAX_T max_index() {
+    UINTMAX_T max = -1;
+    max /= 8;
+    return max;
+}
+/* Wrap the already-open fd in a bit_file with a buff_size-bit buffer (multiple of 8). Returns NULL on bad mode/size or unusable fd; NOTE: closes fd when the allocation fails. */
+bit_file* bit_open(int fd, int mode, UINTMAX_T buff_size) {
+
+    bit_file* bfp;
+    int ret;
+
+    if (mode != ACCESS_READ && mode != ACCESS_WRITE)
+        return NULL;
+
+    if (mode == ACCESS_READ)
+        ret = read(fd, NULL, 0);
+    else /* mode == ACCESS_WRITE */
+        ret = write(fd, NULL, 0);
+
+    if(ret != 0)
+        return NULL;
+
+    if (buff_size % 8 != 0)
+        return NULL;
+
+    buff_size = (buff_size > max_index()) ? max_index() : buff_size;
+
+    /* Buffer allocation */
+    bfp = (bit_file*) calloc(1, sizeof(bit_file) + buff_size / 8);
+    if (bfp == NULL) {
+        close(fd);
+    } else {
+        bfp->fd = fd;
+        bfp->mode = mode;
+        bfp->buff_size = buff_size;
+        /* bfp->w_start and bfp->w_len are initialized by calloc */
+    }
+
+    return bfp;
+}
+/* Read up to n_bits bits into buff_out, starting at bit ofs (0-7) of its first byte. Returns the number of bits read (short on EOF/EAGAIN) or -1 on error. */
+int bit_read(bit_file* bfp, char* buff_out, UINTMAX_T n_bits, uint8_t ofs) {
+    uint8_t* base;
+    uint8_t mask;
+    uint8_t r_mask;
+    uint8_t writebit;
+    const uint8_t* readptr;
+    UINTMAX_T buff_ready_bytes;
+    UINTMAX_T bits_read = 0;
+    UINTMAX_T bits_read_total = 0;
+    UINTMAX_T buff_size;
+    UINTMAX_T w_start;
+    UINTMAX_T w_len;
+    UINTMAX_T c;
+    uint8_t aligned;
+
+    if (bfp == NULL || buff_out == NULL || ofs > 7)
+        return -1;
+
+    if (bfp->mode != ACCESS_READ)
+        return -1;
+
+    buff_size = bfp->buff_size;
+    w_start = bfp->w_start;
+    w_len = bfp->w_len;
+
+    mask = 1 << ofs;
+    base = (uint8_t*) buff_out;
+
+    /* Check if input and output are aligned to byte */
+    aligned = (mask == 1 && (w_start % 8 == 0)) ? 1 : 0;
+
+    while (n_bits > 0) {
+        /* Buffer refill if needed */
+        if (w_len == 0) {
+            c = read(bfp->fd, bfp->buff, buff_size / 8);
+            if (c == (uint32_t)-1) {
+                if (errno == EAGAIN) {
+                    errno = 0;
+                    break;
+                } else {
+                    return -1;
+                }
+            } else if (c == 0) {
+                break;
+            }
+
+            w_start = 0;
+            w_len = c * 8;
+        }
+
+        readptr = (uint8_t*)&(bfp->buff) + w_start / 8;
+
+        if (aligned && w_len > 7 && n_bits >= w_len) {
+            /* Optimization: due to alignment we can use memcpy */
+            buff_ready_bytes = w_len / 8;
+            memcpy(base, readptr, buff_ready_bytes);
+            base += buff_ready_bytes;
+
+            bits_read = buff_ready_bytes * 8;
+            w_start = (w_start + bits_read) % buff_size;
+            w_len -= bits_read;
+            n_bits -= bits_read;
+            bits_read_total += bits_read;
+        } else {
+            /* Single bit read */
+            r_mask = 1 << w_start % 8;
+
+            writebit = (*readptr & r_mask) ? 1 : 0;
+            if (writebit == 0) {
+                *base &= ~mask;
+            } else {
+                *base |= mask;
+            }
+
+            w_start = ((w_start + 1) % buff_size);
+            --w_len;
+            --n_bits;
+            ++bits_read_total;
+
+            if (mask == 0x80) {
+                mask = 1;
+                ++base;
+                aligned = (mask == 1 && (w_start % 8 == 0)) ? 1 : 0;
+            } else {
+                mask <<= 1;
+            }
+        }
+    }
+
+    /* Update */
+    bfp->buff_size = buff_size;
+    bfp->w_start = w_start;
+    bfp->w_len = w_len;
+
+    return bits_read_total;
+}
+/* Append n_bits bits from buff_in, starting at bit ofs (0-7) of its first byte. Returns the bits accepted (short if a flush could not drain the buffer) or -1 on error. */
+int bit_write(bit_file* bfp, const char* buff_in, UINTMAX_T n_bits, uint8_t ofs) {
+    UINTMAX_T ret = 0;
+    const uint8_t* base;
+    uint8_t mask;
+    uint8_t readbit;
+    uint8_t* writeptr;
+    UINTMAX_T pos;
+    UINTMAX_T buff_free_bits;
+    UINTMAX_T buff_free_bytes;
+    UINTMAX_T bits_written = 0;
+    uint8_t aligned;
+
+    if (bfp == NULL || buff_in == NULL || ofs > 7)
+        return -1;
+
+    if (bfp->mode != ACCESS_WRITE)
+        return -1;
+
+    mask = 1 << ofs;
+    base = (uint8_t*)buff_in;
+
+    pos = bfp->w_start + bfp->w_len;
+    buff_free_bits = bfp->buff_size - bfp->w_len;
+
+    /* Check if input and output are aligned to byte */
+    aligned = (mask == 1 && (pos % 8 == 0)) ? 1 : 0;
+
+    while (n_bits > 0) {
+        writeptr = (uint8_t*)&(bfp->buff) + pos / 8;
+
+        if (aligned && buff_free_bits > 7 && n_bits >= buff_free_bits) {
+            /* Optimization: due to alignment we can use memcpy */
+            buff_free_bytes = buff_free_bits / 8;
+            memcpy(writeptr, base, buff_free_bytes);
+            base += buff_free_bytes;
+            bits_written = buff_free_bytes * 8;
+
+            pos += bits_written;
+            bfp->w_len += bits_written;
+            n_bits -= bits_written;
+            ret += bits_written;
+            buff_free_bits -= bits_written;
+        } else {
+            /* Single bit write */
+            readbit = (*base & mask) ? 1 : 0;
+            if (readbit == 0) {
+                *writeptr &= ~(1 << pos % 8);
+            } else {
+                *writeptr |= (1 << pos % 8);
+            }
+
+            if (mask == 0x80) {
+                mask = 1;
+                ++base;
+                aligned = (mask == 1 && ((pos + 1) % 8 == 0)) ? 1 : 0; /* pos is only advanced below, so test the NEXT output bit */
+            } else {
+                mask <<= 1;
+            }
+
+            ++pos;
+            ++(bfp->w_len);
+            --(n_bits);
+            --buff_free_bits;
+            ++ret;
+        }
+
+        /* Flush if needed */
+        if (bfp->w_len == bfp->buff_size) {
+            if (bit_flush(bfp) == -1)
+                return -1;
+            if (bfp->w_len != 0)
+                return ret;
+            pos = bfp->w_start + bfp->w_len;
+            buff_free_bits = bfp->buff_size;
+        }
+
+    }
+
+    return ret;
+}
+/* Write the buffered whole bytes to fd. Returns 0 (also when EAGAIN leaves data buffered) or -1 on a write error. */
+int bit_flush(bit_file* bfp) {
+    UINTMAX_T count;
+    UINTMAX_T written;
+    UINTMAX_T n;
+    uint8_t* base;
+
+    if (bfp == NULL)
+        return -1;
+
+    count = bfp->w_len / 8;
+    written = 0;
+    base = (uint8_t*) bfp->buff + bfp->w_start / 8;
+
+    while (count > 0) {
+        n = write(bfp->fd, base, count);
+        if (n == (uint32_t)-1) {
+            if (errno == EAGAIN) {
+                errno = 0;
+                break;
+            } else {
+                return -1;
+            }
+        }
+        base += n;
+        written += n;
+        count -= n;
+    }
+
+    bfp->w_start = (bfp->w_start + written * 8) % bfp->buff_size;
+    bfp->w_len -= written * 8;
+    return 0;
+}
+/* Round the pending bit count up to a whole byte, flush, free bfp and close its fd. Returns 0, or -1 if bfp is NULL. */
+int bit_close(bit_file* bfp) {
+    int fd;
+
+    if (bfp == NULL)
+        return -1;
+
+    fd = bfp->fd;
+
+    if (bfp->w_len % 8)
+        bfp->w_len += 8 - (bfp->w_len % 8);
+
+    bit_flush(bfp);
+    free(bfp);
+    close(fd);
+    return 0;
+}
diff --git a/src/lz78/bitio.h b/src/lz78/bitio.h
new file mode 100644
index 0000000..15e745b
--- /dev/null
+++ b/src/lz78/bitio.h
@@ -0,0 +1,53 @@
+/*
+* Basic implementation of LZ78 compression algorithm
+*
+* Copyright (C) 2010 evilaliv3
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __BITIO_H
+#define __BITIO_H
+
+#include /* NOTE(review): the three header names were lost when this patch was extracted (angle-bracket text stripped); presumably they supply uint8_t/uint32_t and the O_* flags used below - restore them from the upstream commit */
+#include
+#include
+
+#define B_SIZE_DEFAULT 1048576
+
+#define UINTMAX_T uint32_t /* Integer type bitio uses for bit counts and bit offsets */
+
+/* Access mode for reading and writing */
+#define ACCESS_READ (O_RDONLY | O_NONBLOCK)
+#define ACCESS_WRITE (O_WRONLY | O_CREAT | O_TRUNC | O_NONBLOCK)
+
+/* The opaque type used for bitwise streams */
+typedef struct __bit_file bit_file;
+
+/* Creates a new bit_file over the already-open descriptor fd with the specified mode and buffer size (in bits) */
+bit_file* bit_open(int fd, int mode, UINTMAX_T bufsize);
+
+/* Does a memory read (occasionally an i/o read): up to n_bits bits into base, starting at bit ofs of its first byte */
+int bit_read(bit_file* bf, char* base, UINTMAX_T n_bits, uint8_t ofs);
+
+/* Does a memory write (occasionally an i/o flush): n_bits bits from base, starting at bit ofs of its first byte */
+int bit_write(bit_file* bf, const char* base, UINTMAX_T n_bits, uint8_t ofs);
+
+/* Writes the buffered whole bytes out to the file descriptor */
+int bit_flush(bit_file* bf);
+
+/* Releases the resources allocated by the bit_file: flushes pending bits, frees it and closes its descriptor */
+int bit_close(bit_file* bf);
+
+#endif /* __BITIO_H */
diff --git a/src/lz78/lz78.c b/src/lz78/lz78.c
new file mode 100644
index 0000000..cc342b7
--- /dev/null
+++ b/src/lz78/lz78.c
@@ -0,0 +1,638 @@
+/*
+* Basic implementation of LZ78 compression algorithm
+*
+* Copyright (C) 2010 evilaliv3
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/ + +#include +#include +#include +#include +#include + +#include "lz78.h" + +/* Code used to represent an EOF */ +#define DICT_CODE_EOF 256 +/* Code used before to send the size of the dictionary */ +#define DICT_CODE_SIZE 257 +/* Code used by the compressor to start the operations */ +#define DICT_CODE_START 258 +/* Code used by the compressor to stop the operations */ +#define DICT_CODE_STOP 259 + +/* Limits dict_size inside [DICT_SIZE_MIN, DICT_SIZE_MAX] */ +#ifndef DICT_LIMIT +#define DICT_LIMIT(x) (((x) < (DICT_SIZE_MIN + 1)) ? (DICT_SIZE_MIN + 1) : (((x) > (DICT_SIZE_MAX)) ? (DICT_SIZE_MAX) : (x))) +#endif + +/* Compute the threshold for the start of secondary dictionary */ +#define DICT_SIZE_THRESHOLD(x) ((x) * 8 / 10) + +/* Entry of the hash table used by the compressor to encode data */ +struct __ht_entry { + uint8_t used; /* Flag indicating if the node is used or not */ + uint32_t parent; /* Parent node */ + uint16_t label; /* Node's label */ + uint32_t child; /* Child node */ +}; + +/* The opaque type of hash table entry used by the compressor */ +typedef struct __ht_entry ht_entry; + +/* Dictionary of the compressor implemented as an hash table */ +struct __ht_dictionary { + ht_entry* root; /* Root node of the dictionary */ + uint32_t cur_node; /* Current position inside the dictionary */ + uint32_t prev_node; /* Pointer to the father of cur_node */ + uint32_t d_size; /* Size of the dictionary */ + uint32_t d_thr; /* Threshold for activation of secondary dictionary */ + uint32_t d_next; /* Next code to put in the dictionary */ +}; + +/* The opaque type representing the dictionary used by the compressor */ +typedef struct __ht_dictionary ht_dictionary; + +/* State of a compressor */ +struct __lz78_c { + uint8_t completed; /* Termination flag */ + uint32_t d_size; /* Size of the dictionaries */ + ht_dictionary* main; /* Main dictionary */ + ht_dictionary* secondary; /* Secondary dictionary */ + uint32_t bitbuf; /* Buffer containing bits not yet written 
*/ + uint32_t n_bits; /* Number of valid bits in the buffer */ +}; + +/* The opaque type representing the state of the compressor */ +typedef struct __lz78_c lz78_c; + +/* Entry of the dictionary used by the decompressor */ +struct __entry { + uint32_t parent; /* Parent node */ + uint16_t label; /* Node's label */ +}; + +/* The opaque type of a dictionary entry used by the decompressor */ +typedef struct __entry entry; + +/* Dictionary of the decompressor */ +struct __dictionary { + entry* root; /* Root node of the dictionary */ + uint32_t d_size; /* Size of the dictionray */ + uint32_t d_thr; /* Threshold for activation of secondary dictionary */ + uint32_t d_min; /* Minimum size of the dictionary */ + uint32_t d_next; /* Next code to put in the dictionary */ + uint32_t n_bytes; /* Number of bytes contained in bytebuf */ + uint32_t offset; /* Offset of the first valid byte inside bytebuf */ + char bytebuf[1]; /* Buffer used to output strings */ +}; + +/* The opaque type representing the dictionary used by the decompressor */ +typedef struct __dictionary dictionary; + +/* State of the decompressor */ +struct __lz78_d { + uint8_t completed; /* Termination flag */ + dictionary* main; /* Main dictionary */ + ht_dictionary* secondary; /* Secondary dictionary */ + uint32_t bitbuf; /* Buffer containing bits not yet written */ + uint32_t n_bits; /* Number of valid bits contained in the buffer */ +}; + +/* The opaque type representing the status of the decompressor */ +typedef struct __lz78_d lz78_d; + +/* lz78 instance descriptor */ +struct __lz78_instance { + uint8_t mode; /* Discriminate compression operations */ + char state[1]; /* Compression/Decompression state struct */ +}; + +/* Return the number of bits needed to represent the given number */ +uint8_t bitlen(uint32_t i); + +/* Create a new ht_dictionary to be used for the compression */ +ht_dictionary* ht_dictionary_new(uint32_t d_size); + +/* Update the dictionary depending with input byte + Return: + 0 a new 
entry have been put in the dictionary + -1 switch the current node + */ +int ht_dictionary_update(ht_dictionary* d, uint16_t label); + +/* Reset the dictionary associated to the given compressor */ +void ht_dictionary_reset(ht_dictionary* d); + +/* Destroy the given ht_dictionary object */ +void ht_dictionary_destroy(ht_dictionary* d); + +/* Create a new dictionary to be used for the decompression */ +dictionary* dictionary_new(uint32_t d_size); + +/* Update the internal state of the dictionary */ +void dictionary_update(dictionary* d, uint32_t code); + +/* Reset the dictionary associated to the given decompressor */ +void dictionary_reset(dictionary* d); + +/* Destroy the given dictionary object */ +void dictionary_destroy(dictionary* d); + +/* Compress the input byte and modifiy the state of the dictionary */ +void compress_byte(lz78_c* o, int c_in); + +/* Decompress the input code and modify the state of the dictionary */ +int decompress_code(lz78_d* o, uint32_t code); + +uint8_t bitlen(uint32_t i) { + uint8_t n = 0; + while (i) { + ++n; + i >>= 1; + } + return n; +} + +ht_dictionary* ht_dictionary_new(uint32_t d_size) { + ht_dictionary* dict = malloc(sizeof(ht_dictionary)); + if (dict == NULL) + return NULL; + + d_size = DICT_LIMIT(d_size); + dict->root = calloc(1, sizeof(ht_entry) * d_size); + if (dict->root == NULL) { + free(dict); + return NULL; + } else { + dict->d_size = d_size; + dict->d_thr = DICT_SIZE_THRESHOLD(d_size); + dict->d_next = DICT_SIZE_MIN; + dict->cur_node = -1; + return dict; + } +} + +int ht_dictionary_update(ht_dictionary* d, uint16_t label) { + uint8_t i; + uint32_t key; + uint32_t hash; + d->prev_node = d->cur_node; + + if (d->cur_node == (uint32_t)-1) { + d->cur_node = label; + return -1; + } + + /* Bernstein hash function */ + key = (label << bitlen(d->d_size)) + d->cur_node; + hash = 0; + for (i = 0; i < 4; ++i) { + hash = ((hash << 5) + hash) + (key & 0xFF); + key >>= 8; + } + hash %= d->d_size; + + /* Search if current sequence is 
present, else return an empty hash entry
+       where insert it */
+    while (d->root[hash].used) {
+        if (d->root[hash].parent == d->cur_node &&
+            d->root[hash].label == label) {
+            d->cur_node = d->root[hash].child;
+            return -1;
+        } else {
+            /* Collision (linear search) */
+            hash = (hash + 1) % d->d_size;
+        }
+    }
+
+    /* At this point, in d->prev_node there is the symbol we will send */
+
+    /* Fill out hash entry */
+    d->root[hash].used = 1;
+    d->root[hash].parent = d->prev_node;
+    d->root[hash].label = label;
+    d->root[hash].child = d->d_next;
+    /* Update current node */
+    d->cur_node = label;
+    /* Update next symbol */
+    ++(d->d_next);
+
+    return 0;
+}
+
+void ht_dictionary_reset(ht_dictionary* d) {
+    memset(d->root, 0, sizeof(ht_entry) * d->d_size);
+    d->d_next = DICT_SIZE_MIN;
+    d->cur_node = -1;
+}
+/* Free a dictionary made by ht_dictionary_new(): its hash table (root) and the descriptor; NULL is accepted. NOTE(review): confirm no caller outside this chunk frees root itself. */
+void ht_dictionary_destroy(ht_dictionary* d) {
+    if (d != NULL) { free(d->root); } /* root is a separate calloc() block; freeing only d leaked it */
+    free(d); /* free(NULL) is a no-op */
+}
+/* Allocate a decompressor dictionary for d_size codes (clamped by DICT_LIMIT) with codes below DICT_SIZE_MIN preloaded; returns NULL on allocation failure. */
+dictionary* dictionary_new(uint32_t d_size) {
+    uint16_t i; /* bytebuf needs the CLAMPED size: dictionary_update() writes it at index dict->d_size - 1 */
+    dictionary* dict = malloc(sizeof(dictionary) + DICT_LIMIT(d_size));
+    if (dict == NULL)
+        return NULL;
+
+    d_size = DICT_LIMIT(d_size);
+    dict->root = malloc(sizeof(entry) * d_size);
+    if (dict->root == NULL) {
+        free(dict);
+        return NULL;
+    }
+
+    dict->d_size = d_size;
+    dict->d_thr = DICT_SIZE_THRESHOLD(d_size);
+    dict->d_min = DICT_SIZE_MIN;
+    dict->d_next = DICT_SIZE_MIN;
+    for (i = 0; i < DICT_SIZE_MIN; ++i) {
+        dict->root[i].parent = 0;
+        dict->root[i].label = i;
+    }
+    return dict;
+}
+
+void dictionary_update(dictionary* d, uint32_t code) {
+    uint32_t d_size = d->d_size - 1;
+    uint32_t d_next = d->d_next;
+    uint32_t d_min = d->d_min;
+    uint32_t i = d_size;
+    uint32_t p = code;
+
+    /* Recover original sequence */
+    while (1) {
+        d->bytebuf[i--] = d->root[p].label;
+        if (p < DICT_SIZE_MIN || i == 0)
+            break;
+        p = d->root[p].parent;
+    }
+
+    /* Fill last char with the first char of the sequence */
+    if (code >= d_min && code == d_next - 1)
+        d->bytebuf[d_size] = d->bytebuf[i + 1];
+
+    /* Update last incomplete entry of the dictionary */
+
if (d_next > d_min) + d->root[d_next - 1].label = d->bytebuf[i + 1]; + + /* Update */ + d->n_bytes = d_size - i; + d->offset = d_size + 1 - d->n_bytes; + d->root[d_next].parent = code; + ++(d->d_next); +} + +void dictionary_reset(dictionary* d) { + d->d_min = DICT_SIZE_MIN; + d->d_next = DICT_SIZE_MIN; +} + +void dictionary_destroy(dictionary* d) { + if (d != NULL) { + free(d->root); + free(d); + } +} + +void compress_byte(lz78_c* o, int c_in) { + /* Optimization pointers */ + ht_dictionary* d_main = o->main; + ht_dictionary* d_sec = o->secondary; + + switch(d_main->cur_node) { + case DICT_CODE_START: + o->bitbuf = d_main->d_size; + o->n_bits = bitlen(DICT_SIZE_MAX); + d_main->cur_node = -1; + break; + case DICT_CODE_EOF: + o->bitbuf = d_main->cur_node; + o->n_bits = bitlen(d_main->d_next); + d_main->cur_node = DICT_CODE_STOP; + return; + case DICT_CODE_STOP: + o->completed = 1; + return; + default: + break; + } + + c_in = c_in == EOF ? DICT_CODE_EOF : c_in; + /* Dictonaries update */ + if (ht_dictionary_update(d_main, c_in) != 0) { + if (d_main->d_next >= d_main->d_thr) + ht_dictionary_update(d_sec, c_in); + return; + } + + o->bitbuf = d_main->prev_node; + o->n_bits = bitlen(d_main->d_next - 1); + + /* Dictonaries swap */ + if (d_main->d_next == d_main->d_size) { + o->main = o->secondary; + o->secondary = d_main; + d_main = d_sec; + d_sec = o->secondary; + d_main->cur_node = c_in; + ht_dictionary_reset(d_sec); + } + /* Update of secondary if threshold is reached */ + if (d_main->d_next >= d_main->d_thr) + ht_dictionary_update(d_sec, c_in); +} + +int decompress_code(lz78_d* o, uint32_t code) { + uint32_t i; + int c_in; + /* Optimization pointers */ + dictionary* d_main = o->main; + ht_dictionary* d_sec = o->secondary; + + switch(code) { + case DICT_CODE_EOF: + o->completed = 1; + return 0; + case DICT_CODE_START: + case DICT_CODE_SIZE: + d_main->d_next = DICT_SIZE_MAX; + o->n_bits = 0; + return 0; + default: + /* Initial operations */ + if (d_main->d_next == 
DICT_SIZE_MAX) { + dictionary_destroy(d_main); + d_main = dictionary_new(code); + o->main = d_main; + if (d_main == NULL) + return -1; + ht_dictionary_destroy(d_sec); + d_sec = ht_dictionary_new(code); + o->secondary = d_sec; + if (d_sec == NULL) { + dictionary_destroy(d_main); + o->main = NULL; + return -1; + } + o->bitbuf = 0; + o->n_bits = 0; + return 0; + } + break; + } + + /* Bad compressed file */ + if (d_sec == NULL || d_main == NULL) + return -2; + + dictionary_update(d_main, code); + + /* Update of secondary if threshold is reached */ + if (d_main->d_next > d_main->d_thr) { + for (i = 0; i < d_main->n_bytes; ++i) { + c_in = (uint8_t) d_main->bytebuf[d_main->offset + i]; + ht_dictionary_update(d_sec, c_in); + } + } + + /* Dictonaries swap */ + if (d_main->d_next == d_main->d_size) { + dictionary_reset(d_main); + d_main->d_min = d_sec->d_next; + d_main->d_next = d_sec->d_next; + for (i = 0; i < d_sec->d_size && d_sec->d_next; ++i) { + if (d_sec->root[i].used) { + d_main->root[d_sec->root[i].child].parent = + d_sec->root[i].parent; + d_main->root[d_sec->root[i].child].label = + d_sec->root[i].label; + --(d_sec->d_next); + } + } + ht_dictionary_reset(d_sec); + } + return 0; +} + +lz78_instance* lz78_new(uint8_t cmode, uint32_t dsize) { + lz78_instance* i; + lz78_c* c; + lz78_d* d; + + int max_dim = (sizeof(lz78_c) > sizeof(lz78_d)) ? sizeof(lz78_c) : sizeof(lz78_d); + i = malloc(sizeof(lz78_instance) + max_dim); + if (i == NULL) + return NULL; + i->mode = cmode; + + switch (cmode) { + case LZ78_MODE_COMPRESS: + c = (lz78_c*)&i->state; + dsize = (dsize == 0) ? 
DICT_SIZE_DEFAULT : dsize; + c->d_size = DICT_LIMIT(dsize); + c->completed = 0; + c->main = ht_dictionary_new(c->d_size); + if (c->main == NULL) { + free(i); + return NULL; + } + c->secondary = ht_dictionary_new(c->d_size); + if (c->secondary == NULL) { + ht_dictionary_destroy(c->main); + free(i); + return NULL; + } + c->bitbuf = DICT_CODE_START; + c->n_bits = bitlen(DICT_SIZE_MIN); + c->main->cur_node = DICT_CODE_START; + return i; + + case LZ78_MODE_DECOMPRESS: + d = (lz78_d*)&i->state; + d->completed = 0; + d->main = dictionary_new(DICT_SIZE_MIN); + if (d->main == NULL) { + free(i); + return NULL; + } + return i; + + default: + return NULL; + } +} + +uint8_t lz78_compress(lz78_instance* lz78, int fd_in, int fd_out) { + FILE* in; + bit_file* out; + lz78_c* o; + int bits; + int c_in; + + if (lz78 == NULL) + return LZ78_ERROR_INITIALIZATION; + + if (lz78->mode != LZ78_MODE_COMPRESS) + return LZ78_ERROR_MODE; + + in = fdopen(fd_in, "r"); + if (in == NULL) + return LZ78_ERROR_READ; + + out = bit_open(fd_out, ACCESS_WRITE, B_SIZE_DEFAULT); + if (out == NULL) + return LZ78_ERROR_WRITE; + + o = (lz78_c*)&lz78->state; + + for (;;) { + + if (o->n_bits > 0) { + bits = bit_write(out, (char*) &o->bitbuf, o->n_bits, 0); + if (bits == -1) + return LZ78_ERROR_WRITE; + + o->bitbuf >>= bits; + o->n_bits -= bits; + + if (o->n_bits > 0) + return LZ78_ERROR_EAGAIN; + } + + c_in = fgetc(in); + if (c_in == EOF) { + if (errno == EAGAIN) { + errno = 0; + return LZ78_ERROR_EAGAIN; + } else if (errno != 0) { + return LZ78_ERROR_READ; + } + } + + compress_byte(o, c_in); + if (o->completed == 1) { + bit_close(out); + return LZ78_SUCCESS; + } + } +} + +uint8_t lz78_decompress(lz78_instance* lz78, int fd_in, int fd_out) { + bit_file* in; + FILE* out; + lz78_d* o; + dictionary* d_main; + uint32_t bits, written; + int ret; + + if (lz78 == NULL) + return LZ78_ERROR_INITIALIZATION; + + if (lz78->mode != LZ78_MODE_DECOMPRESS) + return LZ78_ERROR_MODE; + + in = bit_open(fd_in, ACCESS_READ, 
B_SIZE_DEFAULT); + if (in == NULL) + return LZ78_ERROR_READ; + + out = fdopen(fd_out, "w"); + if (out == NULL) + return LZ78_ERROR_WRITE; + + o = (lz78_d*) &lz78->state; + + for (;;) { + /* Optimization pointer (MUST be init every cycle) */ + d_main = o->main; + if (d_main->n_bytes) { + written = 0; + while (written != d_main->n_bytes) { + ret = fwrite(d_main->bytebuf + d_main->offset + written, 1, + d_main->n_bytes - written, out); + if (ret == -1) { + d_main->offset += written; + d_main->n_bytes -= written; + if (errno == EAGAIN) { + errno = 0; + return LZ78_ERROR_EAGAIN; + } else { + return LZ78_ERROR_WRITE; + } + } + written += ret; + } + } + + o->bitbuf = 0; + o->n_bits = 0; + bits = bitlen(d_main->d_next); + + if (bits > 0) { + ret = bit_read(in, (char*) &o->bitbuf, bits, 0); + if (ret == -1) + return LZ78_ERROR_READ; + + o->n_bits = ret; + if (bits != o->n_bits) + return LZ78_ERROR_EAGAIN; + } + + ret = decompress_code(o, o->bitbuf); + if (ret < 0) { + switch(ret) { + case -1: + return LZ78_ERROR_DICTIONARY; + case -2: + return LZ78_ERROR_DECOMPRESS; + } + } + + if (o->completed == 1) { + fflush(out); + return LZ78_SUCCESS; + } + } +} + +void lz78_destroy(lz78_instance *lz78) { + lz78_c *c; + lz78_d *d; + if (lz78 != NULL) { + switch (lz78->mode) { + case LZ78_MODE_COMPRESS: + c = (lz78_c*)&lz78->state; + if (c != NULL) { + ht_dictionary_destroy(c->main); + ht_dictionary_destroy(c->secondary); + } + break; + + case LZ78_MODE_DECOMPRESS: + d = (lz78_d*)&lz78->state; + if (d != NULL) { + dictionary_destroy(d->main); + ht_dictionary_destroy(d->secondary); + } + break; + } + + free(lz78); + } +} diff --git a/src/lz78/lz78.h b/src/lz78/lz78.h new file mode 100644 index 0000000..2f84ef6 --- /dev/null +++ b/src/lz78/lz78.h @@ -0,0 +1,70 @@ +/* +* Basic implementation of LZ78 compression algorithm +* +* Copyright (C) 2010 evilaliv3 +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as 
published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#ifndef __LZ78_H +#define __LZ78_H + +#include "bitio.h" + +/* Modes of compression */ +#define LZ78_MODE_COMPRESS 0 +#define LZ78_MODE_DECOMPRESS 1 + +/* List of lz78-level return codes */ +#define LZ78_SUCCESS 0 +#define LZ78_ERROR_DICTIONARY 1 +#define LZ78_ERROR_READ 2 +#define LZ78_ERROR_WRITE 3 +#define LZ78_ERROR_EAGAIN 4 +#define LZ78_ERROR_COMPRESS 5 +#define LZ78_ERROR_DECOMPRESS 6 +#define LZ78_ERROR_INITIALIZATION 7 +#define LZ78_ERROR_MODE 8 + +/* Size of the dictionary */ +#define DICT_SIZE_MIN 260 +#define DICT_SIZE_DEFAULT 4096 +#define DICT_SIZE_MAX 1048576 + +/* Opaque type representing the compression instance */ +typedef struct __lz78_instance lz78_instance; + +/* Allocate and return an instance of lz78 compressor + cmode: specify compress/decompress mode + dsize: specify the size of the dictionary (byte) + */ +lz78_instance* lz78_new(uint8_t cmode, uint32_t dsize); + +/* Compress the input stream by sending the result to the output stream + arg: current instance of compressor obtained by invoking lz78_init() + Return: one of defined lz78-level return codes + */ +uint8_t lz78_compress(lz78_instance* lz78, int fd_in, int fd_out); + +/* Decompress the input stream by sending the result to the output stream + arg: current instance of compressor obtained by invoking lz78_init() + Return: one of defined lz78-level return codes + */ +uint8_t lz78_decompress(lz78_instance* lz78, int fd_in, int fd_out); + + +/* Deallocate current instance */ +void 
lz78_destroy(lz78_instance* lz78); + +#endif /* __LZ78_H */ diff --git a/src/lz78/wrapper.c b/src/lz78/wrapper.c new file mode 100644 index 0000000..1d90e36 --- /dev/null +++ b/src/lz78/wrapper.c @@ -0,0 +1,291 @@ +/* +* Basic implementation of LZ78 compression algorithm +* +* Copyright (C) 2010 evilaliv3 +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#include +#include +#include + +#include "wrapper.h" + +/* Structure representing the type of algorithm */ +struct __algorithm { + char* name; /* String representing the name */ + uint8_t type; /* Constant representing the type */ +}; + +/* Opaque type representing the type of algorithm */ +typedef struct __algorithm algorithm; + +/* Struct of available algorithms */ +const algorithm algo_list[] = { + {"lz78", LZ78_ALGORITHM}, + {NULL, UNKNOWN_ALGORITHM} +}; + +/* Struct representing the wrapper used for compression or decompression */ +struct __wrapper { + uint8_t type; /* Algorithm used to compress or decompress data */ + uint8_t mode; /* Flag indicating compress/decompress mode */ + void* data; /* Opaque structure representing the algorithm */ +}; + +/* Global variable representing the current error stored */ +uint8_t wrapper_cur_err = WRAPPER_SUCCESS; + +/* Associate an algorithm-dependent error to a wrapper-generic error */ +uint8_t wrapper_return(uint8_t code) { + wrapper_cur_err = code; + switch (code) { + case LZ78_SUCCESS: + return 
WRAPPER_SUCCESS; + case LZ78_ERROR_READ: + return WRAPPER_ERROR_READ; + case LZ78_ERROR_WRITE: + return WRAPPER_ERROR_WRITE; + case LZ78_ERROR_EAGAIN: + return WRAPPER_ERROR_EAGAIN; + case LZ78_ERROR_COMPRESS: + return WRAPPER_ERROR_COMPRESS; + case LZ78_ERROR_DECOMPRESS: + return WRAPPER_ERROR_DECOMPRESS; + case LZ78_ERROR_DICTIONARY: + case LZ78_ERROR_INITIALIZATION: + case LZ78_ERROR_MODE: + return WRAPPER_ERROR_GENERIC; + } + return code; +} + +uint8_t get_algorithm(char* type) { + uint8_t i = 0; + while (algo_list[i].name != NULL) { + if (strcmp(type, algo_list[i].name) == 0) + return algo_list[i].type; + ++i; + } + return UNKNOWN_ALGORITHM; +} + +int byte_size(char* size) { + int n; + + if (size == NULL) + return 0; + + n = atoi(size); + + switch (size[strlen(size) - 1]) { + case 'K': + n <<= 10; + break; + + case 'M': + n <<= 20; + break; + } + + return (n < 0) ? 0 : n; +} + +void wrapper_perror() { + switch (wrapper_cur_err) { + case WRAPPER_SUCCESS: + break; + + case WRAPPER_ERROR_ALGORITHM: + fprintf(stderr, "Unrecognized compression algorithm\n"); + break; + + case WRAPPER_ERROR_FILE_IN: + fprintf(stderr, "Unable to read input file\n"); + break; + + case WRAPPER_ERROR_FILE_OUT: + fprintf(stderr, "Unable to write output file\n"); + break; + + case LZ78_SUCCESS: + break; + + case LZ78_ERROR_DICTIONARY: + fprintf(stderr, "LZ78: unable to allocate dictionaries\n"); + break; + + case LZ78_ERROR_INITIALIZATION: + fprintf(stderr, "LZ78: bad initialization\n"); + break; + + case LZ78_ERROR_MODE: + fprintf(stderr, "LZ78: wrong compression/decompression mode\n"); + break; + + case LZ78_ERROR_READ: + fprintf(stderr, "LZ78: unable to read input data\n"); + break; + + case LZ78_ERROR_WRITE: + fprintf(stderr, "LZ78: unable to write output data\n"); + break; + + case LZ78_ERROR_EAGAIN: + fprintf(stderr, "LZ78: I/O operation would block: retry...\n"); + break; + + case LZ78_ERROR_COMPRESS: + fprintf(stderr, "LZ78: unable to compress input data\n"); + break; + + case 
LZ78_ERROR_DECOMPRESS: + fprintf(stderr, "LZ78: unable to decompress input data\n"); + break; + + default: + fprintf(stderr, "Unhandled error code %d\n", wrapper_cur_err); + } +} + +wrapper* wrapper_new(uint8_t w_mode, uint8_t w_type, char* argv) { + wrapper* w = malloc(sizeof(wrapper)); + if (w == NULL) + return NULL; + + w->type = w_type; + w->mode = w_mode; + + switch (w->type) { + case LZ78_ALGORITHM: + w->data = lz78_new(w_mode, byte_size(argv)); + break; + + default: + free(w); + return NULL; + } + + if (w->data) + return w; + else { + free(w); + return NULL; + } +} + +void wrapper_destroy(wrapper* w) { + if (w == NULL) + return; + + switch (w->type) { + case LZ78_ALGORITHM: + lz78_destroy(w->data); + break; + + default: + return; + } + free(w); +} + +uint8_t wrapper_compress(wrapper* w, char* input, char* output) { + uint8_t ret; + int fd_in; + int fd_out; + + switch (w->type) { + case LZ78_ALGORITHM: + if (input == NULL) { + fd_in = STDIN_FILENO; + } else { + fd_in = open(input, ACCESS_READ); + if (fd_in == -1) + return wrapper_return(WRAPPER_ERROR_FILE_IN); + } + + if (output == NULL) { + fd_out = STDOUT_FILENO; + } else { + fd_out = open(output, ACCESS_WRITE, 0644); + if (fd_out == -1) { + close(fd_in); + return wrapper_return(WRAPPER_ERROR_FILE_OUT); + } + } + + ret = lz78_compress(w->data, fd_in, fd_out); + + close(fd_in); + close(fd_out); + return wrapper_return(ret); + + default: + return wrapper_return(WRAPPER_ERROR_ALGORITHM); + } +} + +uint8_t wrapper_decompress(wrapper* w, char* input, char* output) { + uint8_t ret; + int fd_in; + int fd_out; + + switch (w->type) { + case LZ78_ALGORITHM: + if (input == NULL) { + fd_in = STDIN_FILENO; + } else { + fd_in = open(input, ACCESS_READ); + if (fd_in == -1) + return wrapper_return(WRAPPER_ERROR_FILE_IN); + } + + if (output == NULL) { + fd_out = STDOUT_FILENO; + } else { + fd_out = open(output, ACCESS_WRITE, 0644); + if (fd_out == -1) { + close(fd_in); + return wrapper_return(WRAPPER_ERROR_FILE_OUT); + } + 
} + + ret = lz78_decompress(w->data, fd_in, fd_out); + + close(fd_in); + close(fd_out); + return wrapper_return(ret); + + default: + return wrapper_return(WRAPPER_ERROR_ALGORITHM); + } +} + +uint8_t wrapper_exec(wrapper* w, char* input, char* output) { + uint8_t ret; + if (w->mode == WRAPPER_MODE_COMPRESS) { + for (;;) { + ret = wrapper_compress(w, input, output); + if (ret != WRAPPER_ERROR_EAGAIN) + return ret; + } + } else { + for (;;) { + ret = wrapper_decompress(w, input, output); + if (ret != WRAPPER_ERROR_EAGAIN) + return ret; + } + } +} diff --git a/src/lz78/wrapper.h b/src/lz78/wrapper.h new file mode 100644 index 0000000..006ab20 --- /dev/null +++ b/src/lz78/wrapper.h @@ -0,0 +1,86 @@ +/* +* Basic implementation of LZ78 compression algorithm +* +* Copyright (C) 2010 evilaliv3 +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#ifndef __WRAPPER_H +#define __WRAPPER_H + +#include "lz78.h" + +/* List of included compression algorithms */ +#define UNKNOWN_ALGORITHM 0 +#define LZ78_ALGORITHM 1 + +/* Modes of compression */ +#define WRAPPER_MODE_COMPRESS 0 +#define WRAPPER_MODE_DECOMPRESS 1 + +/* List of managed wrapper-level errors */ +#define WRAPPER_SUCCESS 20 +#define WRAPPER_ERROR_ALGORITHM 21 +#define WRAPPER_ERROR_FILE_IN 22 +#define WRAPPER_ERROR_FILE_OUT 23 +#define WRAPPER_ERROR_READ 24 +#define WRAPPER_ERROR_WRITE 25 +#define WRAPPER_ERROR_EAGAIN 26 +#define WRAPPER_ERROR_COMPRESS 27 +#define WRAPPER_ERROR_DECOMPRESS 28 +#define WRAPPER_ERROR_GENERIC 29 + +/* Opaque type representing the wrapper */ +typedef struct __wrapper wrapper; + +/* Creates a new wrapper: + w_mode mode of compression + w_type type of algorithm + w_argv additional parameter + */ +wrapper* wrapper_new(uint8_t w_mode, uint8_t w_type, char* w_argv); + +/* Deallocates a wrapper */ +void wrapper_destroy(wrapper* w); + +/* Execute the function associated with the wrapper (compress/decompress) + Return: + WRAPPER_SUCCESS on success + WRAPPER_ERROR_FILE_IN unable to open input file + WRAPPER_ERROR_FILE_OUT unable to open output file + WRAPPER_ERROR_READ unable to read input data + WRAPPER_ERROR_WRITE unable to write output data + WRAPPER_ERROR_EAGAIN unable to accomplish current operation + WRAPPER_ERROR_ALGORITHM type of wrapper unknown + WRAPPER_ERROR_COMPRESS unable to compress input data + WRAPPER_ERROR_DECOMPRESS unable to decompress input data + WRAPPER_ERROR_GENERIC algorithm-dependent error + */ +uint8_t wrapper_exec(wrapper* w, char* in, char* out); + +/* Return a positive constant associated to a particular algorithm + (UNKNOWN_ALGORITHM if doesn't exist) + */ +uint8_t get_algorithm(char* type); + +/* Return an integer representing the given size + (K = KBytes, M = MBytes) + */ +int byte_size(char* size); + +/* Print last wrapper error occurred into standard error stream */ +void wrapper_perror(); + 
+#endif /* __WRAPPER_H */ diff --git a/src/util.c b/src/util.c index 04e8baf..63393e6 100644 --- a/src/util.c +++ b/src/util.c @@ -17,6 +17,16 @@ void syn_error(char *message) exit(1); } +long fsize (FILE *in) +{ + long pos, length; + pos = ftell(in); + fseek(in, 0L, SEEK_END); + length = ftell(in); + fseek(in, pos, SEEK_SET); + return length; +} + void syn_warn(char *message) { printf("%s\n", message);