Remove LZ77 for LZ78, optimized

master
Pentium44 2021-06-15 19:48:43 -07:00
parent 353ed7e1e5
commit 0e12919bb3
12 changed files with 1526 additions and 394 deletions

View File

@ -11,13 +11,13 @@ PREFIX ?= /usr
CC ?= gcc
#CC ?= tcc
#CC ?= musl-tcc
CFLAGS += -O2 -pedantic -g -Wall -Wextra
CFLAGS += -O3 -g -Wall -Wextra
CPPFLAGS += -DVERSION=$(VERSION) -D_FORTIFY_SOURCE=2
#CPPFLAGS += -DVERSION=$(VERSION)
LDFLAGS += -lm
BIN ?= slidescript
SRCS=$(wildcard src/*.c)
SRCS=$(wildcard src/lz78/*.c) $(wildcard src/*.c)
OBJECTS=$(SRCS:%.c=%.o)

8
docs/examples/tar.ss Executable file
View File

@ -0,0 +1,8 @@
#!/usr/bin/slidescript
print "Compressing..."
compress "test" "docs"
sleep "1"
print "Decompressing..."
decompress "test.tar.ss"

View File

@ -1,340 +0,0 @@
// LZ77 compression examples, simple and lightweight
// Being quick to process and execute, this will be great
// For internal compression on modern machines
//
// Andy Herbert
// lz1 https://github.com/andyherbert/lz1
//
#include "inc/deps.h"
#include "inc/compression.h"
//--------------------------------------------------------------------
#define MY_ERR "Error: lz77"
// Release a heap block and reset the caller's pointer to null.
// Calling it on a pointer that is already null is treated as a fatal
// programming error: a message is printed and the process exits with 1.
// Wrapped in do { } while (0) so that "my_free (p);" is a single statement
// and can be used safely as the body of an if/else.
#define my_free(dm) \
    do { \
        if ((dm) == (void *) 0) \
        { \
            fprintf (stderr, "WTH. Mooo.\n"); \
            exit (1); \
        } \
        free (dm); \
        (dm) = (void *) 0; \
    } while (0)
//--------------------------------------------------------------------
// Copy `count` bytes from an object into the output stream one byte at a
// time. Keeps the host byte order of the original cast-based stores while
// avoiding unaligned multi-byte accesses.
static void lz77_put_bytes (uint8_t *dst, const void *src, uint32_t count)
{
    const uint8_t *from = (const uint8_t *) src;

    while (count-- > 0)
        *dst++ = *from++;
}

// Compress `uncompressed_size` bytes of `uncompressed_text` into
// `compressed_text`.
//
// Output layout: 4 bytes holding the uncompressed size (host byte order),
// 1 byte holding `pointer_length_width`, then one 3-byte token per step:
// a 16-bit word (distance << width | length field, host byte order)
// followed by one literal byte.
//
// pointer_length_width is the number of bits of the 16-bit word used for
// the match length; it is expected to be in the range 1..15.
//
// The caller must provide room for 5 + 3 * uncompressed_size bytes, which
// is reached when no byte can be matched.
//
// Returns the number of bytes written to `compressed_text`.
uint32_t lz77_compress (uint8_t *uncompressed_text, uint32_t uncompressed_size, uint8_t *compressed_text, uint8_t pointer_length_width)
{
    uint16_t pointer_pos, temp_pointer_pos, output_pointer, pointer_length, temp_pointer_length;
    uint32_t compressed_pointer, output_size, coding_pos, output_lookahead_ref, look_behind, look_ahead;
    uint16_t pointer_pos_max, pointer_length_max;

    // Integer shifts instead of pow(): exact, and no libm dependency.
    pointer_pos_max = (uint16_t) (1u << (16 - pointer_length_width));
    pointer_length_max = (uint16_t) (1u << pointer_length_width);

    lz77_put_bytes (compressed_text, &uncompressed_size, sizeof uncompressed_size);
    compressed_text[4] = pointer_length_width;
    compressed_pointer = output_size = 5;

    for (coding_pos = 0; coding_pos < uncompressed_size; ++coding_pos)
    {
        pointer_pos = 0;
        pointer_length = 0;

        // Try every distance inside the window and keep the longest match.
        for (temp_pointer_pos = 1; (temp_pointer_pos < pointer_pos_max) && (temp_pointer_pos <= coding_pos); ++temp_pointer_pos)
        {
            look_behind = coding_pos - temp_pointer_pos;
            look_ahead = coding_pos;

            // The look-ahead index is bounded by the input size so a match
            // can never extend into memory beyond the input buffer.
            for (temp_pointer_length = 0;
                 (look_ahead < uncompressed_size)
                     && (uncompressed_text[look_ahead] == uncompressed_text[look_behind]);
                 ++temp_pointer_length)
            {
                if (temp_pointer_length == pointer_length_max)
                    break;
                ++look_ahead;
                ++look_behind;
            }

            if (temp_pointer_length > pointer_length)
            {
                pointer_pos = temp_pointer_pos;
                pointer_length = temp_pointer_length;
                if (pointer_length == pointer_length_max)
                    break;
            }
        }

        coding_pos += pointer_length;

        if ((coding_pos == uncompressed_size) && pointer_length)
        {
            // The match consumed the rest of the input: shorten it by one
            // so the last input byte can be emitted as the token's literal.
            output_pointer = (pointer_length == 1) ? 0 : ((pointer_pos << pointer_length_width) | (pointer_length - 2));
            output_lookahead_ref = coding_pos - 1;
        }
        else
        {
            output_pointer = (pointer_pos << pointer_length_width) | (pointer_length ? (pointer_length - 1) : 0);
            output_lookahead_ref = coding_pos;
        }

        lz77_put_bytes (compressed_text + compressed_pointer, &output_pointer, sizeof output_pointer);
        compressed_pointer += 2;
        compressed_text[compressed_pointer++] = uncompressed_text[output_lookahead_ref];
        output_size += 3;
    }

    return output_size;
}
// Copy `count` bytes from the compressed stream into an object one byte at
// a time. Keeps the host byte order of the original cast-based loads while
// avoiding unaligned multi-byte accesses.
static void lz77_get_bytes (void *dst, const uint8_t *src, uint32_t count)
{
    uint8_t *to = (uint8_t *) dst;

    while (count-- > 0)
        *to++ = *src++;
}

// Expand a stream produced by lz77_compress into `uncompressed_text`.
//
// The stream starts with a 4-byte uncompressed size and a 1-byte length
// width, followed by 3-byte tokens (16-bit pointer word + literal byte).
// `uncompressed_text` must have room for at least the size stored in the
// header; no byte is written at or beyond that offset.
//
// Returns the number of bytes produced. A value smaller than the size in
// the header means the stream was rejected: the width is larger than 16 or
// a token refers to data before the start of the output.
uint32_t lz77_decompress (uint8_t *compressed_text, uint8_t *uncompressed_text)
{
    uint8_t pointer_length_width;
    uint16_t input_pointer, pointer_length, pointer_pos, pointer_length_mask;
    uint32_t compressed_pointer, coding_pos, pointer_offset, uncompressed_size;

    lz77_get_bytes (&uncompressed_size, compressed_text, sizeof uncompressed_size);
    pointer_length_width = compressed_text[4];
    compressed_pointer = 5;

    // The pointer word is 16 bits wide, so a larger width cannot be valid
    // and would make the shifts below undefined.
    if (pointer_length_width > 16)
        return 0;
    pointer_length_mask = (uint16_t) ((1u << pointer_length_width) - 1u);

    coding_pos = 0;
    while (coding_pos < uncompressed_size)
    {
        lz77_get_bytes (&input_pointer, compressed_text + compressed_pointer, sizeof input_pointer);
        compressed_pointer += 2;

        pointer_pos = input_pointer >> pointer_length_width;
        if (pointer_pos)
        {
            // A distance larger than what has been produced so far would
            // read in front of the output buffer.
            if (pointer_pos > coding_pos)
                return coding_pos;

            pointer_length = (input_pointer & pointer_length_mask) + 1;
            pointer_offset = coding_pos - pointer_pos;

            // Source and destination may overlap, so copy byte by byte;
            // stop at the declared size to stay inside the output buffer.
            for (; (pointer_length > 0) && (coding_pos < uncompressed_size); --pointer_length)
                uncompressed_text[coding_pos++] = uncompressed_text[pointer_offset++];
        }

        // Every token carries one literal byte after the pointer word.
        if (coding_pos < uncompressed_size)
            uncompressed_text[coding_pos++] = compressed_text[compressed_pointer];
        ++compressed_pointer;
    }

    return coding_pos;
}
// Return the size in bytes of the file behind `in`, leaving the stream
// positioned where it was on entry.
// Returns -1 if `in` is NULL or if any ftell/fseek call fails (for example
// on a stream that is not seekable).
long fsize (FILE *in)
{
    long saved_pos, length;

    if (in == NULL)
        return -1L;

    saved_pos = ftell(in);
    if (saved_pos < 0)
        return -1L;

    if (fseek(in, 0L, SEEK_END) != 0)
        return -1L;
    length = ftell(in);

    // Always try to restore the original position, even if the second
    // ftell failed, so the caller's stream is left usable.
    if (fseek(in, saved_pos, SEEK_SET) != 0)
        return -1L;

    return length;
}
// Compress the file `filename_in` into `filename_out` with lz77_compress,
// using `pointer_length_width` bits of each token for the match length.
//
// Returns the number of bytes written to the output file, or 0 if the input
// cannot be opened, sized or read, if memory cannot be allocated, if the
// input is too large for the 32-bit sizes used by the format, or if the
// output cannot be opened or written completely.
uint32_t ss_compress (const char *filename_in, char *filename_out, uint8_t pointer_length_width)
{
    FILE *in = NULL, *out = NULL;
    uint8_t *uncompressed_text = NULL, *compressed_text = NULL;
    uint32_t uncompressed_size, compressed_size;
    uint32_t result = 0;
    long file_length;

    in = fopen(filename_in, "rb");
    if(in == NULL)
        return 0;

    // Each token is 3 bytes and covers at least one input byte, so the
    // output never exceeds 5 + 3 * size; refuse inputs for which that
    // bound no longer fits in the 32-bit size returned to the caller.
    file_length = fsize(in);
    if((file_length < 0) || ((unsigned long) file_length > (UINT32_MAX - 5u) / 3u))
        goto cleanup;
    uncompressed_size = (uint32_t) file_length;

    // +20 slack kept from the original allocation.
    uncompressed_text = malloc((size_t) uncompressed_size + 20);
    if(uncompressed_text == NULL)
        goto cleanup;

    if(uncompressed_size != fread(uncompressed_text, 1, uncompressed_size, in))
        goto cleanup;
    fclose(in);
    in = NULL;

    // Worst case: header plus one 3-byte token per input byte.
    compressed_text = malloc(5 + 3 * (size_t) uncompressed_size);
    if(compressed_text == NULL)
        goto cleanup;

    compressed_size = lz77_compress(uncompressed_text, uncompressed_size, compressed_text, pointer_length_width);

    out = fopen(filename_out, "wb");
    if(out == NULL)
        goto cleanup;

    if(compressed_size != fwrite(compressed_text, 1, compressed_size, out))
        goto cleanup;

    // fclose flushes buffered data; a failure here means the file is short.
    if(fclose(out) != 0)
    {
        out = NULL;
        goto cleanup;
    }
    out = NULL;

    result = compressed_size;

cleanup:
    if(out != NULL)
        fclose(out);
    if(in != NULL)
        fclose(in);
    free(compressed_text);
    free(uncompressed_text);
    return result;
}
//--------------------------------------------------------------------
#ifndef ZERO
#define ZERO 0
#endif
#ifndef ONE
#define ONE 1
#endif
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// Decompress the archive `ifname` (as written by ss_compress) into
// `ofname`.
//
// Returns the number of bytes written to the output file, or ZERO after
// printing a message to stderr when the input cannot be opened or read,
// its header is invalid, memory cannot be allocated, the decompressed data
// is shorter than the size stored in the header, or the output cannot be
// opened or written completely.
//--------------------------------------------------------------------
uint32_t ss_decompress
(
    char *ifname,                       // Input  -file name
    char *ofname                        // Output -file name
)
{
    FILE *ifp = NULL;                   // Input  -file pointer
    FILE *ofp = NULL;                   // Output -file pointer
    uint8_t *dm_comp = NULL;            // DM: Compressed   data
    uint8_t *dm_deco = NULL;            // DM: Uncompressed data
    uint32_t header_size;               // Size field as stored
    uint32_t result = ZERO;             // Value returned to caller

    int orig_size;                      // Original     size
    int comp_size;                      // Compressed   size
    int deco_size;                      // Decompressed size
    int writ_size;                      // Written      size

//--------------------------------------------------------------------
// Open input file.

    if ((ifp = fopen (ifname, "rb")) == NULL)
    {
        fprintf (stderr,
            "%s: File not found: %s\n",
            MY_ERR, ifname);
        return ZERO;
    }

//--------------------------------------------------------------------
// Set up compressed-data buffer.

    comp_size = (int) fsize (ifp);

    // The header alone is 5 bytes (4-byte size + 1-byte width); anything
    // shorter, or a failed size query, cannot be a valid archive.
    if (comp_size < 5)
    {
        fprintf (stderr,
            "%s: Invalid archive header: %s\n",
            MY_ERR, ifname);
        goto cleanup;
    }

    // +10 slack kept from the original allocation.
    dm_comp = (uint8_t *) malloc ((size_t) comp_size + 10);
    if (dm_comp == NULL)
    {
        fprintf (stderr,
            "%s: Error: malloc failed\n",
            MY_ERR);
        goto cleanup;
    }

//--------------------------------------------------------------------
// Read compressed data.

    if (fread (dm_comp, ONE, comp_size, ifp) != (size_t) comp_size)
    {
        fprintf (stderr,
            "%s: Read of input data failed\n",
            MY_ERR);
        goto cleanup;
    }

    fclose (ifp);
    ifp = NULL;

//--------------------------------------------------------------------
// Set up decompressed-data buffer.

    header_size = *((uint32_t *) dm_comp);
    if (header_size > 0x7FFFFFFFu)      // Would not fit the int sizes below
    {
        fprintf (stderr,
            "%s: Invalid archive header: %s\n",
            MY_ERR, ifname);
        goto cleanup;
    }
    orig_size = (int) header_size;

    // +20 slack kept from the original allocation.
    dm_deco = (uint8_t *) malloc ((size_t) orig_size + 20);
    if (dm_deco == NULL)
    {
        fprintf (stderr, "%s: malloc failed\n", MY_ERR);
        goto cleanup;
    }

//--------------------------------------------------------------------
// Decompress.

    deco_size = (int) lz77_decompress (dm_comp, dm_deco);

    if (deco_size < orig_size)
    {
        fprintf (stderr,
            "%s: deco size %d < orig size %d\n",
            MY_ERR, deco_size, orig_size);
        goto cleanup;
    }

//--------------------------------------------------------------------
// Open output file.

    if ((ofp = fopen (ofname, "wb")) == NULL)
    {
        fprintf (stderr,
            "%s: Can't open output file: %s\n",
            MY_ERR, ofname);
        goto cleanup;
    }

//--------------------------------------------------------------------
// Write to output file.

    writ_size = (int) fwrite (dm_deco, ONE, deco_size, ofp);
    fclose (ofp);

    if (writ_size != deco_size)
    {
        fprintf (stderr,
            "%s: Bytes written %d != Data size %d\n",
            MY_ERR, writ_size, deco_size);
        goto cleanup;
    }

    result = (uint32_t) deco_size;

//--------------------------------------------------------------------
// Wrap it up: release everything that is still held.

cleanup:
    if (ifp != NULL)
        fclose (ifp);
    free (dm_deco);
    free (dm_comp);
    return result;
}
//--------------------------------------------------------------------
/*
int main (int argc, char const *argv[])
{
FILE *in;
char filename[129];
char filedecout[141];
if(argc < 2)
{
printf("Please enter a filename: ./comp file.txt");
}
in = fopen(argv[1], "r");
if(in == NULL)
return 0;
if(strlen(argv[1]) > 128)
{
printf("Filename too long");
return 1;
}
sprintf(filename, "%s.ss", argv[1]);
sprintf(filedecout, "%s.1", argv[1]);
printf("Original size: %ld\n", fsize(in));
fclose(in);
for(uint8_t i = 1; i <= 6; ++i)
printf("Compressed (%i): %u, decompressed: (%u)\n", i, ss_compress(argv[1], filename, 20000000, i), ss_decompress(filename, filedecout));
return 0;
}
*/

View File

@ -31,6 +31,7 @@
void syn_error(char *message);
void syn_warn(char *message);
long fsize (FILE *in);
char *strip_nl(char *string);
int file_exists(char *path);
int is_dir(char *path);

View File

@ -18,7 +18,7 @@
// For slidescript compression algorithm
#include "inc/tar.h"
#include "inc/compression.h"
#include "lz78/wrapper.h"
#define strtok_next(s) strtok_r (NULL, s, &strtok_save)
@ -487,9 +487,14 @@ char *process_line (char *line)
{
char *filename;
struct tar_t *archive = NULL;
int fd;
int fd, lzret;
wrapper *lzwrapper;
int bsize = B_SIZE_DEFAULT;
uint8_t w_mode = WRAPPER_MODE_DECOMPRESS;
uint8_t w_type = LZ78_ALGORITHM;
retbuf = qmalloc(QM_SS, 8129);
tar_free_pool();
tok_srch = strtok_next ("\"");
if (tok_srch == NULL)
{
@ -505,27 +510,33 @@ char *process_line (char *line)
filename = parse_vars(tok_srch);
FILE *in;
char origsize[128];
char filedecout[MAX_FILENAME_LEN+5];
in = fopen(filename, "rb");
if (in == NULL)
{
x_warn("ss:warn:compress, failed to open tar for compression");
return NULL;
}
char filedecout[128];
sprintf(filedecout, "uncompressed.tar");
sprintf(origsize, "%ld", fsize(in));
if (bsize <= 0) {
x_warn("ss:warn:compress, default buffer not set?");
return NULL;
}
uint32_t deco_return = ss_decompress(filename, filedecout);
retbuf = qmalloc(QM_SS, (sizeof(deco_return) + strlen(filename) + strlen(origsize) + 40));
sprintf(retbuf, "ss: %s: decompressed: %s -> %u", filename, origsize, deco_return);
/* Creates a wrapper instance */
lzwrapper = wrapper_new(w_mode, w_type, NULL);
if (lzwrapper == NULL) {
x_warn("ss:warn:decompress, failed to open lz78 wrapper socket!");
return NULL;
}
fclose(in);
/* Executes the wrapper function */
lzret = wrapper_exec(lzwrapper, filename, filedecout);
if (lzret != WRAPPER_SUCCESS)
{
x_warn("ss:warn:decompress, failed to decompress tarball: %d", lzret);
return NULL;
}
/* Destroys the wrapper instance */
wrapper_destroy(lzwrapper);
// open existing file
if ((fd = open(filedecout, O_RDWR)) < 0) {
@ -562,9 +573,14 @@ char *process_line (char *line)
else if (strncmp("compress",tok_srch,8) == 0)
{
char filename[MAX_FILENAME_LEN+1]; // Files to be added into the archive
char comp_size[128];
struct tar_t *archive = NULL;
int fd;
int fd, lzret;
wrapper *lzwrapper;
int bsize = B_SIZE_DEFAULT;
uint8_t w_mode = WRAPPER_MODE_COMPRESS;
uint8_t w_type = LZ78_ALGORITHM;
retbuf = qmalloc(QM_SS, 8129);
tar_free_pool();
tok_srch = strtok_next ("\"");
@ -645,45 +661,39 @@ char *process_line (char *line)
close(fd); // don't bother checking for fd < 0
FILE *in;
char origsize[128];
char file_comp[MAX_FILENAME_LEN+9];
char filedecout[MAX_FILENAME_LEN+10];
in = fopen(filename, "rb");
if (in == NULL)
{
x_warn("ss:warn:compress, failed to open tar for compression");
sprintf(file_comp, "%s.tar.ss", filename);
if (bsize <= 0) {
x_warn("ss:warn:compress, default buffer not set?");
return NULL;
}
sprintf(file_comp, "%s.tar.ss", filename);
sprintf(filedecout, "%s.tar.1", filename);
sprintf(origsize, "%ld", fsize(in));
sprintf(comp_size, "%s", origsize);
fclose(in);
for(uint32_t i = 1; i < 7; ++i)
{
uint32_t comp_return = ss_compress(filename, file_comp, i);
uint32_t deco_return = ss_decompress(file_comp, filedecout);
if (atoi(comp_size) < (int)comp_return && comp_return != 0 && deco_return != 0 && i != 1)
{
retbuf = qmalloc(QM_SS, sizeof(deco_return) + sizeof(comp_return) + strlen(file_comp) + 40);
sprintf(retbuf, "ss: %s: compressed: %u -> %u", file_comp, deco_return, comp_return);
break;
}
sprintf(comp_size, "%u", comp_return);
printf("pass %u decompressed/compressed: %u/%u\n", i, deco_return, comp_return);
fflush(stdout);
/* Creates a wrapper instance */
lzwrapper = wrapper_new(w_mode, w_type, NULL);
if (lzwrapper == NULL) {
x_warn("ss:warn:compress, failed to open lz78 wrapper socket!");
return NULL;
}
/* Executes the wrapper function */
lzret = wrapper_exec(lzwrapper, filename, file_comp);
if (lzret != WRAPPER_SUCCESS)
{
x_warn("ss:warn:compress, error on compress: %d", lzret);
return NULL;
}
else
{
sprintf(retbuf, "Compressed -> %s.tar.ss", filename);
}
/* Destroys the wrapper instance */
wrapper_destroy(lzwrapper);
// Remove the decompressed version for sanity check
remove(filedecout);
remove(filename);
return retbuf;

305
src/lz78/bitio.c Normal file
View File

@ -0,0 +1,305 @@
/*
* Basic implementation of LZ78 compression algorithm
*
* Copyright (C) 2010 evilaliv3 <giovanni.pellerano@evilaliv3.org>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include "bitio.h"
/*
 * State of a bit-granular stream layered on a file descriptor.
 * Allocated by bit_open() as one block: the struct followed by
 * buff_size / 8 extra bytes that extend buff.
 */
struct __bit_file {
int fd; /* File descriptor */
int mode; /* ACCESS_READ or ACCESS_WRITE, as checked by bit_open() */
UINTMAX_T buff_size; /* Buffer size (bits) */
UINTMAX_T w_start; /* Window start (bits): offset of the first pending bit */
UINTMAX_T w_len; /* Window length (bits): number of pending bits */
char buff[1]; /* Buffer (contiguous memory area), over-allocated by bit_open() */
};
/* Return max value that can be represented using UINTMAX_T */
UINTMAX_T max_index() {
UINTMAX_T max = -1;
max /= 8;
return max;
}
/*
 * Create a bit stream on top of the already opened descriptor fd.
 *
 * mode      ACCESS_READ or ACCESS_WRITE.
 * buff_size size of the internal buffer in bits; must be a non-zero
 *           multiple of 8. Values above max_index() are reduced to the
 *           largest multiple of 8 that does not exceed it.
 *
 * Returns the new bit_file, or NULL if an argument is invalid, if a
 * zero-length read/write on fd does not return 0, or if memory cannot be
 * allocated. fd is closed only in the allocation-failure case; on every
 * other failure it is left open.
 */
bit_file* bit_open(int fd, int mode, UINTMAX_T buff_size) {
    bit_file* bfp;
    UINTMAX_T limit;
    int ret;

    if (mode != ACCESS_READ && mode != ACCESS_WRITE)
        return NULL;

    /* Zero-length transfer: probes that fd accepts the requested operation */
    if (mode == ACCESS_READ)
        ret = read(fd, NULL, 0);
    else /* mode == ACCESS_WRITE */
        ret = write(fd, NULL, 0);
    if (ret != 0)
        return NULL;

    /* A zero-sized buffer would later be used as a modulus and as a
       read() length, so it is rejected together with partial bytes */
    if (buff_size == 0 || buff_size % 8 != 0)
        return NULL;

    /* Keep the size a whole number of bytes after clamping */
    limit = max_index();
    limit -= limit % 8;
    if (buff_size > limit)
        buff_size = limit;

    /* Buffer allocation: struct and buffer share one zeroed block */
    bfp = calloc(1, sizeof(bit_file) + buff_size / 8);
    if (bfp == NULL) {
        close(fd);
        return NULL;
    }

    bfp->fd = fd;
    bfp->mode = mode;
    bfp->buff_size = buff_size;
    /* bfp->w_start and bfp->w_len are initialized by calloc */
    return bfp;
}
/*
 * Read up to n_bits bits from bfp into buff_out.
 *
 * Bits are taken least-significant first from each buffered byte and
 * stored least-significant first into buff_out, starting at bit ofs
 * (0..7) of its first byte.
 *
 * Returns the number of bits delivered. This is less than n_bits when
 * read() returns 0 or fails with EAGAIN before enough bits are available.
 * Returns -1 if bfp or buff_out is NULL, ofs is above 7, bfp was not
 * opened with ACCESS_READ, or read() fails with any other error.
 */
int bit_read(bit_file* bfp, char* buff_out, UINTMAX_T n_bits, uint8_t ofs) {
uint8_t* base;
uint8_t mask;
uint8_t r_mask;
uint8_t writebit;
const uint8_t* readptr;
UINTMAX_T buff_ready_bytes;
UINTMAX_T bits_read = 0;
UINTMAX_T bits_read_total = 0;
UINTMAX_T buff_size;
UINTMAX_T w_start;
UINTMAX_T w_len;
UINTMAX_T c;
uint8_t aligned;
if (bfp == NULL || buff_out == NULL || ofs > 7)
return -1;
if (bfp->mode != ACCESS_READ)
return -1;
/* Work on local copies of the window; they are stored back at the end */
buff_size = bfp->buff_size;
w_start = bfp->w_start;
w_len = bfp->w_len;
/* mask selects the next bit to set or clear in the output byte */
mask = 1 << ofs;
base = (uint8_t*) buff_out;
/* Check if input and output are aligned to byte */
aligned = (mask == 1 && (w_start % 8 == 0)) ? 1 : 0;
while (n_bits > 0) {
/* Buffer refill if needed */
if (w_len == 0) {
c = read(bfp->fd, bfp->buff, buff_size / 8);
/* c is unsigned, so a -1 result from read() is detected through
   this cast */
if (c == (uint32_t)-1) {
if (errno == EAGAIN) {
/* Nothing available right now: hand back what was read so far */
errno = 0;
break;
} else {
return -1;
}
} else if (c == 0) {
/* read() returned 0: no more data */
break;
}
w_start = 0;
w_len = c * 8;
}
readptr = (uint8_t*)&(bfp->buff) + w_start / 8;
if (aligned && w_len > 7 && n_bits >= w_len) {
/* Optimization: due to alignment we can use memcpy */
/* Taken only when the request covers everything left in the window */
buff_ready_bytes = w_len / 8;
memcpy(base, readptr, buff_ready_bytes);
base += buff_ready_bytes;
bits_read = buff_ready_bytes * 8;
w_start = (w_start + bits_read) % buff_size;
w_len -= bits_read;
n_bits -= bits_read;
bits_read_total += bits_read;
} else {
/* Single bit read */
r_mask = 1 << w_start % 8;
writebit = (*readptr & r_mask) ? 1 : 0;
if (writebit == 0) {
*base &= ~mask;
} else {
*base |= mask;
}
w_start = ((w_start + 1) % buff_size);
--w_len;
--n_bits;
++bits_read_total;
if (mask == 0x80) {
/* Output byte complete: move to the next one and re-evaluate the
   alignment using the already advanced w_start */
mask = 1;
++base;
aligned = (mask == 1 && (w_start % 8 == 0)) ? 1 : 0;
} else {
mask <<= 1;
}
}
}
/* Update */
bfp->buff_size = buff_size;
bfp->w_start = w_start;
bfp->w_len = w_len;
return bits_read_total;
}
/*
 * Append n_bits bits taken from buff_in to the stream bfp.
 *
 * Bits are read least-significant first, starting at bit ofs (0..7) of
 * the first byte of buff_in, and stored least-significant first in the
 * internal buffer. Whenever the buffer becomes full it is handed to
 * bit_flush().
 *
 * Returns the number of bits accepted. This is less than n_bits when a
 * flush could not empty the buffer. Returns -1 if bfp or buff_in is NULL,
 * ofs is above 7, bfp was not opened with ACCESS_WRITE, or bit_flush()
 * fails.
 *
 * NOTE(review): pos is computed as w_start + w_len without wrapping; this
 * looks safe only while w_start is 0 on entry - confirm behaviour after a
 * partial flush.
 */
int bit_write(bit_file* bfp, const char* buff_in, UINTMAX_T n_bits, uint8_t ofs) {
    UINTMAX_T ret = 0;
    const uint8_t* base;
    uint8_t mask;
    uint8_t readbit;
    uint8_t* writeptr;
    UINTMAX_T pos;
    UINTMAX_T buff_free_bits;
    UINTMAX_T buff_free_bytes;
    UINTMAX_T bits_written = 0;
    uint8_t aligned;

    if (bfp == NULL || buff_in == NULL || ofs > 7)
        return -1;
    if (bfp->mode != ACCESS_WRITE)
        return -1;

    /* mask selects the next bit to take from the input byte */
    mask = 1 << ofs;
    base = (uint8_t*)buff_in;
    pos = bfp->w_start + bfp->w_len;
    buff_free_bits = bfp->buff_size - bfp->w_len;

    /* Check if input and output are aligned to byte */
    aligned = (mask == 1 && (pos % 8 == 0)) ? 1 : 0;

    while (n_bits > 0) {
        writeptr = (uint8_t*)&(bfp->buff) + pos / 8;

        if (aligned && buff_free_bits > 7 && n_bits >= buff_free_bits) {
            /* Optimization: due to alignment we can use memcpy */
            buff_free_bytes = buff_free_bits / 8;
            memcpy(writeptr, base, buff_free_bytes);
            base += buff_free_bytes;
            bits_written = buff_free_bytes * 8;
            pos += bits_written;
            bfp->w_len += bits_written;
            n_bits -= bits_written;
            ret += bits_written;
            buff_free_bits -= bits_written;
        } else {
            /* Single bit write */
            readbit = (*base & mask) ? 1 : 0;
            if (readbit == 0) {
                *writeptr &= ~(1 << pos % 8);
            } else {
                *writeptr |= (1 << pos % 8);
            }

            ++pos;
            ++(bfp->w_len);
            --(n_bits);
            --buff_free_bits;
            ++ret;

            if (mask == 0x80) {
                /* Input byte consumed. The alignment test must look at the
                   position of the NEXT bit, so it runs after pos has been
                   advanced; testing the old position could enable the
                   memcpy path in the middle of an output byte. */
                mask = 1;
                ++base;
                aligned = (pos % 8 == 0) ? 1 : 0;
            } else {
                mask <<= 1;
            }
        }

        /* Flush if needed */
        if (bfp->w_len == bfp->buff_size) {
            if (bit_flush(bfp) == -1)
                return -1;
            /* Flush left data behind: report what was accepted */
            if (bfp->w_len != 0)
                return ret;
            pos = bfp->w_start + bfp->w_len;
            buff_free_bits = bfp->buff_size;
        }
    }

    return ret;
}
/*
 * Write the complete bytes pending in the buffer of bfp to its descriptor.
 *
 * Only whole bytes (w_len / 8) are written; the window is advanced by the
 * amount actually written. If write() fails with EAGAIN the function
 * stops early and still returns 0, leaving the unwritten part pending.
 *
 * Returns 0 on success, -1 if bfp is NULL, if bfp was not opened with
 * ACCESS_WRITE (a read buffer must never be written back), or if write()
 * fails with an error other than EAGAIN.
 */
int bit_flush(bit_file* bfp) {
    UINTMAX_T count;
    UINTMAX_T written;
    UINTMAX_T n;
    uint8_t* base;

    if (bfp == NULL)
        return -1;
    if (bfp->mode != ACCESS_WRITE)
        return -1;

    count = bfp->w_len / 8;
    written = 0;
    base = (uint8_t*) bfp->buff + bfp->w_start / 8;

    while (count > 0) {
        n = write(bfp->fd, base, count);
        /* n is unsigned, so a -1 result from write() is detected through
           this cast */
        if (n == (uint32_t)-1) {
            if (errno == EAGAIN) {
                errno = 0;
                break;
            } else {
                return -1;
            }
        }
        base += n;
        written += n;
        count -= n;
    }

    bfp->w_start = (bfp->w_start + written * 8) % bfp->buff_size;
    bfp->w_len -= written * 8;
    return 0;
}
/*
 * Release a bit stream: flush pending output, free the bit_file and close
 * its descriptor.
 *
 * For a stream opened with ACCESS_WRITE the pending bit count is first
 * rounded up to a whole byte so the last partial byte is written too.
 * A stream opened for reading is not flushed, since its buffer holds
 * input data.
 *
 * Returns 0 on success, -1 if bfp is NULL, if pending output could not be
 * written completely, or if close() fails. The bit_file is freed and the
 * descriptor closed in every case where bfp is not NULL.
 */
int bit_close(bit_file* bfp) {
    int fd;
    int status = 0;

    if (bfp == NULL)
        return -1;

    fd = bfp->fd;

    if (bfp->mode == ACCESS_WRITE) {
        /* Pad to a byte boundary: bit_flush() only writes whole bytes */
        if (bfp->w_len % 8)
            bfp->w_len += 8 - (bfp->w_len % 8);
        if (bit_flush(bfp) == -1 || bfp->w_len != 0)
            status = -1;
    }

    free(bfp);
    if (close(fd) == -1)
        status = -1;

    return status;
}

53
src/lz78/bitio.h Normal file
View File

@ -0,0 +1,53 @@
/*
* Basic implementation of LZ78 compression algorithm
*
* Copyright (C) 2010 evilaliv3 <giovanni.pellerano@evilaliv3.org>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __BITIO_H
#define __BITIO_H
#include <stdint.h>
#include <fcntl.h>
#include <stdio.h>
/* Default buffer size, expressed in bits as expected by bit_open() */
#define B_SIZE_DEFAULT 1048576
/* Unsigned type used for every bit count and bit offset of the API */
#define UINTMAX_T uint32_t
/* Access modes accepted by bit_open() for reading and writing */
#define ACCESS_READ (O_RDONLY | O_NONBLOCK)
#define ACCESS_WRITE (O_WRONLY | O_CREAT | O_TRUNC | O_NONBLOCK)
/* The opaque type used for bitwise streams */
typedef struct __bit_file bit_file;
/* Creates a new bit_file on the open descriptor fd with the specified mode
   and buffer size (bufsize is in bits); returns NULL on failure */
bit_file* bit_open(int fd, int mode, UINTMAX_T bufsize);
/* Reads up to n_bits bits into base starting at bit ofs of its first byte
   (a memory read, occasionally an I/O read); returns the number of bits
   read or -1 on error */
int bit_read(bit_file* bf, char* base, UINTMAX_T n_bits, uint8_t ofs);
/* Writes n_bits bits from base starting at bit ofs of its first byte
   (a memory write, occasionally an I/O flush); returns the number of bits
   written or -1 on error */
int bit_write(bit_file* bf, const char* base, UINTMAX_T n_bits, uint8_t ofs);
/* Writes the buffered whole bytes out to the file descriptor */
int bit_flush(bit_file* bf);
/* Releases the resources allocated by the bit_file and closes its descriptor */
int bit_close(bit_file* bf);
#endif /* __BITIO_H */

638
src/lz78/lz78.c Normal file
View File

@ -0,0 +1,638 @@
/*
* Basic implementation of LZ78 compression algorithm
*
* Copyright (C) 2010 evilaliv3 <giovanni.pellerano@evilaliv3.org>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>
#include "lz78.h"
/* Code used to represent an EOF */
#define DICT_CODE_EOF 256
/* Code used before to send the size of the dictionary */
#define DICT_CODE_SIZE 257
/* Code used by the compressor to start the operations */
#define DICT_CODE_START 258
/* Code used by the compressor to stop the operations */
#define DICT_CODE_STOP 259
/* Limits dict_size inside [DICT_SIZE_MIN, DICT_SIZE_MAX] */
#ifndef DICT_LIMIT
#define DICT_LIMIT(x) (((x) < (DICT_SIZE_MIN + 1)) ? (DICT_SIZE_MIN + 1) : (((x) > (DICT_SIZE_MAX)) ? (DICT_SIZE_MAX) : (x)))
#endif
/* Compute the threshold for the start of secondary dictionary */
#define DICT_SIZE_THRESHOLD(x) ((x) * 8 / 10)
/* Entry of the hash table used by the compressor to encode data.
   One entry records the edge (parent, label) -> child of the dictionary. */
struct __ht_entry {
uint8_t used; /* Flag indicating if the node is used or not */
uint32_t parent; /* Parent node */
uint16_t label; /* Node's label */
uint32_t child; /* Child node */
};
/* The opaque type of hash table entry used by the compressor */
typedef struct __ht_entry ht_entry;
/* Dictionary of the compressor implemented as a hash table */
struct __ht_dictionary {
ht_entry* root; /* Array of d_size hash table entries */
uint32_t cur_node; /* Current position inside the dictionary; (uint32_t)-1 when no sequence is in progress */
uint32_t prev_node; /* Pointer to the father of cur_node */
uint32_t d_size; /* Size of the dictionary */
uint32_t d_thr; /* Threshold for activation of secondary dictionary */
uint32_t d_next; /* Next code to put in the dictionary */
};
/* The opaque type representing the dictionary used by the compressor */
typedef struct __ht_dictionary ht_dictionary;
/* State of a compressor */
struct __lz78_c {
uint8_t completed; /* Termination flag */
uint32_t d_size; /* Size of the dictionaries */
ht_dictionary* main; /* Main dictionary */
ht_dictionary* secondary; /* Secondary dictionary */
uint32_t bitbuf; /* Buffer containing bits not yet written */
uint32_t n_bits; /* Number of valid bits in the buffer */
};
/* The opaque type representing the state of the compressor */
typedef struct __lz78_c lz78_c;
/* Entry of the dictionary used by the decompressor */
struct __entry {
uint32_t parent; /* Parent node */
uint16_t label; /* Node's label */
};
/* The opaque type of a dictionary entry used by the decompressor */
typedef struct __entry entry;
/* Dictionary of the decompressor */
struct __dictionary {
entry* root; /* Array of d_size entries, indexed by code */
uint32_t d_size; /* Size of the dictionary */
uint32_t d_thr; /* Threshold for activation of secondary dictionary */
uint32_t d_min; /* Minimum size of the dictionary */
uint32_t d_next; /* Next code to put in the dictionary */
uint32_t n_bytes; /* Number of bytes contained in bytebuf */
uint32_t offset; /* Offset of the first valid byte inside bytebuf */
char bytebuf[1]; /* Buffer used to output strings; over-allocated by dictionary_new() */
};
/* The opaque type representing the dictionary used by the decompressor */
typedef struct __dictionary dictionary;
/* State of the decompressor */
struct __lz78_d {
uint8_t completed; /* Termination flag */
dictionary* main; /* Main dictionary */
ht_dictionary* secondary; /* Secondary dictionary */
uint32_t bitbuf; /* Bit buffer (presumably bits read but not yet decoded - confirm in lz78_decompress) */
uint32_t n_bits; /* Number of valid bits contained in the buffer */
};
/* The opaque type representing the status of the decompressor */
typedef struct __lz78_d lz78_d;
/* lz78 instance descriptor */
struct __lz78_instance {
uint8_t mode; /* Discriminate compression operations */
char state[1]; /* Compression/Decompression state struct; lz78_new() over-allocates
                  the instance and accesses this area as lz78_c or lz78_d */
};
/* Return the number of bits needed to represent the given number */
uint8_t bitlen(uint32_t i);
/* Create a new ht_dictionary to be used for the compression */
ht_dictionary* ht_dictionary_new(uint32_t d_size);
/* Update the dictionary depending with input byte
Return:
0 a new entry have been put in the dictionary
-1 switch the current node
*/
int ht_dictionary_update(ht_dictionary* d, uint16_t label);
/* Reset the dictionary associated to the given compressor */
void ht_dictionary_reset(ht_dictionary* d);
/* Destroy the given ht_dictionary object */
void ht_dictionary_destroy(ht_dictionary* d);
/* Create a new dictionary to be used for the decompression */
dictionary* dictionary_new(uint32_t d_size);
/* Update the internal state of the dictionary */
void dictionary_update(dictionary* d, uint32_t code);
/* Reset the dictionary associated to the given decompressor */
void dictionary_reset(dictionary* d);
/* Destroy the given dictionary object */
void dictionary_destroy(dictionary* d);
/* Compress the input byte and modifiy the state of the dictionary */
void compress_byte(lz78_c* o, int c_in);
/* Decompress the input code and modify the state of the dictionary */
int decompress_code(lz78_d* o, uint32_t code);
/* Count the bits needed to represent i: the position of its highest set
   bit plus one, or 0 when i is 0. */
uint8_t bitlen(uint32_t i) {
    uint8_t width;

    for (width = 0; i != 0; i >>= 1)
        ++width;
    return width;
}
/*
 * Allocate an empty compressor dictionary.
 *
 * d_size is first constrained with DICT_LIMIT; the hash table is allocated
 * zeroed with that many entries. The new dictionary starts with
 * d_next = DICT_SIZE_MIN and no current node.
 *
 * Returns the dictionary, or NULL if either allocation fails.
 */
ht_dictionary* ht_dictionary_new(uint32_t d_size) {
    ht_dictionary* table;
    uint32_t slots;

    table = malloc(sizeof(ht_dictionary));
    if (table == NULL)
        return NULL;

    slots = DICT_LIMIT(d_size);
    table->root = calloc(1, sizeof(ht_entry) * slots);
    if (table->root == NULL) {
        free(table);
        return NULL;
    }

    table->cur_node = -1;
    table->d_next = DICT_SIZE_MIN;
    table->d_thr = DICT_SIZE_THRESHOLD(slots);
    table->d_size = slots;
    return table;
}
/*
 * Advance the dictionary d by one input symbol.
 *
 * Returns -1 when the symbol only moves the current node (either it starts
 * a new sequence or the sequence cur_node + label is already known).
 * Returns 0 when a new entry has been inserted; in that case d->prev_node
 * holds the node reached before label, d->cur_node is restarted at label
 * and d->d_next has been incremented.
 */
int ht_dictionary_update(ht_dictionary* d, uint16_t label) {
    uint8_t i;
    uint32_t key;
    uint32_t hash;

    d->prev_node = d->cur_node;

    /* No sequence in progress: the symbol itself becomes the current node */
    if (d->cur_node == (uint32_t)-1) {
        d->cur_node = label;
        return -1;
    }

    /* Bernstein hash function.
       The shift is done on an unsigned 32-bit value: label would otherwise
       be promoted to int and a large shift count could overflow it. */
    key = ((uint32_t)label << bitlen(d->d_size)) + d->cur_node;
    hash = 0;
    for (i = 0; i < 4; ++i) {
        hash = ((hash << 5) + hash) + (key & 0xFF);
        key >>= 8;
    }
    hash %= d->d_size;

    /* Search if current sequence is present, else return an empty hash entry
       where insert it */
    while (d->root[hash].used) {
        if (d->root[hash].parent == d->cur_node &&
            d->root[hash].label == label) {
            d->cur_node = d->root[hash].child;
            return -1;
        } else {
            /* Collision (linear search) */
            hash = (hash + 1) % d->d_size;
        }
    }

    /* At this point, in d->prev_node there is the symbol we will send */

    /* Fill out hash entry */
    d->root[hash].used = 1;
    d->root[hash].parent = d->prev_node;
    d->root[hash].label = label;
    d->root[hash].child = d->d_next;

    /* Update current node */
    d->cur_node = label;

    /* Update next symbol */
    ++(d->d_next);

    return 0;
}
/* Empty the dictionary d: no current node, next code back to DICT_SIZE_MIN
   and every hash table entry cleared. The table itself is kept. */
void ht_dictionary_reset(ht_dictionary* d) {
    d->cur_node = -1;
    d->d_next = DICT_SIZE_MIN;
    memset(d->root, 0, sizeof(ht_entry) * d->d_size);
}
/* Release the dictionary d together with its hash table.
   Passing NULL is allowed and does nothing. */
void ht_dictionary_destroy(ht_dictionary* d) {
    if (d == NULL)
        return;

    /* The table is a separate allocation made by ht_dictionary_new() */
    free(d->root);
    free(d);
}
dictionary* dictionary_new(uint32_t d_size) {
uint16_t i;
dictionary* dict = malloc(sizeof(dictionary) + d_size);
if (dict == NULL)
return NULL;
d_size = DICT_LIMIT(d_size);
dict->root = malloc(sizeof(entry) * d_size);
if (dict->root == NULL) {
free(dict);
return NULL;
}
dict->d_size = d_size;
dict->d_thr = DICT_SIZE_THRESHOLD(d_size);
dict->d_min = DICT_SIZE_MIN;
dict->d_next = DICT_SIZE_MIN;
for (i = 0; i < DICT_SIZE_MIN; ++i) {
dict->root[i].parent = 0;
dict->root[i].label = i;
}
return dict;
}
/*
 * Decode one code with the dictionary d.
 *
 * The byte sequence for code is rebuilt back to front at the end of
 * d->bytebuf by following parent links; afterwards d->offset and
 * d->n_bytes describe where the sequence lies inside bytebuf. A new
 * dictionary entry with parent = code is opened at d_next, and the label
 * of the entry opened by the previous call is filled in.
 *
 * NOTE(review): code is used to index d->root without a range check here.
 */
void dictionary_update(dictionary* d, uint32_t code) {
/* From here on d_size is the index of the LAST byte of bytebuf */
uint32_t d_size = d->d_size - 1;
uint32_t d_next = d->d_next;
uint32_t d_min = d->d_min;
uint32_t i = d_size;
uint32_t p = code;
/* Recover original sequence */
while (1) {
d->bytebuf[i--] = d->root[p].label;
/* Stop at a root symbol, or before i would move below index 0 */
if (p < DICT_SIZE_MIN || i == 0)
break;
p = d->root[p].parent;
}
/* Fill last char with the first char of the sequence */
/* (applies when code is the entry opened by the previous call, whose
   label has not been filled in yet) */
if (code >= d_min && code == d_next - 1)
d->bytebuf[d_size] = d->bytebuf[i + 1];
/* Update last incomplete entry of the dictionary */
if (d_next > d_min)
d->root[d_next - 1].label = d->bytebuf[i + 1];
/* Update */
d->n_bytes = d_size - i;
d->offset = d_size + 1 - d->n_bytes;
/* Open the next entry; its label is filled in by the following call */
d->root[d_next].parent = code;
++(d->d_next);
}
/* Bring the dictionary d back to its initial numbering: both the minimum
   code and the next free code become DICT_SIZE_MIN. Entries are kept. */
void dictionary_reset(dictionary* d) {
    d->d_next = DICT_SIZE_MIN;
    d->d_min = DICT_SIZE_MIN;
}
/* Release the dictionary d and its entry array.
   Passing NULL is allowed and does nothing. */
void dictionary_destroy(dictionary* d) {
    if (d == NULL)
        return;

    free(d->root);
    free(d);
}
/*
 * Feed one input symbol (a byte value, or EOF) to the compressor o.
 *
 * When a code has to be emitted it is left in o->bitbuf with its width in
 * o->n_bits; the caller is expected to write it out. The current node of
 * the main dictionary doubles as a state marker:
 *   DICT_CODE_START  first call: the dictionary size is queued, then the
 *                    symbol is processed normally
 *   DICT_CODE_EOF    the EOF code is queued and the state moves to STOP
 *   DICT_CODE_STOP   o->completed is set
 */
void compress_byte(lz78_c* o, int c_in) {
/* Optimization pointers */
ht_dictionary* d_main = o->main;
ht_dictionary* d_sec = o->secondary;
switch(d_main->cur_node) {
case DICT_CODE_START:
/* Queue the dictionary size using the widest code length, then fall out
   of the switch so c_in is still handled below */
o->bitbuf = d_main->d_size;
o->n_bits = bitlen(DICT_SIZE_MAX);
d_main->cur_node = -1;
break;
case DICT_CODE_EOF:
o->bitbuf = d_main->cur_node;
o->n_bits = bitlen(d_main->d_next);
d_main->cur_node = DICT_CODE_STOP;
return;
case DICT_CODE_STOP:
o->completed = 1;
return;
default:
break;
}
c_in = c_in == EOF ? DICT_CODE_EOF : c_in;
/* Dictionaries update */
/* A non-zero result means no entry was added, so there is no new code to
   emit; the secondary dictionary still follows the input past the threshold */
if (ht_dictionary_update(d_main, c_in) != 0) {
if (d_main->d_next >= d_main->d_thr)
ht_dictionary_update(d_sec, c_in);
return;
}
/* A new entry was added: emit the node reached before c_in */
o->bitbuf = d_main->prev_node;
o->n_bits = bitlen(d_main->d_next - 1);
/* Dictionaries swap */
/* Main dictionary is full: the secondary takes over and the old main is
   cleared to become the new secondary */
if (d_main->d_next == d_main->d_size) {
o->main = o->secondary;
o->secondary = d_main;
d_main = d_sec;
d_sec = o->secondary;
d_main->cur_node = c_in;
ht_dictionary_reset(d_sec);
}
/* Update of secondary if threshold is reached */
if (d_main->d_next >= d_main->d_thr)
ht_dictionary_update(d_sec, c_in);
}
/*
 * Process one code read from the compressed stream.
 *
 * Returns 0 on success, -1 if a dictionary cannot be allocated, and -2 if
 * a data code arrives while a dictionary is missing (bad compressed file).
 * After a successful data code the decoded bytes are available in
 * o->main->bytebuf at o->main->offset, o->main->n_bytes bytes long.
 *
 * NOTE(review): code is passed to dictionary_update() without being
 * checked against the dictionary size - confirm the caller validates it.
 */
int decompress_code(lz78_d* o, uint32_t code) {
uint32_t i;
int c_in;
/* Optimization pointers */
dictionary* d_main = o->main;
ht_dictionary* d_sec = o->secondary;
switch(code) {
case DICT_CODE_EOF:
o->completed = 1;
return 0;
case DICT_CODE_START:
case DICT_CODE_SIZE:
/* d_next == DICT_SIZE_MAX marks that the next code carries the
   dictionary size */
d_main->d_next = DICT_SIZE_MAX;
o->n_bits = 0;
return 0;
default:
/* Initial operations */
/* code is the dictionary size: rebuild both dictionaries with it */
if (d_main->d_next == DICT_SIZE_MAX) {
dictionary_destroy(d_main);
d_main = dictionary_new(code);
o->main = d_main;
if (d_main == NULL)
return -1;
ht_dictionary_destroy(d_sec);
d_sec = ht_dictionary_new(code);
o->secondary = d_sec;
if (d_sec == NULL) {
dictionary_destroy(d_main);
o->main = NULL;
return -1;
}
o->bitbuf = 0;
o->n_bits = 0;
return 0;
}
break;
}
/* Bad compressed file */
if (d_sec == NULL || d_main == NULL)
return -2;
dictionary_update(d_main, code);
/* Update of secondary if threshold is reached */
/* The secondary dictionary is fed the decoded bytes one at a time */
if (d_main->d_next > d_main->d_thr) {
for (i = 0; i < d_main->n_bytes; ++i) {
c_in = (uint8_t) d_main->bytebuf[d_main->offset + i];
ht_dictionary_update(d_sec, c_in);
}
}
/* Dictionaries swap */
/* Main dictionary is full: restart it from the contents of the secondary
   by copying every used hash entry to the slot named by its child code */
if (d_main->d_next == d_main->d_size) {
dictionary_reset(d_main);
d_main->d_min = d_sec->d_next;
d_main->d_next = d_sec->d_next;
for (i = 0; i < d_sec->d_size && d_sec->d_next; ++i) {
if (d_sec->root[i].used) {
d_main->root[d_sec->root[i].child].parent =
d_sec->root[i].parent;
d_main->root[d_sec->root[i].child].label =
d_sec->root[i].label;
--(d_sec->d_next);
}
}
ht_dictionary_reset(d_sec);
}
return 0;
}
/*
 * Create an lz78 instance.
 *
 * cmode  LZ78_MODE_COMPRESS or LZ78_MODE_DECOMPRESS.
 * dsize  requested dictionary size for compression; 0 selects
 *        DICT_SIZE_DEFAULT and the value is constrained with DICT_LIMIT.
 *        Not used for decompression.
 *
 * The instance is allocated zeroed and large enough to hold either state
 * struct in its state area, so every field that is not set explicitly
 * (in particular the decompressor's secondary dictionary pointer and bit
 * buffer) starts as NULL / 0.
 *
 * Returns the instance, or NULL if cmode is not one of the two modes or an
 * allocation fails.
 *
 * NOTE(review): state is a char array placed right after a uint8_t, so the
 * state structs are accessed at an address that is not aligned for them.
 */
lz78_instance* lz78_new(uint8_t cmode, uint32_t dsize) {
    lz78_instance* i;
    lz78_c* c;
    lz78_d* d;
    size_t max_dim = (sizeof(lz78_c) > sizeof(lz78_d)) ? sizeof(lz78_c) : sizeof(lz78_d);

    i = calloc(1, sizeof(lz78_instance) + max_dim);
    if (i == NULL)
        return NULL;

    i->mode = cmode;

    switch (cmode) {
    case LZ78_MODE_COMPRESS:
        c = (lz78_c*)&i->state;
        dsize = (dsize == 0) ? DICT_SIZE_DEFAULT : dsize;
        c->d_size = DICT_LIMIT(dsize);
        c->completed = 0;

        c->main = ht_dictionary_new(c->d_size);
        if (c->main == NULL) {
            free(i);
            return NULL;
        }

        c->secondary = ht_dictionary_new(c->d_size);
        if (c->secondary == NULL) {
            ht_dictionary_destroy(c->main);
            free(i);
            return NULL;
        }

        /* The START code is queued as the first thing to be written and
           also marks the initial state for compress_byte() */
        c->bitbuf = DICT_CODE_START;
        c->n_bits = bitlen(DICT_SIZE_MIN);
        c->main->cur_node = DICT_CODE_START;
        return i;

    case LZ78_MODE_DECOMPRESS:
        d = (lz78_d*)&i->state;
        d->completed = 0;

        /* Minimal dictionary; decompress_code() replaces it once the real
           size has been read from the stream */
        d->main = dictionary_new(DICT_SIZE_MIN);
        if (d->main == NULL) {
            free(i);
            return NULL;
        }
        return i;

    default:
        /* Unknown mode: do not leak the instance */
        free(i);
        return NULL;
    }
}
/*
 * Compress everything readable from fd_in and write the codes to fd_out.
 *
 * Each iteration first flushes the bits pending in o->bitbuf, then reads one
 * input byte and hands it (or EOF) to compress_byte(); the loop ends when
 * compress_byte() sets o->completed.
 *
 * lz78:   instance created by lz78_new() in LZ78_MODE_COMPRESS
 * fd_in:  descriptor to read plain data from
 * fd_out: descriptor to write compressed data to
 *
 * Return: LZ78_SUCCESS, LZ78_ERROR_INITIALIZATION (NULL instance),
 *         LZ78_ERROR_MODE (instance not in compress mode), LZ78_ERROR_READ,
 *         LZ78_ERROR_WRITE or LZ78_ERROR_EAGAIN (I/O would block).
 *
 * NOTE(review): the stdio stream wrapped around fd_in is never released, and
 * "out" is only closed on success; releasing them here needs a decision on
 * who owns the descriptors (the caller closes them as well).
 */
uint8_t lz78_compress(lz78_instance* lz78, int fd_in, int fd_out) {
    FILE* in;
    bit_file* out;
    lz78_c* o;
    int bits;
    int c_in;

    if (lz78 == NULL)
        return LZ78_ERROR_INITIALIZATION;
    if (lz78->mode != LZ78_MODE_COMPRESS)
        return LZ78_ERROR_MODE;

    in = fdopen(fd_in, "r");
    if (in == NULL)
        return LZ78_ERROR_READ;
    out = bit_open(fd_out, ACCESS_WRITE, B_SIZE_DEFAULT);
    if (out == NULL)
        return LZ78_ERROR_WRITE;

    o = (lz78_c*)&lz78->state;
    for (;;) {
        /* Emit the code left pending by the previous step */
        if (o->n_bits > 0) {
            bits = bit_write(out, (char*) &o->bitbuf, o->n_bits, 0);
            if (bits == -1)
                return LZ78_ERROR_WRITE;
            o->bitbuf >>= bits;
            o->n_bits -= bits;
            if (o->n_bits > 0)
                return LZ78_ERROR_EAGAIN;
        }

        c_in = fgetc(in);
        /* EOF alone is the normal end of input; errno is only meaningful
           when the stream error indicator says the read actually failed,
           otherwise a stale errno would be reported as a read error */
        if (c_in == EOF && ferror(in)) {
            if (errno == EAGAIN) {
                errno = 0;
                return LZ78_ERROR_EAGAIN;
            }
            return LZ78_ERROR_READ;
        }

        /* A genuine EOF is forwarded too, so the encoder can finish */
        compress_byte(o, c_in);
        if (o->completed == 1) {
            bit_close(out);
            return LZ78_SUCCESS;
        }
    }
}
/*
 * Decompress the codes readable from fd_in and write the data to fd_out.
 *
 * Each iteration writes the bytes pending in the main dictionary buffer,
 * reads the next code (bitlen(d_next) bits wide) and applies it with
 * decompress_code(); the loop ends when the end-of-stream code is seen.
 *
 * lz78:   instance created by lz78_new() in LZ78_MODE_DECOMPRESS
 * fd_in:  descriptor to read compressed data from
 * fd_out: descriptor to write plain data to
 *
 * Return: LZ78_SUCCESS, LZ78_ERROR_INITIALIZATION (NULL instance),
 *         LZ78_ERROR_MODE (instance not in decompress mode),
 *         LZ78_ERROR_READ, LZ78_ERROR_WRITE, LZ78_ERROR_EAGAIN (I/O would
 *         block or short read), LZ78_ERROR_DICTIONARY (allocation failure)
 *         or LZ78_ERROR_DECOMPRESS (malformed stream).
 *
 * NOTE(review): neither "in" nor the stdio stream around fd_out is released
 * here; the caller closes the raw descriptors.
 */
uint8_t lz78_decompress(lz78_instance* lz78, int fd_in, int fd_out) {
    bit_file* in;
    FILE* out;
    lz78_d* o;
    dictionary* d_main;
    uint32_t bits, written;
    int ret;

    if (lz78 == NULL)
        return LZ78_ERROR_INITIALIZATION;
    if (lz78->mode != LZ78_MODE_DECOMPRESS)
        return LZ78_ERROR_MODE;

    in = bit_open(fd_in, ACCESS_READ, B_SIZE_DEFAULT);
    if (in == NULL)
        return LZ78_ERROR_READ;
    out = fdopen(fd_out, "w");
    if (out == NULL)
        return LZ78_ERROR_WRITE;

    o = (lz78_d*) &lz78->state;
    for (;;) {
        /* Must be reloaded every cycle: decompress_code() may replace it */
        d_main = o->main;

        if (d_main->n_bytes) {
            /* fwrite() reports failure through a short count, never -1 */
            written = (uint32_t) fwrite(d_main->bytebuf + d_main->offset, 1,
                                        d_main->n_bytes, out);
            if (written != d_main->n_bytes) {
                /* Remember what was accepted so a retry resumes from there */
                d_main->offset += written;
                d_main->n_bytes -= written;
                if (errno == EAGAIN) {
                    errno = 0;
                    return LZ78_ERROR_EAGAIN;
                }
                return LZ78_ERROR_WRITE;
            }
        }

        o->bitbuf = 0;
        o->n_bits = 0;
        bits = bitlen(d_main->d_next);
        if (bits > 0) {
            ret = bit_read(in, (char*) &o->bitbuf, bits, 0);
            if (ret == -1)
                return LZ78_ERROR_READ;
            o->n_bits = ret;
            if (bits != o->n_bits)
                return LZ78_ERROR_EAGAIN;
        }

        ret = decompress_code(o, o->bitbuf);
        if (ret < 0) {
            /* -1 is an allocation failure; any other negative value is
               treated as a malformed stream instead of being ignored */
            if (ret == -1)
                return LZ78_ERROR_DICTIONARY;
            return LZ78_ERROR_DECOMPRESS;
        }

        if (o->completed == 1) {
            /* A failed flush means output was lost: do not report success */
            if (fflush(out) == EOF)
                return LZ78_ERROR_WRITE;
            return LZ78_SUCCESS;
        }
    }
}
/*
 * Release an instance created by lz78_new(), together with the
 * dictionaries owned by its mode-specific state. NULL is ignored.
 */
void lz78_destroy(lz78_instance *lz78) {
    if (lz78 == NULL)
        return;

    if (lz78->mode == LZ78_MODE_COMPRESS) {
        /* Compressor state owns two hash-table dictionaries */
        lz78_c *enc = (lz78_c*)&lz78->state;
        ht_dictionary_destroy(enc->main);
        ht_dictionary_destroy(enc->secondary);
    } else if (lz78->mode == LZ78_MODE_DECOMPRESS) {
        /* Decompressor state owns one plain and one hash-table dictionary */
        lz78_d *dec = (lz78_d*)&lz78->state;
        dictionary_destroy(dec->main);
        ht_dictionary_destroy(dec->secondary);
    }

    free(lz78);
}

70
src/lz78/lz78.h Normal file
View File

@ -0,0 +1,70 @@
/*
* Basic implementation of LZ78 compression algorithm
*
* Copyright (C) 2010 evilaliv3 <giovanni.pellerano@evilaliv3.org>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __LZ78_H
#define __LZ78_H
#include "bitio.h"
/* Modes accepted by lz78_new(); an instance only works in the mode it was
   created with */
#define LZ78_MODE_COMPRESS 0
#define LZ78_MODE_DECOMPRESS 1
/* List of lz78-level return codes (returned by lz78_compress() and
   lz78_decompress()) */
#define LZ78_SUCCESS 0
#define LZ78_ERROR_DICTIONARY 1
#define LZ78_ERROR_READ 2
#define LZ78_ERROR_WRITE 3
#define LZ78_ERROR_EAGAIN 4
#define LZ78_ERROR_COMPRESS 5
#define LZ78_ERROR_DECOMPRESS 6
#define LZ78_ERROR_INITIALIZATION 7
#define LZ78_ERROR_MODE 8
/* Minimum, default and maximum size of the dictionary */
#define DICT_SIZE_MIN 260
#define DICT_SIZE_DEFAULT 4096
#define DICT_SIZE_MAX 1048576
/* Opaque type representing the compression instance */
typedef struct __lz78_instance lz78_instance;
/* Allocate and return an instance of lz78 compressor
cmode: LZ78_MODE_COMPRESS or LZ78_MODE_DECOMPRESS
dsize: specify the size of the dictionary (byte); 0 selects
       DICT_SIZE_DEFAULT (only used in compress mode)
Return: the new instance, or NULL on failure; release it with lz78_destroy()
*/
lz78_instance* lz78_new(uint8_t cmode, uint32_t dsize);
/* Compress the input stream by sending the result to the output stream
lz78: current instance of compressor obtained by invoking lz78_new()
      with LZ78_MODE_COMPRESS
fd_in: file descriptor the plain data is read from
fd_out: file descriptor the compressed data is written to
Return: one of defined lz78-level return codes
*/
uint8_t lz78_compress(lz78_instance* lz78, int fd_in, int fd_out);
/* Decompress the input stream by sending the result to the output stream
lz78: current instance of compressor obtained by invoking lz78_new()
      with LZ78_MODE_DECOMPRESS
fd_in: file descriptor the compressed data is read from
fd_out: file descriptor the plain data is written to
Return: one of defined lz78-level return codes
*/
uint8_t lz78_decompress(lz78_instance* lz78, int fd_in, int fd_out);
/* Deallocate current instance and the dictionaries it owns (NULL is ignored) */
void lz78_destroy(lz78_instance* lz78);
#endif /* __LZ78_H */

291
src/lz78/wrapper.c Normal file
View File

@ -0,0 +1,291 @@
/*
* Basic implementation of LZ78 compression algorithm
*
* Copyright (C) 2010 evilaliv3 <giovanni.pellerano@evilaliv3.org>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "wrapper.h"
/* One entry of the algorithm table: a name and the constant it maps to */
typedef struct __algorithm {
    char* name;   /* Name the algorithm is looked up by */
    uint8_t type; /* Matching *_ALGORITHM constant */
} algorithm;

/* Table of available algorithms; the entry whose name is NULL ends it */
const algorithm algo_list[] = {
    { .name = "lz78", .type = LZ78_ALGORITHM },
    { .name = NULL,   .type = UNKNOWN_ALGORITHM }
};
/* Struct representing the wrapper used for compression or decompression */
struct __wrapper {
uint8_t type; /* Algorithm used to compress or decompress data (*_ALGORITHM constant) */
uint8_t mode; /* Flag indicating compress/decompress mode (WRAPPER_MODE_* constant) */
void* data; /* Opaque algorithm instance; for LZ78_ALGORITHM the value returned by lz78_new() */
};
/* Global variable representing the current error stored: the last code
   passed to wrapper_return(), saved before translation, so it may hold
   either an LZ78_* or a WRAPPER_* value; reported by wrapper_perror() */
uint8_t wrapper_cur_err = WRAPPER_SUCCESS;
/*
 * Record code as the current error and translate an LZ78_* code into its
 * WRAPPER_* counterpart. Codes without a translation are returned as given.
 */
uint8_t wrapper_return(uint8_t code) {
    uint8_t mapped = code;

    wrapper_cur_err = code;

    if (code == LZ78_SUCCESS)
        mapped = WRAPPER_SUCCESS;
    else if (code == LZ78_ERROR_READ)
        mapped = WRAPPER_ERROR_READ;
    else if (code == LZ78_ERROR_WRITE)
        mapped = WRAPPER_ERROR_WRITE;
    else if (code == LZ78_ERROR_EAGAIN)
        mapped = WRAPPER_ERROR_EAGAIN;
    else if (code == LZ78_ERROR_COMPRESS)
        mapped = WRAPPER_ERROR_COMPRESS;
    else if (code == LZ78_ERROR_DECOMPRESS)
        mapped = WRAPPER_ERROR_DECOMPRESS;
    else if (code == LZ78_ERROR_DICTIONARY ||
             code == LZ78_ERROR_INITIALIZATION ||
             code == LZ78_ERROR_MODE)
        mapped = WRAPPER_ERROR_GENERIC;

    return mapped;
}
uint8_t get_algorithm(char* type) {
uint8_t i = 0;
while (algo_list[i].name != NULL) {
if (strcmp(type, algo_list[i].name) == 0)
return algo_list[i].type;
++i;
}
return UNKNOWN_ALGORITHM;
}
/*
 * Parse a size string such as "4096", "64K" or "2M".
 *
 * size: decimal number optionally followed by 'K' (x1024) or 'M' (x1048576);
 *       only the last character is inspected for the suffix
 *
 * Return: the size as a non-negative int; 0 when size is NULL, empty, not a
 *         number or not positive; INT_MAX when the result would overflow.
 */
int byte_size(char* size) {
    size_t len;
    long value;
    int shift = 0;

    if (size == NULL)
        return 0;
    len = strlen(size);
    /* Without this guard the suffix lookup below would read size[-1] */
    if (len == 0)
        return 0;

    value = strtol(size, NULL, 10);
    /* Reject before shifting: shifting a negative value is undefined */
    if (value <= 0)
        return 0;

    switch (size[len - 1]) {
    case 'K':
        shift = 10;
        break;
    case 'M':
        shift = 20;
        break;
    default:
        break;
    }

    /* Saturate instead of overflowing the int result */
    if (value > (long) (INT_MAX >> shift))
        return INT_MAX;
    return (int) (value << shift);
}
/*
 * Print a description of the error stored in wrapper_cur_err to stderr.
 * Success codes print nothing; unknown codes print their numeric value.
 */
void wrapper_perror() {
    const char* msg = NULL;

    switch (wrapper_cur_err) {
    case WRAPPER_SUCCESS:
    case LZ78_SUCCESS:
        /* Nothing to report */
        return;
    case WRAPPER_ERROR_ALGORITHM:
        msg = "Unrecognized compression algorithm\n";
        break;
    case WRAPPER_ERROR_FILE_IN:
        msg = "Unable to read input file\n";
        break;
    case WRAPPER_ERROR_FILE_OUT:
        msg = "Unable to write output file\n";
        break;
    case LZ78_ERROR_DICTIONARY:
        msg = "LZ78: unable to allocate dictionaries\n";
        break;
    case LZ78_ERROR_INITIALIZATION:
        msg = "LZ78: bad initialization\n";
        break;
    case LZ78_ERROR_MODE:
        msg = "LZ78: wrong compression/decompression mode\n";
        break;
    case LZ78_ERROR_READ:
        msg = "LZ78: unable to read input data\n";
        break;
    case LZ78_ERROR_WRITE:
        msg = "LZ78: unable to write output data\n";
        break;
    case LZ78_ERROR_EAGAIN:
        msg = "LZ78: I/O operation would block: retry...\n";
        break;
    case LZ78_ERROR_COMPRESS:
        msg = "LZ78: unable to compress input data\n";
        break;
    case LZ78_ERROR_DECOMPRESS:
        msg = "LZ78: unable to decompress input data\n";
        break;
    default:
        fprintf(stderr, "Unhandled error code %d\n", wrapper_cur_err);
        return;
    }

    fputs(msg, stderr);
}
/*
 * Create a wrapper around an algorithm instance.
 *
 * w_mode: WRAPPER_MODE_COMPRESS or WRAPPER_MODE_DECOMPRESS
 * w_type: algorithm constant (only LZ78_ALGORITHM is handled)
 * argv:   size string converted with byte_size() and passed to lz78_new()
 *
 * Return: the wrapper, or NULL if the type is unknown or an allocation fails.
 */
wrapper* wrapper_new(uint8_t w_mode, uint8_t w_type, char* argv) {
    wrapper* wrap;

    wrap = malloc(sizeof *wrap);
    if (wrap == NULL)
        return NULL;

    wrap->type = w_type;
    wrap->mode = w_mode;
    wrap->data = NULL;

    /* Only a known algorithm gets an instance; anything else stays NULL */
    if (wrap->type == LZ78_ALGORITHM)
        wrap->data = lz78_new(w_mode, byte_size(argv));

    if (wrap->data == NULL) {
        free(wrap);
        return NULL;
    }
    return wrap;
}
/*
 * Release a wrapper and the algorithm instance it owns. NULL is ignored.
 */
void wrapper_destroy(wrapper* w) {
    if (w == NULL)
        return;

    switch (w->type) {
    case LZ78_ALGORITHM:
        lz78_destroy(w->data);
        break;
    default:
        /* Unknown algorithm: nothing algorithm-specific to release, but the
           wrapper itself must still be freed below */
        break;
    }
    free(w);
}
/*
 * Compress input into output with the algorithm selected by the wrapper.
 *
 * w:      wrapper created by wrapper_new()
 * input:  path of the file to read, or NULL to read standard input
 * output: path of the file to write, or NULL to write standard output
 *
 * Return: a WRAPPER_* code produced by wrapper_return(); the untranslated
 *         code is left in wrapper_cur_err for wrapper_perror().
 */
uint8_t wrapper_compress(wrapper* w, char* input, char* output) {
    uint8_t ret;
    int fd_in;
    int fd_out;

    switch (w->type) {
    case LZ78_ALGORITHM:
        if (input == NULL) {
            fd_in = STDIN_FILENO;
        } else {
            fd_in = open(input, ACCESS_READ);
            if (fd_in == -1)
                return wrapper_return(WRAPPER_ERROR_FILE_IN);
        }
        if (output == NULL) {
            fd_out = STDOUT_FILENO;
        } else {
            fd_out = open(output, ACCESS_WRITE, 0644);
            if (fd_out == -1) {
                /* Only close what was opened here, never standard input */
                if (input != NULL)
                    close(fd_in);
                return wrapper_return(WRAPPER_ERROR_FILE_OUT);
            }
        }

        ret = lz78_compress(w->data, fd_in, fd_out);

        /* The standard streams belong to the process, not to this call:
           closing them would break any retry and every later user */
        if (input != NULL)
            close(fd_in);
        if (output != NULL)
            close(fd_out);
        return wrapper_return(ret);

    default:
        return wrapper_return(WRAPPER_ERROR_ALGORITHM);
    }
}
/*
 * Decompress input into output with the algorithm selected by the wrapper.
 *
 * w:      wrapper created by wrapper_new()
 * input:  path of the file to read, or NULL to read standard input
 * output: path of the file to write, or NULL to write standard output
 *
 * Return: a WRAPPER_* code produced by wrapper_return(); the untranslated
 *         code is left in wrapper_cur_err for wrapper_perror().
 */
uint8_t wrapper_decompress(wrapper* w, char* input, char* output) {
    uint8_t ret;
    int fd_in;
    int fd_out;

    switch (w->type) {
    case LZ78_ALGORITHM:
        if (input == NULL) {
            fd_in = STDIN_FILENO;
        } else {
            fd_in = open(input, ACCESS_READ);
            if (fd_in == -1)
                return wrapper_return(WRAPPER_ERROR_FILE_IN);
        }
        if (output == NULL) {
            fd_out = STDOUT_FILENO;
        } else {
            fd_out = open(output, ACCESS_WRITE, 0644);
            if (fd_out == -1) {
                /* Only close what was opened here, never standard input */
                if (input != NULL)
                    close(fd_in);
                return wrapper_return(WRAPPER_ERROR_FILE_OUT);
            }
        }

        ret = lz78_decompress(w->data, fd_in, fd_out);

        /* The standard streams belong to the process, not to this call:
           closing them would break any retry and every later user */
        if (input != NULL)
            close(fd_in);
        if (output != NULL)
            close(fd_out);
        return wrapper_return(ret);

    default:
        return wrapper_return(WRAPPER_ERROR_ALGORITHM);
    }
}
/*
 * Run the operation selected by the wrapper mode, repeating it for as long
 * as it reports WRAPPER_ERROR_EAGAIN.
 *
 * Return: the first code other than WRAPPER_ERROR_EAGAIN.
 */
uint8_t wrapper_exec(wrapper* w, char* input, char* output) {
    const int compressing = (w->mode == WRAPPER_MODE_COMPRESS);
    uint8_t status;

    do {
        status = compressing ? wrapper_compress(w, input, output)
                             : wrapper_decompress(w, input, output);
    } while (status == WRAPPER_ERROR_EAGAIN);

    return status;
}

86
src/lz78/wrapper.h Normal file
View File

@ -0,0 +1,86 @@
/*
* Basic implementation of LZ78 compression algorithm
*
* Copyright (C) 2010 evilaliv3 <giovanni.pellerano@evilaliv3.org>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __WRAPPER_H
#define __WRAPPER_H
#include "lz78.h"
/* List of included compression algorithms (values returned by
   get_algorithm() and accepted by wrapper_new() as w_type) */
#define UNKNOWN_ALGORITHM 0
#define LZ78_ALGORITHM 1
/* Modes of compression (accepted by wrapper_new() as w_mode) */
#define WRAPPER_MODE_COMPRESS 0
#define WRAPPER_MODE_DECOMPRESS 1
/* List of managed wrapper-level errors; numbered from 20 so they never
   collide with the LZ78_* codes defined in lz78.h */
#define WRAPPER_SUCCESS 20
#define WRAPPER_ERROR_ALGORITHM 21
#define WRAPPER_ERROR_FILE_IN 22
#define WRAPPER_ERROR_FILE_OUT 23
#define WRAPPER_ERROR_READ 24
#define WRAPPER_ERROR_WRITE 25
#define WRAPPER_ERROR_EAGAIN 26
#define WRAPPER_ERROR_COMPRESS 27
#define WRAPPER_ERROR_DECOMPRESS 28
#define WRAPPER_ERROR_GENERIC 29
/* Opaque type representing the wrapper */
typedef struct __wrapper wrapper;
/* Creates a new wrapper:
w_mode mode of compression (WRAPPER_MODE_COMPRESS / WRAPPER_MODE_DECOMPRESS)
w_type type of algorithm (LZ78_ALGORITHM)
w_argv additional parameter: size string parsed with byte_size() and
       handed to the algorithm constructor
Return: the wrapper, or NULL if the type is unknown or allocation fails
*/
wrapper* wrapper_new(uint8_t w_mode, uint8_t w_type, char* w_argv);
/* Deallocates a wrapper (NULL is ignored) */
void wrapper_destroy(wrapper* w);
/* Execute the function associated with the wrapper (compress/decompress)
in:  path of the input file, or NULL for standard input
out: path of the output file, or NULL for standard output
Return:
WRAPPER_SUCCESS on success
WRAPPER_ERROR_FILE_IN unable to open input file
WRAPPER_ERROR_FILE_OUT unable to open output file
WRAPPER_ERROR_READ unable to read input data
WRAPPER_ERROR_WRITE unable to write output data
WRAPPER_ERROR_EAGAIN unable to accomplish current operation
                     (wrapper_exec retries instead of returning it)
WRAPPER_ERROR_ALGORITHM type of wrapper unknown
WRAPPER_ERROR_COMPRESS unable to compress input data
WRAPPER_ERROR_DECOMPRESS unable to decompress input data
WRAPPER_ERROR_GENERIC algorithm-dependent error
*/
uint8_t wrapper_exec(wrapper* w, char* in, char* out);
/* Return the constant associated to a particular algorithm name
(UNKNOWN_ALGORITHM if doesn't exist)
*/
uint8_t get_algorithm(char* type);
/* Return an integer representing the given size
(K = KBytes, M = MBytes, given as the last character of the string);
0 is returned when size is NULL or the result is negative
*/
int byte_size(char* size);
/* Print last wrapper error occurred into standard error stream
(nothing is printed when the last operation succeeded) */
void wrapper_perror();
#endif /* __WRAPPER_H */

View File

@ -17,6 +17,16 @@ void syn_error(char *message)
exit(1);
}
/*
 * Return the size in bytes of an open, seekable stream.
 *
 * The current position is saved and restored, so the caller can keep
 * reading or writing where it left off.
 *
 * in: stream to measure
 *
 * Return: the value ftell() reports at end of file, or -1 if in is NULL or
 *         any positioning call fails (e.g. the stream is not seekable).
 */
long fsize (FILE *in)
{
    long pos, length;

    if (in == NULL)
        return -1L;

    pos = ftell(in);
    if (pos < 0)
        return -1L;
    if (fseek(in, 0L, SEEK_END) != 0)
        return -1L;
    length = ftell(in);
    /* A size is useless if the caller is left at the wrong position */
    if (fseek(in, pos, SEEK_SET) != 0)
        return -1L;
    return length;
}
void syn_warn(char *message)
{
printf("%s\n", message);