Remove LZ77 for LZ78, optimized
This commit is contained in:
parent
353ed7e1e5
commit
0e12919bb3
4
Makefile
4
Makefile
@ -11,13 +11,13 @@ PREFIX ?= /usr
|
|||||||
CC ?= gcc
|
CC ?= gcc
|
||||||
#CC ?= tcc
|
#CC ?= tcc
|
||||||
#CC ?= musl-tcc
|
#CC ?= musl-tcc
|
||||||
CFLAGS += -O2 -pedantic -g -Wall -Wextra
|
CFLAGS += -O3 -g -Wall -Wextra
|
||||||
CPPFLAGS += -DVERSION=$(VERSION) -D_FORTIFY_SOURCE=2
|
CPPFLAGS += -DVERSION=$(VERSION) -D_FORTIFY_SOURCE=2
|
||||||
#CPPFLAGS += -DVERSION=$(VERSION)
|
#CPPFLAGS += -DVERSION=$(VERSION)
|
||||||
LDFLAGS += -lm
|
LDFLAGS += -lm
|
||||||
BIN ?= slidescript
|
BIN ?= slidescript
|
||||||
|
|
||||||
SRCS=$(wildcard src/*.c)
|
SRCS=$(wildcard src/lz78/*.c) $(wildcard src/*.c)
|
||||||
|
|
||||||
OBJECTS=$(SRCS:%.c=%.o)
|
OBJECTS=$(SRCS:%.c=%.o)
|
||||||
|
|
||||||
|
8
docs/examples/tar.ss
Executable file
8
docs/examples/tar.ss
Executable file
@ -0,0 +1,8 @@
|
|||||||
|
#!/usr/bin/slidescript
|
||||||
|
|
||||||
|
print "Compressing..."
|
||||||
|
compress "test" "docs"
|
||||||
|
|
||||||
|
sleep "1"
|
||||||
|
print "Decompressing..."
|
||||||
|
decompress "test.tar.ss"
|
@ -1,340 +0,0 @@
|
|||||||
// LZ77 compression examples, simple and lightweight
|
|
||||||
// Being quick to process and execute, this will be great
|
|
||||||
// For internal compression on modern machines
|
|
||||||
//
|
|
||||||
// Andy Herbert
|
|
||||||
// lz1 https://github.com/andyherbert/lz1
|
|
||||||
//
|
|
||||||
|
|
||||||
#include <string.h>

#include "inc/deps.h"
#include "inc/compression.h"
|
|
||||||
|
|
||||||
//--------------------------------------------------------------------
|
|
||||||
|
|
||||||
#define MY_ERR "Error: lz77"
|
|
||||||
|
|
||||||
// Release a heap pointer and reset the variable to a null pointer.
// A null argument is treated as a fatal internal error: a diagnostic is
// printed to stderr and the process exits with status 1.
// The body is wrapped in do/while (0) so the macro expands to a single
// statement and can be used in an unbraced if/else; `dm` must be a
// modifiable pointer lvalue and the call must end with a semicolon.
#define my_free(dm)                                         \
    do {                                                    \
        if ((dm) == (void *) 0)                             \
        { fprintf (stderr, "WTH. Mooo.\n"); exit (1); }     \
        free (dm);                                          \
        (dm) = (void *) 0;                                  \
    } while (0)
|
|
||||||
|
|
||||||
//--------------------------------------------------------------------
|
|
||||||
|
|
||||||
// LZ77-compress `uncompressed_size` bytes of `uncompressed_text` into
// `compressed_text` and return the number of bytes written.
//
// Stream layout (multi-byte fields in host byte order):
//   bytes 0-3   original size (uint32_t)
//   byte  4     pointer_length_width
//   then one 3-byte record per step: a 16-bit pointer followed by one
//   literal byte.  A pointer of 0 means "literal only"; otherwise it is
//   (distance << pointer_length_width) | (match length - 1).
//
// pointer_length_width is the number of low pointer bits that hold the
// match length.  It must be in 1..15; for any other value nothing is
// written and 0 is returned.
//
// The caller must provide an output buffer of at least
// 5 + 3 * uncompressed_size bytes (every step consumes at least one input
// byte and emits exactly three output bytes).
uint32_t lz77_compress (uint8_t *uncompressed_text, uint32_t uncompressed_size, uint8_t *compressed_text, uint8_t pointer_length_width)
{
    uint16_t pointer_pos, temp_pointer_pos, output_pointer, pointer_length, temp_pointer_length;
    uint32_t compressed_pointer, output_size, coding_pos, output_lookahead_ref, look_behind, look_ahead;
    uint32_t pointer_pos_max, pointer_length_max;

    if (pointer_length_width < 1 || pointer_length_width > 15)
        return 0;

    // Integer shifts instead of pow(): exact, and no float -> uint16_t conversion.
    pointer_pos_max = (uint32_t) 1 << (16 - pointer_length_width);
    pointer_length_max = (uint32_t) 1 << pointer_length_width;

    // memcpy keeps the host-order layout without unaligned, type-punned stores.
    memcpy (compressed_text, &uncompressed_size, sizeof uncompressed_size);
    compressed_text[4] = pointer_length_width;
    compressed_pointer = output_size = 5;

    for (coding_pos = 0; coding_pos < uncompressed_size; ++coding_pos)
    {
        pointer_pos = 0;
        pointer_length = 0;

        // Try every back-distance inside the window and keep the longest match.
        for (temp_pointer_pos = 1; (temp_pointer_pos < pointer_pos_max) && (temp_pointer_pos <= coding_pos); ++temp_pointer_pos)
        {
            look_behind = coding_pos - temp_pointer_pos;
            look_ahead = coding_pos;
            temp_pointer_length = 0;

            // The look-ahead never leaves the input buffer and the match is
            // capped at pointer_length_max.
            while (look_ahead < uncompressed_size
                   && temp_pointer_length < pointer_length_max
                   && uncompressed_text[look_ahead] == uncompressed_text[look_behind])
            {
                ++look_ahead;
                ++look_behind;
                ++temp_pointer_length;
            }

            if (temp_pointer_length > pointer_length)
            {
                pointer_pos = temp_pointer_pos;
                pointer_length = temp_pointer_length;
                if (pointer_length == pointer_length_max)
                    break;
            }
        }

        coding_pos += pointer_length;

        if ((coding_pos == uncompressed_size) && pointer_length)
        {
            // The match ran to the end of the input: shorten it by one so the
            // final byte can be emitted as this record's literal.
            output_pointer = (pointer_length == 1) ? 0 : (uint16_t) ((pointer_pos << pointer_length_width) | (pointer_length - 2));
            output_lookahead_ref = coding_pos - 1;
        }
        else
        {
            output_pointer = (uint16_t) ((pointer_pos << pointer_length_width) | (pointer_length ? (pointer_length - 1) : 0));
            output_lookahead_ref = coding_pos;
        }

        memcpy (compressed_text + compressed_pointer, &output_pointer, sizeof output_pointer);
        compressed_pointer += 2;
        compressed_text[compressed_pointer++] = uncompressed_text[output_lookahead_ref];
        output_size += 3;
    }

    return output_size;
}
|
|
||||||
|
|
||||||
// Expand a stream produced by lz77_compress() into `uncompressed_text`.
//
// The original size and the pointer length width are read from the 5-byte
// header; the output buffer must hold at least that many bytes.  The length
// of the compressed input is not known here, so the input itself cannot be
// bounds-checked.
//
// Returns the number of bytes produced, which equals the size stored in the
// header on success.  A smaller value means the stream is corrupt: the
// header width is above 15 (returns 0) or a back-reference points before
// the start of the output.  A match that would run past the declared size
// is truncated at that size instead of overrunning the output buffer.
uint32_t lz77_decompress (uint8_t *compressed_text, uint8_t *uncompressed_text)
{
    uint8_t pointer_length_width;
    uint16_t input_pointer, pointer_length, pointer_pos, pointer_length_mask;
    uint32_t compressed_pointer, coding_pos, pointer_offset, uncompressed_size, remaining;

    // memcpy instead of pointer casts: records sit at odd offsets (5, 8, ...).
    memcpy (&uncompressed_size, compressed_text, sizeof uncompressed_size);
    pointer_length_width = compressed_text[4];
    compressed_pointer = 5;

    if (pointer_length_width > 15)
        return 0;

    pointer_length_mask = (uint16_t) (((uint32_t) 1 << pointer_length_width) - 1);

    for (coding_pos = 0; coding_pos < uncompressed_size; ++coding_pos)
    {
        memcpy (&input_pointer, compressed_text + compressed_pointer, sizeof input_pointer);
        compressed_pointer += 2;
        pointer_pos = input_pointer >> pointer_length_width;

        if (pointer_pos)
        {
            // A distance larger than what has been produced so far would
            // read before the start of the output buffer.
            if (pointer_pos > coding_pos)
                return coding_pos;

            pointer_length = (input_pointer & pointer_length_mask) + 1;
            remaining = uncompressed_size - coding_pos;
            if (pointer_length > remaining)
                pointer_length = (uint16_t) remaining;

            // Byte-by-byte on purpose: source and destination may overlap
            // when the distance is shorter than the match.
            for (pointer_offset = coding_pos - pointer_pos; pointer_length > 0; --pointer_length)
                uncompressed_text[coding_pos++] = uncompressed_text[pointer_offset++];

            // The match already filled the output; there is no room left
            // for this record's literal.
            if (coding_pos >= uncompressed_size)
                break;
        }

        uncompressed_text[coding_pos] = compressed_text[compressed_pointer++];
    }

    return coding_pos;
}
|
|
||||||
|
|
||||||
// Return the total length in bytes of the stream `in`, leaving the current
// file position where it was.
// Returns -1 when `in` is NULL or when any ftell/fseek call fails (for
// example on a stream that cannot seek).
long fsize (FILE *in)
{
    long pos, length;

    if (in == NULL)
        return -1L;

    pos = ftell (in);
    if (pos < 0)
        return -1L;

    if (fseek (in, 0L, SEEK_END) != 0)
        return -1L;

    length = ftell (in);            // -1 on failure, passed through below

    // Always try to put the position back, even if the size query failed.
    if (fseek (in, pos, SEEK_SET) != 0)
        return -1L;

    return length;
}
|
|
||||||
|
|
||||||
// Compress the file `filename_in` into `filename_out` with lz77_compress().
//
// The whole input is read into memory, compressed with the given
// pointer_length_width, and written out in one fwrite().
//
// Returns the compressed size in bytes, or 0 on any failure: input cannot
// be opened, sized or read, input too large for a 32-bit worst-case output
// size, out of memory, compressor returned 0, or the output cannot be
// opened, written or closed.
uint32_t ss_compress (const char *filename_in, char *filename_out, uint8_t pointer_length_width)
{
    FILE *in, *out;
    uint8_t *uncompressed_text = NULL, *compressed_text = NULL;
    uint32_t uncompressed_size, compressed_size, result = 0;
    long in_size;

    in = fopen(filename_in, "rb");
    if (in == NULL)
        return 0;

    // Worst case output is 5 header bytes + 3 bytes per input byte; refuse
    // inputs for which that total does not fit in 32 bits.
    in_size = fsize(in);
    if (in_size < 0 || (unsigned long) in_size > (UINT32_MAX - 5u) / 3u)
    {
        fclose(in);
        return 0;
    }
    uncompressed_size = (uint32_t) in_size;

    // +20 bytes of zeroed slack is kept from the original implementation.
    uncompressed_text = calloc((size_t) uncompressed_size + 20, 1);
    if (uncompressed_text == NULL)
    {
        fclose(in);
        return 0;
    }

    if (fread(uncompressed_text, 1, uncompressed_size, in) != uncompressed_size)
    {
        fclose(in);
        goto done;
    }
    fclose(in);

    compressed_text = malloc((size_t) uncompressed_size * 3 + 5);
    if (compressed_text == NULL)
        goto done;

    compressed_size = lz77_compress(uncompressed_text, uncompressed_size, compressed_text, pointer_length_width);
    if (compressed_size == 0)
        goto done;

    out = fopen(filename_out, "wb");
    if (out == NULL)
        goto done;

    if (fwrite(compressed_text, 1, compressed_size, out) == compressed_size)
        result = compressed_size;
    // fclose flushes buffered data, so a failure here is a failed write.
    if (fclose(out) != 0)
        result = 0;

done:
    free(compressed_text);
    free(uncompressed_text);
    return result;
}
|
|
||||||
|
|
||||||
//--------------------------------------------------------------------
|
|
||||||
|
|
||||||
#ifndef ZERO
|
|
||||||
#define ZERO 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef ONE
|
|
||||||
#define ONE 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//--------------------------------------------------------------------
|
|
||||||
|
|
||||||
uint32_t ss_decompress
|
|
||||||
(
|
|
||||||
char *ifname, // Input -file name
|
|
||||||
char *ofname // Output -file name
|
|
||||||
)
|
|
||||||
{
|
|
||||||
FILE *ifp; // Input -file pointer
|
|
||||||
FILE *ofp; // Output -file pointer
|
|
||||||
|
|
||||||
uint8_t *dm_comp; // DM: Compressed data
|
|
||||||
uint8_t *dm_deco; // DM: Uncompressed data
|
|
||||||
|
|
||||||
int orig_size; // Original size
|
|
||||||
int comp_size; // Compressed size
|
|
||||||
int deco_size; // Decompressed size
|
|
||||||
int writ_size; // Written size
|
|
||||||
|
|
||||||
//--------------------------------------------------------------------
|
|
||||||
// Open input file.
|
|
||||||
|
|
||||||
if ((ifp = fopen (ifname, "rb")) == NULL)
|
|
||||||
{ // Error
|
|
||||||
fprintf (stderr,
|
|
||||||
"%s: File not found: %s\n",
|
|
||||||
MY_ERR, ifname);
|
|
||||||
|
|
||||||
return ZERO;
|
|
||||||
}
|
|
||||||
|
|
||||||
//--------------------------------------------------------------------
|
|
||||||
// Set up compressed-data buffer.
|
|
||||||
|
|
||||||
// Compressed size
|
|
||||||
comp_size = (int) fsize (ifp);
|
|
||||||
// DM: Compressed data
|
|
||||||
dm_comp = (uint8_t *) malloc (comp_size + 10);
|
|
||||||
// Add some extra memory at the end of malloc comp_size
|
|
||||||
// Saves dirty archive size sway, WIP
|
|
||||||
if (dm_comp == NULL) // Error?
|
|
||||||
{ // Yes - Error exit
|
|
||||||
fprintf (stderr,
|
|
||||||
"%s: Error: malloc failed\n",
|
|
||||||
MY_ERR);
|
|
||||||
|
|
||||||
return ZERO;
|
|
||||||
}
|
|
||||||
|
|
||||||
//--------------------------------------------------------------------
|
|
||||||
// Read compressed data.
|
|
||||||
|
|
||||||
if (fread (dm_comp, ONE, comp_size, ifp) != (size_t) comp_size)
|
|
||||||
{ // Error
|
|
||||||
my_free (dm_comp); // Release DM
|
|
||||||
|
|
||||||
fprintf (stderr,
|
|
||||||
"%s: Read of input data failed\n",
|
|
||||||
MY_ERR);
|
|
||||||
|
|
||||||
return ZERO;
|
|
||||||
}
|
|
||||||
|
|
||||||
fclose (ifp); // Close input file
|
|
||||||
|
|
||||||
//--------------------------------------------------------------------
|
|
||||||
// Set up decompressed-data buffer.
|
|
||||||
|
|
||||||
orig_size = (int) *((uint32_t *) dm_comp);
|
|
||||||
dm_deco = (uint8_t *) malloc (orig_size + 20);
|
|
||||||
// +20 to cover byte sway, dirty trick for mem leak
|
|
||||||
if (dm_comp == NULL) // Error?
|
|
||||||
{ // Yes
|
|
||||||
my_free (dm_comp); // Release DM
|
|
||||||
|
|
||||||
fprintf (stderr, "%s: malloc failed\n", MY_ERR);
|
|
||||||
return ZERO;
|
|
||||||
}
|
|
||||||
|
|
||||||
//--------------------------------------------------------------------
|
|
||||||
// Decompress.
|
|
||||||
|
|
||||||
deco_size = (int) lz77_decompress (dm_comp, dm_deco);
|
|
||||||
my_free (dm_comp); // Release DM
|
|
||||||
|
|
||||||
if (deco_size < orig_size) // Error?
|
|
||||||
{ // Yes
|
|
||||||
my_free (dm_deco); // Release DM
|
|
||||||
|
|
||||||
fprintf (stderr,
|
|
||||||
"%s: deco size %d < orig size %d\n",
|
|
||||||
MY_ERR, deco_size, orig_size);
|
|
||||||
|
|
||||||
return ZERO;
|
|
||||||
}
|
|
||||||
|
|
||||||
//--------------------------------------------------------------------
|
|
||||||
// Open output file.
|
|
||||||
|
|
||||||
if ((ofp = fopen (ofname, "wb")) == NULL)
|
|
||||||
{ // Error
|
|
||||||
my_free (dm_deco); // Release DM
|
|
||||||
|
|
||||||
fprintf (stderr,
|
|
||||||
"%s: Can't open output file: %s\n",
|
|
||||||
MY_ERR, ofname);
|
|
||||||
|
|
||||||
return ZERO;
|
|
||||||
}
|
|
||||||
|
|
||||||
//--------------------------------------------------------------------
|
|
||||||
// Write to output file.
|
|
||||||
|
|
||||||
writ_size = (int) fwrite (dm_deco, ONE, deco_size, ofp);
|
|
||||||
|
|
||||||
fclose (ofp); // Close output file
|
|
||||||
my_free (dm_deco); // Release DM
|
|
||||||
|
|
||||||
if (writ_size != deco_size) // Error?
|
|
||||||
{ // Yes
|
|
||||||
fprintf (stderr,
|
|
||||||
"%s: Bytes written %d != Data size %d\n",
|
|
||||||
MY_ERR, writ_size, deco_size);
|
|
||||||
|
|
||||||
return ZERO;
|
|
||||||
}
|
|
||||||
|
|
||||||
//--------------------------------------------------------------------
|
|
||||||
// Wrap it up.
|
|
||||||
|
|
||||||
return deco_size; // == orig_size
|
|
||||||
}
|
|
||||||
|
|
||||||
//--------------------------------------------------------------------
|
|
||||||
|
|
||||||
/*
|
|
||||||
int main (int argc, char const *argv[])
|
|
||||||
{
|
|
||||||
FILE *in;
|
|
||||||
|
|
||||||
char filename[129];
|
|
||||||
char filedecout[141];
|
|
||||||
|
|
||||||
if(argc < 2)
|
|
||||||
{
|
|
||||||
printf("Please enter a filename: ./comp file.txt");
|
|
||||||
}
|
|
||||||
|
|
||||||
in = fopen(argv[1], "r");
|
|
||||||
if(in == NULL)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
if(strlen(argv[1]) > 128)
|
|
||||||
{
|
|
||||||
printf("Filename too long");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
sprintf(filename, "%s.ss", argv[1]);
|
|
||||||
sprintf(filedecout, "%s.1", argv[1]);
|
|
||||||
|
|
||||||
printf("Original size: %ld\n", fsize(in));
|
|
||||||
fclose(in);
|
|
||||||
for(uint8_t i = 1; i <= 6; ++i)
|
|
||||||
printf("Compressed (%i): %u, decompressed: (%u)\n", i, ss_compress(argv[1], filename, 20000000, i), ss_decompress(filename, filedecout));
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
*/
|
|
@ -31,6 +31,7 @@
|
|||||||
|
|
||||||
void syn_error(char *message);
|
void syn_error(char *message);
|
||||||
void syn_warn(char *message);
|
void syn_warn(char *message);
|
||||||
|
long fsize (FILE *in);
|
||||||
char *strip_nl(char *string);
|
char *strip_nl(char *string);
|
||||||
int file_exists(char *path);
|
int file_exists(char *path);
|
||||||
int is_dir(char *path);
|
int is_dir(char *path);
|
||||||
|
114
src/lexer.c
114
src/lexer.c
@ -18,7 +18,7 @@
|
|||||||
|
|
||||||
// For slidescript compression algorithm
|
// For slidescript compression algorithm
|
||||||
#include "inc/tar.h"
|
#include "inc/tar.h"
|
||||||
#include "inc/compression.h"
|
#include "lz78/wrapper.h"
|
||||||
|
|
||||||
#define strtok_next(s) strtok_r (NULL, s, &strtok_save)
|
#define strtok_next(s) strtok_r (NULL, s, &strtok_save)
|
||||||
|
|
||||||
@ -487,9 +487,14 @@ char *process_line (char *line)
|
|||||||
{
|
{
|
||||||
char *filename;
|
char *filename;
|
||||||
struct tar_t *archive = NULL;
|
struct tar_t *archive = NULL;
|
||||||
int fd;
|
int fd, lzret;
|
||||||
|
wrapper *lzwrapper;
|
||||||
|
int bsize = B_SIZE_DEFAULT;
|
||||||
|
uint8_t w_mode = WRAPPER_MODE_DECOMPRESS;
|
||||||
|
uint8_t w_type = LZ78_ALGORITHM;
|
||||||
|
|
||||||
|
retbuf = qmalloc(QM_SS, 8129);
|
||||||
|
|
||||||
tar_free_pool();
|
|
||||||
tok_srch = strtok_next ("\"");
|
tok_srch = strtok_next ("\"");
|
||||||
if (tok_srch == NULL)
|
if (tok_srch == NULL)
|
||||||
{
|
{
|
||||||
@ -505,27 +510,33 @@ char *process_line (char *line)
|
|||||||
|
|
||||||
filename = parse_vars(tok_srch);
|
filename = parse_vars(tok_srch);
|
||||||
|
|
||||||
FILE *in;
|
char filedecout[128];
|
||||||
|
|
||||||
char origsize[128];
|
|
||||||
char filedecout[MAX_FILENAME_LEN+5];
|
|
||||||
|
|
||||||
in = fopen(filename, "rb");
|
|
||||||
if (in == NULL)
|
|
||||||
{
|
|
||||||
x_warn("ss:warn:compress, failed to open tar for compression");
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
sprintf(filedecout, "uncompressed.tar");
|
sprintf(filedecout, "uncompressed.tar");
|
||||||
|
|
||||||
sprintf(origsize, "%ld", fsize(in));
|
if (bsize <= 0) {
|
||||||
|
x_warn("ss:warn:compress, default buffer not set?");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t deco_return = ss_decompress(filename, filedecout);
|
/* Creates a wrapper instance */
|
||||||
retbuf = qmalloc(QM_SS, (sizeof(deco_return) + strlen(filename) + strlen(origsize) + 40));
|
lzwrapper = wrapper_new(w_mode, w_type, NULL);
|
||||||
sprintf(retbuf, "ss: %s: decompressed: %s -> %u", filename, origsize, deco_return);
|
if (lzwrapper == NULL) {
|
||||||
|
x_warn("ss:warn:decompress, failed to open lz78 wrapper socket!");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
fclose(in);
|
/* Executes the wrapper function */
|
||||||
|
lzret = wrapper_exec(lzwrapper, filename, filedecout);
|
||||||
|
|
||||||
|
if (lzret != WRAPPER_SUCCESS)
|
||||||
|
{
|
||||||
|
x_warn("ss:warn:decompress, failed to decompress tarball: %d", lzret);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Destroys the wrapper instance */
|
||||||
|
wrapper_destroy(lzwrapper);
|
||||||
|
|
||||||
// open existing file
|
// open existing file
|
||||||
if ((fd = open(filedecout, O_RDWR)) < 0) {
|
if ((fd = open(filedecout, O_RDWR)) < 0) {
|
||||||
@ -562,9 +573,14 @@ char *process_line (char *line)
|
|||||||
else if (strncmp("compress",tok_srch,8) == 0)
|
else if (strncmp("compress",tok_srch,8) == 0)
|
||||||
{
|
{
|
||||||
char filename[MAX_FILENAME_LEN+1]; // Files to be added into the archive
|
char filename[MAX_FILENAME_LEN+1]; // Files to be added into the archive
|
||||||
char comp_size[128];
|
|
||||||
struct tar_t *archive = NULL;
|
struct tar_t *archive = NULL;
|
||||||
int fd;
|
int fd, lzret;
|
||||||
|
wrapper *lzwrapper;
|
||||||
|
int bsize = B_SIZE_DEFAULT;
|
||||||
|
uint8_t w_mode = WRAPPER_MODE_COMPRESS;
|
||||||
|
uint8_t w_type = LZ78_ALGORITHM;
|
||||||
|
|
||||||
|
retbuf = qmalloc(QM_SS, 8129);
|
||||||
|
|
||||||
tar_free_pool();
|
tar_free_pool();
|
||||||
tok_srch = strtok_next ("\"");
|
tok_srch = strtok_next ("\"");
|
||||||
@ -645,45 +661,39 @@ char *process_line (char *line)
|
|||||||
|
|
||||||
close(fd); // don't bother checking for fd < 0
|
close(fd); // don't bother checking for fd < 0
|
||||||
|
|
||||||
FILE *in;
|
|
||||||
|
|
||||||
char origsize[128];
|
|
||||||
char file_comp[MAX_FILENAME_LEN+9];
|
char file_comp[MAX_FILENAME_LEN+9];
|
||||||
char filedecout[MAX_FILENAME_LEN+10];
|
|
||||||
|
|
||||||
in = fopen(filename, "rb");
|
sprintf(file_comp, "%s.tar.ss", filename);
|
||||||
if (in == NULL)
|
|
||||||
{
|
if (bsize <= 0) {
|
||||||
x_warn("ss:warn:compress, failed to open tar for compression");
|
x_warn("ss:warn:compress, default buffer not set?");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
sprintf(file_comp, "%s.tar.ss", filename);
|
/* Creates a wrapper instance */
|
||||||
sprintf(filedecout, "%s.tar.1", filename);
|
lzwrapper = wrapper_new(w_mode, w_type, NULL);
|
||||||
|
if (lzwrapper == NULL) {
|
||||||
sprintf(origsize, "%ld", fsize(in));
|
x_warn("ss:warn:compress, failed to open lz78 wrapper socket!");
|
||||||
|
return NULL;
|
||||||
sprintf(comp_size, "%s", origsize);
|
|
||||||
|
|
||||||
fclose(in);
|
|
||||||
for(uint32_t i = 1; i < 7; ++i)
|
|
||||||
{
|
|
||||||
uint32_t comp_return = ss_compress(filename, file_comp, i);
|
|
||||||
uint32_t deco_return = ss_decompress(file_comp, filedecout);
|
|
||||||
if (atoi(comp_size) < (int)comp_return && comp_return != 0 && deco_return != 0 && i != 1)
|
|
||||||
{
|
|
||||||
retbuf = qmalloc(QM_SS, sizeof(deco_return) + sizeof(comp_return) + strlen(file_comp) + 40);
|
|
||||||
sprintf(retbuf, "ss: %s: compressed: %u -> %u", file_comp, deco_return, comp_return);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
sprintf(comp_size, "%u", comp_return);
|
|
||||||
printf("pass %u decompressed/compressed: %u/%u\n", i, deco_return, comp_return);
|
|
||||||
fflush(stdout);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Executes the wrapper function */
|
||||||
|
lzret = wrapper_exec(lzwrapper, filename, file_comp);
|
||||||
|
|
||||||
|
if (lzret != WRAPPER_SUCCESS)
|
||||||
|
{
|
||||||
|
x_warn("ss:warn:compress, error on compress: %d", lzret);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sprintf(retbuf, "Compressed -> %s.tar.ss", filename);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Destroys the wrapper instance */
|
||||||
|
wrapper_destroy(lzwrapper);
|
||||||
|
|
||||||
// Remove the decompressed version for sanity check
|
// Remove the decompressed version for sanity check
|
||||||
remove(filedecout);
|
|
||||||
remove(filename);
|
remove(filename);
|
||||||
|
|
||||||
return retbuf;
|
return retbuf;
|
||||||
|
305
src/lz78/bitio.c
Normal file
305
src/lz78/bitio.c
Normal file
@ -0,0 +1,305 @@
|
|||||||
|
/*
|
||||||
|
* Basic implementation of LZ78 compression algorithm
|
||||||
|
*
|
||||||
|
* Copyright (C) 2010 evilaliv3 <giovanni.pellerano@evilaliv3.org>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "bitio.h"
|
||||||
|
|
||||||
|
/* Struct of bitfile */
|
||||||
|
struct __bit_file {
|
||||||
|
int fd; /* File descriptor */
|
||||||
|
int mode; /* Mode: read 0, write 1 */
|
||||||
|
UINTMAX_T buff_size; /* Buffer size (bits) */
|
||||||
|
UINTMAX_T w_start; /* Window start (bits) */
|
||||||
|
UINTMAX_T w_len; /* Window length (bits) */
|
||||||
|
char buff[1]; /* Buffer (contiguous memory area) */
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Return max value that can be represented using UINTMAX_T */
|
||||||
|
UINTMAX_T max_index() {
|
||||||
|
UINTMAX_T max = -1;
|
||||||
|
max /= 8;
|
||||||
|
return max;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Create a bit stream on top of the already-open descriptor `fd`.
 *
 * mode       ACCESS_READ or ACCESS_WRITE; anything else is rejected.
 * buff_size  buffer size in bits; must be a non-zero multiple of 8.
 *            Values above max_index() are reduced to the largest multiple
 *            of 8 that does not exceed it.
 *
 * Returns the new stream, or NULL on invalid arguments, when a zero-length
 * read()/write() probe on `fd` does not return 0, or when allocation fails.
 * NOTE: only the allocation-failure path closes `fd`; on every other
 * failure the descriptor is left open for the caller.
 */
bit_file* bit_open(int fd, int mode, UINTMAX_T buff_size) {
    bit_file* bfp;
    UINTMAX_T limit;
    int ret;

    if (mode != ACCESS_READ && mode != ACCESS_WRITE)
        return NULL;

    /* Zero-length transfer: checks the descriptor without moving any data */
    if (mode == ACCESS_READ)
        ret = read(fd, NULL, 0);
    else /* mode == ACCESS_WRITE */
        ret = write(fd, NULL, 0);

    if (ret != 0)
        return NULL;

    /* An empty buffer cannot hold a single bit */
    if (buff_size == 0 || buff_size % 8 != 0)
        return NULL;

    /* Keep the clamped size byte-aligned as well */
    limit = max_index() - max_index() % 8;
    if (buff_size > limit)
        buff_size = limit;

    /* Struct and buffer share one zero-initialized allocation */
    bfp = calloc(1, sizeof(bit_file) + buff_size / 8);
    if (bfp == NULL) {
        close(fd);
    } else {
        bfp->fd = fd;
        bfp->mode = mode;
        bfp->buff_size = buff_size;
        /* bfp->w_start and bfp->w_len are initialized by calloc */
    }

    return bfp;
}
|
||||||
|
|
||||||
|
/*
 * Read up to n_bits bits from the stream into buff_out.
 *
 * The first bit is stored at bit `ofs` (0..7, 0 = least significant) of
 * buff_out[0]; following bits go to increasingly significant positions and
 * then on to the next byte.  The internal buffer is refilled from the
 * descriptor whenever it runs empty.
 *
 * Returns the number of bits stored, which is less than n_bits when read()
 * reports end of file or EAGAIN.  Returns -1 on invalid arguments, on a
 * stream not opened with ACCESS_READ, or on any other read() error; in the
 * error case the stream state is not updated.
 */
int bit_read(bit_file* bfp, char* buff_out, UINTMAX_T n_bits, uint8_t ofs) {
    uint8_t* base;
    uint8_t mask;
    uint8_t r_mask;
    const uint8_t* readptr;
    UINTMAX_T buff_ready_bytes;
    UINTMAX_T bits_read;
    UINTMAX_T bits_read_total = 0;
    UINTMAX_T buff_size;
    UINTMAX_T w_start;
    UINTMAX_T w_len;
    ssize_t c;
    uint8_t aligned;

    if (bfp == NULL || buff_out == NULL || ofs > 7)
        return -1;

    if (bfp->mode != ACCESS_READ)
        return -1;

    buff_size = bfp->buff_size;
    w_start = bfp->w_start;
    w_len = bfp->w_len;

    mask = 1 << ofs;
    base = (uint8_t*) buff_out;

    /* Check if input and output are aligned to byte */
    aligned = (mask == 1 && (w_start % 8 == 0)) ? 1 : 0;

    while (n_bits > 0) {
        /* Buffer refill if needed */
        if (w_len == 0) {
            /* Signed result, so the error test does not depend on the
             * width of UINTMAX_T */
            c = read(bfp->fd, bfp->buff, buff_size / 8);
            if (c < 0) {
                if (errno == EAGAIN) {
                    errno = 0;
                    break;
                }
                return -1;
            }
            if (c == 0)
                break;

            w_start = 0;
            w_len = (UINTMAX_T) c * 8;
        }

        readptr = (const uint8_t*) bfp->buff + w_start / 8;

        if (aligned && w_len > 7 && n_bits >= w_len) {
            /* Optimization: due to alignment we can use memcpy */
            buff_ready_bytes = w_len / 8;
            memcpy(base, readptr, buff_ready_bytes);
            base += buff_ready_bytes;

            bits_read = buff_ready_bytes * 8;
            w_start = (w_start + bits_read) % buff_size;
            w_len -= bits_read;
            n_bits -= bits_read;
            bits_read_total += bits_read;
        } else {
            /* Single bit read */
            r_mask = 1 << w_start % 8;

            if (*readptr & r_mask)
                *base |= mask;
            else
                *base &= ~mask;

            w_start = (w_start + 1) % buff_size;
            --w_len;
            --n_bits;
            ++bits_read_total;

            if (mask == 0x80) {
                /* Output byte complete: move on and re-evaluate alignment */
                mask = 1;
                ++base;
                aligned = (w_start % 8 == 0) ? 1 : 0;
            } else {
                mask <<= 1;
            }
        }
    }

    /* Update */
    bfp->buff_size = buff_size;
    bfp->w_start = w_start;
    bfp->w_len = w_len;

    return bits_read_total;
}
|
||||||
|
|
||||||
|
/*
 * Append n_bits bits taken from buff_in to the stream.
 *
 * The first bit is taken from bit `ofs` (0..7, 0 = least significant) of
 * buff_in[0]; following bits come from increasingly significant positions
 * and then from the next byte.  Whenever the internal buffer becomes full
 * it is written out with bit_flush().
 *
 * Returns the number of bits accepted.  This is less than n_bits when a
 * flush could not empty the buffer.  Returns -1 on invalid arguments, on a
 * stream not opened with ACCESS_WRITE, or when bit_flush() fails.
 */
int bit_write(bit_file* bfp, const char* buff_in, UINTMAX_T n_bits, uint8_t ofs) {
    UINTMAX_T ret = 0;
    const uint8_t* base;
    uint8_t mask;
    uint8_t* writeptr;
    UINTMAX_T pos;
    UINTMAX_T buff_free_bits;
    UINTMAX_T buff_free_bytes;
    UINTMAX_T bits_written;
    uint8_t aligned;

    if (bfp == NULL || buff_in == NULL || ofs > 7)
        return -1;

    if (bfp->mode != ACCESS_WRITE)
        return -1;

    mask = 1 << ofs;
    base = (const uint8_t*) buff_in;

    pos = bfp->w_start + bfp->w_len;
    buff_free_bits = bfp->buff_size - bfp->w_len;

    /* Check if input and output are aligned to byte */
    aligned = (mask == 1 && (pos % 8 == 0)) ? 1 : 0;

    while (n_bits > 0) {
        writeptr = (uint8_t*) bfp->buff + pos / 8;

        if (aligned && buff_free_bits > 7 && n_bits >= buff_free_bits) {
            /* Optimization: due to alignment we can use memcpy */
            buff_free_bytes = buff_free_bits / 8;
            memcpy(writeptr, base, buff_free_bytes);
            base += buff_free_bytes;
            bits_written = buff_free_bytes * 8;

            pos += bits_written;
            bfp->w_len += bits_written;
            n_bits -= bits_written;
            ret += bits_written;
            buff_free_bits -= bits_written;
        } else {
            /* Single bit write */
            if (*base & mask)
                *writeptr |= (1 << pos % 8);
            else
                *writeptr &= ~(1 << pos % 8);

            ++pos;
            ++(bfp->w_len);
            --n_bits;
            --buff_free_bits;
            ++ret;

            if (mask == 0x80) {
                /* Input byte consumed.  Alignment must be judged on the
                 * position of the NEXT output bit, i.e. after pos was
                 * advanced; otherwise the memcpy path could overwrite a
                 * partially filled output byte. */
                mask = 1;
                ++base;
                aligned = (pos % 8 == 0) ? 1 : 0;
            } else {
                mask <<= 1;
            }
        }

        /* Flush if needed */
        if (bfp->w_len == bfp->buff_size) {
            if (bit_flush(bfp) == -1)
                return -1;
            /* Buffer not emptied: stop and report what was accepted */
            if (bfp->w_len != 0)
                return ret;
            pos = bfp->w_start + bfp->w_len;
            buff_free_bits = bfp->buff_size;
        }
    }

    return ret;
}
|
||||||
|
|
||||||
|
/*
 * Write the complete bytes currently held in the buffer to the descriptor.
 *
 * Only w_len / 8 whole bytes are written; a trailing partial byte stays in
 * the buffer.  The window is advanced by the amount actually written.
 *
 * Returns 0 on success, including the case where write() reported EAGAIN
 * and some data is still buffered (check w_len through the caller's own
 * bookkeeping).  Returns -1 when bfp is NULL, when the stream was not
 * opened with ACCESS_WRITE, or on any other write() error.
 */
int bit_flush(bit_file* bfp) {
    UINTMAX_T count;
    UINTMAX_T written;
    ssize_t n;
    uint8_t* base;

    if (bfp == NULL)
        return -1;

    /* The buffer of a read stream holds input data, never pending output */
    if (bfp->mode != ACCESS_WRITE)
        return -1;

    count = bfp->w_len / 8;
    written = 0;
    base = (uint8_t*) bfp->buff + bfp->w_start / 8;

    while (count > 0) {
        /* Signed result, so the error test does not depend on the width
         * of UINTMAX_T */
        n = write(bfp->fd, base, count);
        if (n < 0) {
            if (errno == EAGAIN) {
                errno = 0;
                break;
            }
            return -1;
        }
        base += n;
        written += (UINTMAX_T) n;
        count -= (UINTMAX_T) n;
    }

    bfp->w_start = (bfp->w_start + written * 8) % bfp->buff_size;
    bfp->w_len -= written * 8;
    return 0;
}
|
||||||
|
|
||||||
|
/*
 * Flush (write streams only), free the stream and close its descriptor.
 *
 * For a write stream the buffered length is first rounded up to a whole
 * byte so that bit_flush(), which writes whole bytes only, also emits the
 * last partial byte.  A read stream is never flushed: its buffer holds
 * unread input, not pending output.
 *
 * The stream and the descriptor are always released.  Returns 0 on success
 * and -1 when bfp is NULL, when the final flush fails or leaves data
 * behind, or when close() fails.
 */
int bit_close(bit_file* bfp) {
    int fd;
    int status = 0;

    if (bfp == NULL)
        return -1;

    fd = bfp->fd;

    if (bfp->mode == ACCESS_WRITE) {
        if (bfp->w_len % 8)
            bfp->w_len += 8 - (bfp->w_len % 8);

        if (bit_flush(bfp) == -1 || bfp->w_len != 0)
            status = -1;
    }

    free(bfp);
    if (close(fd) == -1)
        status = -1;

    return status;
}
|
53
src/lz78/bitio.h
Normal file
53
src/lz78/bitio.h
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
/*
|
||||||
|
* Basic implementation of LZ78 compression algorithm
|
||||||
|
*
|
||||||
|
* Copyright (C) 2010 evilaliv3 <giovanni.pellerano@evilaliv3.org>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __BITIO_H
|
||||||
|
#define __BITIO_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#define B_SIZE_DEFAULT 1048576
|
||||||
|
|
||||||
|
#define UINTMAX_T uint32_t
|
||||||
|
|
||||||
|
/* Access mode for reading and writing */
|
||||||
|
#define ACCESS_READ (O_RDONLY | O_NONBLOCK)
|
||||||
|
#define ACCESS_WRITE (O_WRONLY | O_CREAT | O_TRUNC | O_NONBLOCK)
|
||||||
|
|
||||||
|
/* The opaque type used for bitwise streams */
|
||||||
|
typedef struct __bit_file bit_file;
|
||||||
|
|
||||||
|
/* Creates a new bit_file on the file descriptor fd with the specified mode and buffer size (in bits) */
|
||||||
|
bit_file* bit_open(int fd, int mode, UINTMAX_T bufsize);
|
||||||
|
|
||||||
|
/* Does a memory read (occasionally an i/o read) */
|
||||||
|
int bit_read(bit_file* bf, char* base, UINTMAX_T n_bits, uint8_t ofs);
|
||||||
|
|
||||||
|
/* Does a memory write (occasionally an i/o flush) */
|
||||||
|
int bit_write(bit_file* bf, const char* base, UINTMAX_T n_bits, uint8_t ofs);
|
||||||
|
|
||||||
|
/* Writes the buffered whole bytes out to the file descriptor */
|
||||||
|
int bit_flush(bit_file* bf);
|
||||||
|
|
||||||
|
/* Releases the resources allocated by the bit_file */
|
||||||
|
int bit_close(bit_file* bf);
|
||||||
|
|
||||||
|
#endif /* __BITIO_H */
|
638
src/lz78/lz78.c
Normal file
638
src/lz78/lz78.c
Normal file
@ -0,0 +1,638 @@
|
|||||||
|
/*
|
||||||
|
* Basic implementation of LZ78 compression algorithm
|
||||||
|
*
|
||||||
|
* Copyright (C) 2010 evilaliv3 <giovanni.pellerano@evilaliv3.org>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "lz78.h"
|
||||||
|
|
||||||
|
/* Code used to represent an EOF */
|
||||||
|
#define DICT_CODE_EOF 256
|
||||||
|
/* Code used before to send the size of the dictionary */
|
||||||
|
#define DICT_CODE_SIZE 257
|
||||||
|
/* Code used by the compressor to start the operations */
|
||||||
|
#define DICT_CODE_START 258
|
||||||
|
/* Code used by the compressor to stop the operations */
|
||||||
|
#define DICT_CODE_STOP 259
|
||||||
|
|
||||||
|
/* Limits dict_size inside [DICT_SIZE_MIN, DICT_SIZE_MAX] */
|
||||||
|
#ifndef DICT_LIMIT
|
||||||
|
#define DICT_LIMIT(x) (((x) < (DICT_SIZE_MIN + 1)) ? (DICT_SIZE_MIN + 1) : (((x) > (DICT_SIZE_MAX)) ? (DICT_SIZE_MAX) : (x)))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Compute the threshold for the start of secondary dictionary */
|
||||||
|
#define DICT_SIZE_THRESHOLD(x) ((x) * 8 / 10)
|
||||||
|
|
||||||
|
/* One slot of the hash table the compressor uses to encode data */
struct __ht_entry {
    /* Non-zero once the slot has been filled in */
    uint8_t used;
    /* Parent node */
    uint32_t parent;
    /* Label of the node */
    uint16_t label;
    /* Child node */
    uint32_t child;
};
|
||||||
|
|
||||||
|
/* The opaque type of hash table entry used by the compressor */
|
||||||
|
typedef struct __ht_entry ht_entry;
|
||||||
|
|
||||||
|
/* Dictionary of the compressor implemented as an hash table */
|
||||||
|
struct __ht_dictionary {
|
||||||
|
ht_entry* root; /* Root node of the dictionary */
|
||||||
|
uint32_t cur_node; /* Current position inside the dictionary */
|
||||||
|
uint32_t prev_node; /* Pointer to the father of cur_node */
|
||||||
|
uint32_t d_size; /* Size of the dictionary */
|
||||||
|
uint32_t d_thr; /* Threshold for activation of secondary dictionary */
|
||||||
|
uint32_t d_next; /* Next code to put in the dictionary */
|
||||||
|
};
|
||||||
|
|
||||||
|
/* The opaque type representing the dictionary used by the compressor */
|
||||||
|
typedef struct __ht_dictionary ht_dictionary;
|
||||||
|
|
||||||
|
/* State of a compressor */
|
||||||
|
struct __lz78_c {
|
||||||
|
uint8_t completed; /* Termination flag */
|
||||||
|
uint32_t d_size; /* Size of the dictionaries */
|
||||||
|
ht_dictionary* main; /* Main dictionary */
|
||||||
|
ht_dictionary* secondary; /* Secondary dictionary */
|
||||||
|
uint32_t bitbuf; /* Buffer containing bits not yet written */
|
||||||
|
uint32_t n_bits; /* Number of valid bits in the buffer */
|
||||||
|
};
|
||||||
|
|
||||||
|
/* The opaque type representing the state of the compressor */
|
||||||
|
typedef struct __lz78_c lz78_c;
|
||||||
|
|
||||||
|
/* One entry of the dictionary used by the decompressor */
struct __entry {
    /* Parent node */
    uint32_t parent;
    /* Label of the node */
    uint16_t label;
};
|
||||||
|
|
||||||
|
/* The opaque type of a dictionary entry used by the decompressor */
|
||||||
|
typedef struct __entry entry;
|
||||||
|
|
||||||
|
/* Dictionary of the decompressor */
|
||||||
|
struct __dictionary {
|
||||||
|
entry* root; /* Root node of the dictionary */
|
||||||
|
uint32_t d_size; /* Size of the dictionray */
|
||||||
|
uint32_t d_thr; /* Threshold for activation of secondary dictionary */
|
||||||
|
uint32_t d_min; /* Minimum size of the dictionary */
|
||||||
|
uint32_t d_next; /* Next code to put in the dictionary */
|
||||||
|
uint32_t n_bytes; /* Number of bytes contained in bytebuf */
|
||||||
|
uint32_t offset; /* Offset of the first valid byte inside bytebuf */
|
||||||
|
char bytebuf[1]; /* Buffer used to output strings */
|
||||||
|
};
|
||||||
|
|
||||||
|
/* The opaque type representing the dictionary used by the decompressor */
|
||||||
|
typedef struct __dictionary dictionary;
|
||||||
|
|
||||||
|
/* State of the decompressor */
|
||||||
|
struct __lz78_d {
|
||||||
|
uint8_t completed; /* Termination flag */
|
||||||
|
dictionary* main; /* Main dictionary */
|
||||||
|
ht_dictionary* secondary; /* Secondary dictionary */
|
||||||
|
uint32_t bitbuf; /* Buffer containing bits not yet written */
|
||||||
|
uint32_t n_bits; /* Number of valid bits contained in the buffer */
|
||||||
|
};
|
||||||
|
|
||||||
|
/* The opaque type representing the status of the decompressor */
|
||||||
|
typedef struct __lz78_d lz78_d;
|
||||||
|
|
||||||
|
/* Descriptor of an lz78 instance */
struct __lz78_instance {
    /* Discriminates between compression and decompression */
    uint8_t mode;
    /* Start of the storage that holds the compression/decompression
       state struct; lz78_new() over-allocates to make room for it */
    char state[1];
};
|
||||||
|
|
||||||
|
/* Return the number of bits needed to represent the given number */
|
||||||
|
uint8_t bitlen(uint32_t i);
|
||||||
|
|
||||||
|
/* Create a new ht_dictionary to be used for the compression */
|
||||||
|
ht_dictionary* ht_dictionary_new(uint32_t d_size);
|
||||||
|
|
||||||
|
/* Update the dictionary with the given input symbol
   Return:
   0 a new entry has been put in the dictionary
   -1 the current node has been switched (no new entry)
*/
|
||||||
|
int ht_dictionary_update(ht_dictionary* d, uint16_t label);
|
||||||
|
|
||||||
|
/* Reset the dictionary associated to the given compressor */
|
||||||
|
void ht_dictionary_reset(ht_dictionary* d);
|
||||||
|
|
||||||
|
/* Destroy the given ht_dictionary object */
|
||||||
|
void ht_dictionary_destroy(ht_dictionary* d);
|
||||||
|
|
||||||
|
/* Create a new dictionary to be used for the decompression */
|
||||||
|
dictionary* dictionary_new(uint32_t d_size);
|
||||||
|
|
||||||
|
/* Update the internal state of the dictionary */
|
||||||
|
void dictionary_update(dictionary* d, uint32_t code);
|
||||||
|
|
||||||
|
/* Reset the dictionary associated to the given decompressor */
|
||||||
|
void dictionary_reset(dictionary* d);
|
||||||
|
|
||||||
|
/* Destroy the given dictionary object */
|
||||||
|
void dictionary_destroy(dictionary* d);
|
||||||
|
|
||||||
|
/* Compress the input byte and modify the state of the dictionary */
|
||||||
|
void compress_byte(lz78_c* o, int c_in);
|
||||||
|
|
||||||
|
/* Decompress the input code and modify the state of the dictionary */
|
||||||
|
int decompress_code(lz78_d* o, uint32_t code);
|
||||||
|
|
||||||
|
/* Return the number of bits needed to represent i (0 when i is 0). */
uint8_t bitlen(uint32_t i) {
    uint8_t width;

    /* Drop one low bit per step until nothing is left; the number of
       steps is the position of the highest set bit. */
    for (width = 0; i != 0; i >>= 1)
        ++width;

    return width;
}
|
||||||
|
|
||||||
|
ht_dictionary* ht_dictionary_new(uint32_t d_size) {
|
||||||
|
ht_dictionary* dict = malloc(sizeof(ht_dictionary));
|
||||||
|
if (dict == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
d_size = DICT_LIMIT(d_size);
|
||||||
|
dict->root = calloc(1, sizeof(ht_entry) * d_size);
|
||||||
|
if (dict->root == NULL) {
|
||||||
|
free(dict);
|
||||||
|
return NULL;
|
||||||
|
} else {
|
||||||
|
dict->d_size = d_size;
|
||||||
|
dict->d_thr = DICT_SIZE_THRESHOLD(d_size);
|
||||||
|
dict->d_next = DICT_SIZE_MIN;
|
||||||
|
dict->cur_node = -1;
|
||||||
|
return dict;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Advance the dictionary by one input symbol.
 *
 * Returns -1 when only the current node changed (either the first symbol
 * of a sequence, or the sequence current-node + label already exists),
 * and 0 when a new entry was inserted. After a 0 return, d->prev_node
 * holds the code to emit and d->cur_node restarts at label.
 */
int ht_dictionary_update(ht_dictionary* d, uint16_t label) {
    const uint32_t parent = d->cur_node;
    uint32_t key;
    uint32_t slot = 0;
    ht_entry* e;
    int byte;

    d->prev_node = parent;

    /* No sequence in progress: the symbol itself becomes the current node */
    if (parent == (uint32_t)-1) {
        d->cur_node = label;
        return -1;
    }

    /* Bernstein hash (h = h * 33 + byte) over the four bytes of the key */
    key = (label << bitlen(d->d_size)) + parent;
    for (byte = 0; byte < 4; ++byte) {
        slot = slot * 33 + (key & 0xFF);
        key >>= 8;
    }
    slot %= d->d_size;

    /* Probe linearly until the sequence is found or a free slot turns up */
    for (e = &d->root[slot]; e->used; e = &d->root[slot]) {
        if (e->parent == parent && e->label == label) {
            d->cur_node = e->child;
            return -1;
        }
        slot = (slot + 1) % d->d_size;
    }

    /* Not present: record it in the free slot under the next code */
    e->used = 1;
    e->parent = parent;
    e->label = label;
    e->child = d->d_next;

    d->cur_node = label;
    ++(d->d_next);

    return 0;
}
|
||||||
|
|
||||||
|
/* Empty the dictionary: no current node, next code back at
   DICT_SIZE_MIN, every hash slot cleared. */
void ht_dictionary_reset(ht_dictionary* d) {
    d->cur_node = (uint32_t)-1;
    d->d_next = DICT_SIZE_MIN;
    memset(d->root, 0, d->d_size * sizeof(ht_entry));
}
|
||||||
|
|
||||||
|
/* Release a dictionary created by ht_dictionary_new(). NULL is accepted.
 *
 * The hash table (d->root) is a separate allocation and must be freed
 * too; freeing only the header leaked the whole table.
 */
void ht_dictionary_destroy(ht_dictionary* d) {
    if (d == NULL)
        return;

    free(d->root);
    free(d);
}
|
||||||
|
|
||||||
|
dictionary* dictionary_new(uint32_t d_size) {
|
||||||
|
uint16_t i;
|
||||||
|
dictionary* dict = malloc(sizeof(dictionary) + d_size);
|
||||||
|
if (dict == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
d_size = DICT_LIMIT(d_size);
|
||||||
|
dict->root = malloc(sizeof(entry) * d_size);
|
||||||
|
if (dict->root == NULL) {
|
||||||
|
free(dict);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
dict->d_size = d_size;
|
||||||
|
dict->d_thr = DICT_SIZE_THRESHOLD(d_size);
|
||||||
|
dict->d_min = DICT_SIZE_MIN;
|
||||||
|
dict->d_next = DICT_SIZE_MIN;
|
||||||
|
for (i = 0; i < DICT_SIZE_MIN; ++i) {
|
||||||
|
dict->root[i].parent = 0;
|
||||||
|
dict->root[i].label = i;
|
||||||
|
}
|
||||||
|
return dict;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Decode one code: rebuild its byte sequence at the tail of d->bytebuf,
 * complete the entry left pending by the previous call, and open a new
 * pending entry whose parent is this code.
 *
 * On return d->offset / d->n_bytes delimit the decoded bytes.
 */
void dictionary_update(dictionary* d, uint32_t code) {
    const uint32_t last = d->d_size - 1;
    const uint32_t next = d->d_next;
    const uint32_t base = d->d_min;
    uint32_t pos = last;
    uint32_t node = code;

    /* Walk from the code up through its parents, writing labels
       backwards from the end of the buffer */
    for (;;) {
        d->bytebuf[pos] = d->root[node].label;
        --pos;
        if (node < DICT_SIZE_MIN || pos == 0)
            break;
        node = d->root[node].parent;
    }

    /* The code is the entry still pending from the previous call, so its
       label is not known yet: it equals the first byte of the sequence */
    if (code >= base && code == next - 1)
        d->bytebuf[last] = d->bytebuf[pos + 1];

    /* Complete the pending entry with the first byte of this sequence */
    if (next > base)
        d->root[next - 1].label = d->bytebuf[pos + 1];

    d->n_bytes = last - pos;
    d->offset = last + 1 - d->n_bytes;

    /* Open the next entry; its label is filled in by the following call */
    d->root[next].parent = code;
    ++(d->d_next);
}
|
||||||
|
|
||||||
|
/* Rewind the dictionary: both the minimum and the next code go back to
   DICT_SIZE_MIN. The entries themselves are left untouched. */
void dictionary_reset(dictionary* d) {
    d->d_next = d->d_min = DICT_SIZE_MIN;
}
|
||||||
|
|
||||||
|
/* Release a dictionary created by dictionary_new(): first its entry
   array, then the dictionary itself. NULL is accepted. */
void dictionary_destroy(dictionary* d) {
    if (d == NULL)
        return;

    free(d->root);
    free(d);
}
|
||||||
|
|
||||||
|
/* Feed one input symbol (a byte, or EOF) to the compressor.
 *
 * When a code has to be emitted it is left in o->bitbuf with its width
 * in o->n_bits; the caller writes it out. The main dictionary's cur_node
 * doubles as a small state machine through the DICT_CODE_START / _EOF /
 * _STOP values.
 */
void compress_byte(lz78_c* o, int c_in) {
    ht_dictionary* primary = o->main;
    ht_dictionary* shadow = o->secondary;
    const uint32_t state = primary->cur_node;

    if (state == DICT_CODE_STOP) {
        /* The EOF code has been handed over: nothing left to do */
        o->completed = 1;
        return;
    }

    if (state == DICT_CODE_EOF) {
        /* Emit the EOF code, then move to the STOP state */
        o->bitbuf = primary->cur_node;
        o->n_bits = bitlen(primary->d_next);
        primary->cur_node = DICT_CODE_STOP;
        return;
    }

    if (state == DICT_CODE_START) {
        /* First call: emit the dictionary size, then begin with no
           current node */
        o->bitbuf = primary->d_size;
        o->n_bits = bitlen(DICT_SIZE_MAX);
        primary->cur_node = -1;
    }

    if (c_in == EOF)
        c_in = DICT_CODE_EOF;

    /* A return of 0 means a new entry was added and a code must go out */
    if (ht_dictionary_update(primary, c_in) == 0) {
        o->bitbuf = primary->prev_node;
        o->n_bits = bitlen(primary->d_next - 1);

        /* Main dictionary full: promote the secondary, recycle the old main */
        if (primary->d_next == primary->d_size) {
            ht_dictionary* retired = primary;

            o->main = shadow;
            o->secondary = retired;
            primary = shadow;
            shadow = retired;
            primary->cur_node = c_in;
            ht_dictionary_reset(shadow);
        }
    }

    /* Past the threshold the secondary dictionary is trained as well */
    if (primary->d_next >= primary->d_thr)
        ht_dictionary_update(shadow, c_in);
}
|
||||||
|
|
||||||
|
/* Process one code read from the compressed stream.
 *
 * Returns 0 on success, -1 if a dictionary could not be allocated and
 * -2 if no dictionaries are available (bad compressed file).
 *
 * NOTE(review): code is not range-checked against d_next before it is
 * used as an index by dictionary_update(); confirm whether corrupt input
 * can reach this function.
 */
int decompress_code(lz78_d* o, uint32_t code) {
    dictionary* d_main = o->main;
    ht_dictionary* d_sec = o->secondary;
    uint32_t i;

    if (code == DICT_CODE_EOF) {
        o->completed = 1;
        return 0;
    }

    if (code == DICT_CODE_START || code == DICT_CODE_SIZE) {
        /* d_next == DICT_SIZE_MAX flags "the next code is the dictionary
           size" and makes the caller read it with the maximum width */
        d_main->d_next = DICT_SIZE_MAX;
        o->n_bits = 0;
        return 0;
    }

    /* Initial operations: this code carries the dictionary size */
    if (d_main->d_next == DICT_SIZE_MAX) {
        dictionary_destroy(d_main);
        d_main = dictionary_new(code);
        o->main = d_main;
        if (d_main == NULL)
            return -1;

        ht_dictionary_destroy(d_sec);
        d_sec = ht_dictionary_new(code);
        o->secondary = d_sec;
        if (d_sec == NULL) {
            dictionary_destroy(d_main);
            o->main = NULL;
            return -1;
        }

        o->bitbuf = 0;
        o->n_bits = 0;
        return 0;
    }

    /* Bad compressed file */
    if (d_sec == NULL || d_main == NULL)
        return -2;

    dictionary_update(d_main, code);

    /* Past the threshold, replay the decoded bytes into the secondary */
    if (d_main->d_next > d_main->d_thr) {
        const char* decoded = d_main->bytebuf + d_main->offset;

        for (i = 0; i < d_main->n_bytes; ++i)
            ht_dictionary_update(d_sec, (uint8_t) decoded[i]);
    }

    /* Main dictionary full: restart it from the secondary's contents */
    if (d_main->d_next == d_main->d_size) {
        dictionary_reset(d_main);
        d_main->d_min = d_sec->d_next;
        d_main->d_next = d_sec->d_next;

        for (i = 0; i < d_sec->d_size && d_sec->d_next; ++i) {
            const ht_entry* src = &d_sec->root[i];

            if (!src->used)
                continue;

            d_main->root[src->child].parent = src->parent;
            d_main->root[src->child].label = src->label;
            --(d_sec->d_next);
        }
        ht_dictionary_reset(d_sec);
    }

    return 0;
}
|
||||||
|
|
||||||
|
lz78_instance* lz78_new(uint8_t cmode, uint32_t dsize) {
|
||||||
|
lz78_instance* i;
|
||||||
|
lz78_c* c;
|
||||||
|
lz78_d* d;
|
||||||
|
|
||||||
|
int max_dim = (sizeof(lz78_c) > sizeof(lz78_d)) ? sizeof(lz78_c) : sizeof(lz78_d);
|
||||||
|
i = malloc(sizeof(lz78_instance) + max_dim);
|
||||||
|
if (i == NULL)
|
||||||
|
return NULL;
|
||||||
|
i->mode = cmode;
|
||||||
|
|
||||||
|
switch (cmode) {
|
||||||
|
case LZ78_MODE_COMPRESS:
|
||||||
|
c = (lz78_c*)&i->state;
|
||||||
|
dsize = (dsize == 0) ? DICT_SIZE_DEFAULT : dsize;
|
||||||
|
c->d_size = DICT_LIMIT(dsize);
|
||||||
|
c->completed = 0;
|
||||||
|
c->main = ht_dictionary_new(c->d_size);
|
||||||
|
if (c->main == NULL) {
|
||||||
|
free(i);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
c->secondary = ht_dictionary_new(c->d_size);
|
||||||
|
if (c->secondary == NULL) {
|
||||||
|
ht_dictionary_destroy(c->main);
|
||||||
|
free(i);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
c->bitbuf = DICT_CODE_START;
|
||||||
|
c->n_bits = bitlen(DICT_SIZE_MIN);
|
||||||
|
c->main->cur_node = DICT_CODE_START;
|
||||||
|
return i;
|
||||||
|
|
||||||
|
case LZ78_MODE_DECOMPRESS:
|
||||||
|
d = (lz78_d*)&i->state;
|
||||||
|
d->completed = 0;
|
||||||
|
d->main = dictionary_new(DICT_SIZE_MIN);
|
||||||
|
if (d->main == NULL) {
|
||||||
|
free(i);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return i;
|
||||||
|
|
||||||
|
default:
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Compress everything readable from fd_in into fd_out.
 *
 * Returns one of the LZ78_* codes: LZ78_SUCCESS once the EOF code has
 * been written and the bit stream closed, LZ78_ERROR_EAGAIN when an I/O
 * operation could not complete, LZ78_ERROR_READ / LZ78_ERROR_WRITE on
 * I/O failure, LZ78_ERROR_INITIALIZATION / LZ78_ERROR_MODE for a bad
 * instance.
 *
 * errno is cleared before each fgetc(): it is only meaningful when the
 * call just made failed, and a stale non-zero value used to turn a
 * normal end of file into LZ78_ERROR_READ.
 *
 * NOTE(review): the FILE created by fdopen() and the bit_file are not
 * released on the error paths; fclose() would also close fd_in, which
 * the caller closes itself - confirm ownership before changing this.
 */
uint8_t lz78_compress(lz78_instance* lz78, int fd_in, int fd_out) {
    FILE* in;
    bit_file* out;
    lz78_c* o;
    int bits;
    int c_in;

    if (lz78 == NULL)
        return LZ78_ERROR_INITIALIZATION;

    if (lz78->mode != LZ78_MODE_COMPRESS)
        return LZ78_ERROR_MODE;

    in = fdopen(fd_in, "r");
    if (in == NULL)
        return LZ78_ERROR_READ;

    out = bit_open(fd_out, ACCESS_WRITE, B_SIZE_DEFAULT);
    if (out == NULL)
        return LZ78_ERROR_WRITE;

    o = (lz78_c*)&lz78->state;

    for (;;) {
        /* Write out whatever code the previous step produced */
        if (o->n_bits > 0) {
            bits = bit_write(out, (char*) &o->bitbuf, o->n_bits, 0);
            if (bits == -1)
                return LZ78_ERROR_WRITE;

            o->bitbuf >>= bits;
            o->n_bits -= bits;

            /* Partial write: the remaining bits stay in the state */
            if (o->n_bits > 0)
                return LZ78_ERROR_EAGAIN;
        }

        errno = 0;
        c_in = fgetc(in);
        if (c_in == EOF) {
            if (errno == EAGAIN) {
                errno = 0;
                return LZ78_ERROR_EAGAIN;
            } else if (errno != 0) {
                return LZ78_ERROR_READ;
            }
        }

        /* EOF is passed through: compress_byte() turns it into the EOF code */
        compress_byte(o, c_in);
        if (o->completed == 1) {
            bit_close(out);
            return LZ78_SUCCESS;
        }
    }
}
|
||||||
|
|
||||||
|
/* Decompress everything readable from fd_in into fd_out.
 *
 * Returns one of the LZ78_* codes: LZ78_SUCCESS once the EOF code has
 * been decoded and the output flushed, LZ78_ERROR_EAGAIN when an I/O
 * operation could not complete, LZ78_ERROR_READ / LZ78_ERROR_WRITE on
 * I/O failure, LZ78_ERROR_DICTIONARY / LZ78_ERROR_DECOMPRESS when
 * decompress_code() fails, LZ78_ERROR_INITIALIZATION / LZ78_ERROR_MODE
 * for a bad instance.
 *
 * fwrite() returns a size_t item count and reports failure as a short
 * count, never as -1, so the result is compared against the number of
 * bytes requested. Once the pending bytes are out, n_bytes is cleared so
 * a later pass (or a re-entry after LZ78_ERROR_EAGAIN) cannot emit them
 * a second time.
 *
 * NOTE(review): the bit_file and the FILE created by fdopen() are not
 * released here; confirm ownership with the caller, which closes both
 * descriptors itself.
 */
uint8_t lz78_decompress(lz78_instance* lz78, int fd_in, int fd_out) {
    bit_file* in;
    FILE* out;
    lz78_d* o;
    dictionary* d_main;
    uint32_t bits, written;
    size_t chunk;
    int ret;

    if (lz78 == NULL)
        return LZ78_ERROR_INITIALIZATION;

    if (lz78->mode != LZ78_MODE_DECOMPRESS)
        return LZ78_ERROR_MODE;

    in = bit_open(fd_in, ACCESS_READ, B_SIZE_DEFAULT);
    if (in == NULL)
        return LZ78_ERROR_READ;

    out = fdopen(fd_out, "w");
    if (out == NULL)
        return LZ78_ERROR_WRITE;

    o = (lz78_d*) &lz78->state;

    for (;;) {
        /* MUST be reloaded every cycle: decompress_code() may replace the
           main dictionary */
        d_main = o->main;

        /* Emit the bytes decoded by the previous step */
        if (d_main->n_bytes) {
            chunk = fwrite(d_main->bytebuf + d_main->offset, 1,
                           d_main->n_bytes, out);
            written = (uint32_t) chunk;
            if (written != d_main->n_bytes) {
                /* Short count means the write failed; remember how far
                   it got so a retry resumes at the right byte */
                d_main->offset += written;
                d_main->n_bytes -= written;
                if (errno == EAGAIN) {
                    errno = 0;
                    return LZ78_ERROR_EAGAIN;
                }
                return LZ78_ERROR_WRITE;
            }
            d_main->n_bytes = 0;
        }

        /* Read the next code; its width follows the dictionary growth */
        o->bitbuf = 0;
        o->n_bits = 0;
        bits = bitlen(d_main->d_next);

        if (bits > 0) {
            ret = bit_read(in, (char*) &o->bitbuf, bits, 0);
            if (ret == -1)
                return LZ78_ERROR_READ;

            o->n_bits = ret;
            if (bits != o->n_bits)
                return LZ78_ERROR_EAGAIN;
        }

        ret = decompress_code(o, o->bitbuf);
        if (ret == -1)
            return LZ78_ERROR_DICTIONARY;
        if (ret == -2)
            return LZ78_ERROR_DECOMPRESS;

        if (o->completed == 1) {
            fflush(out);
            return LZ78_SUCCESS;
        }
    }
}
|
||||||
|
|
||||||
|
/* Release an instance created by lz78_new(), together with the
   dictionaries owned by its state. NULL is accepted. */
void lz78_destroy(lz78_instance *lz78) {
    if (lz78 == NULL)
        return;

    if (lz78->mode == LZ78_MODE_COMPRESS) {
        lz78_c *c = (lz78_c*)&lz78->state;

        ht_dictionary_destroy(c->main);
        ht_dictionary_destroy(c->secondary);
    } else if (lz78->mode == LZ78_MODE_DECOMPRESS) {
        lz78_d *d = (lz78_d*)&lz78->state;

        dictionary_destroy(d->main);
        ht_dictionary_destroy(d->secondary);
    }

    free(lz78);
}
|
70
src/lz78/lz78.h
Normal file
70
src/lz78/lz78.h
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
/*
|
||||||
|
* Basic implementation of LZ78 compression algorithm
|
||||||
|
*
|
||||||
|
* Copyright (C) 2010 evilaliv3 <giovanni.pellerano@evilaliv3.org>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __LZ78_H
|
||||||
|
#define __LZ78_H
|
||||||
|
|
||||||
|
#include "bitio.h"
|
||||||
|
|
||||||
|
/* Modes of compression */
|
||||||
|
#define LZ78_MODE_COMPRESS 0
|
||||||
|
#define LZ78_MODE_DECOMPRESS 1
|
||||||
|
|
||||||
|
/* List of lz78-level return codes */
|
||||||
|
#define LZ78_SUCCESS 0
|
||||||
|
#define LZ78_ERROR_DICTIONARY 1
|
||||||
|
#define LZ78_ERROR_READ 2
|
||||||
|
#define LZ78_ERROR_WRITE 3
|
||||||
|
#define LZ78_ERROR_EAGAIN 4
|
||||||
|
#define LZ78_ERROR_COMPRESS 5
|
||||||
|
#define LZ78_ERROR_DECOMPRESS 6
|
||||||
|
#define LZ78_ERROR_INITIALIZATION 7
|
||||||
|
#define LZ78_ERROR_MODE 8
|
||||||
|
|
||||||
|
/* Size of the dictionary */
|
||||||
|
#define DICT_SIZE_MIN 260
|
||||||
|
#define DICT_SIZE_DEFAULT 4096
|
||||||
|
#define DICT_SIZE_MAX 1048576
|
||||||
|
|
||||||
|
/* Opaque type representing the compression instance */
|
||||||
|
typedef struct __lz78_instance lz78_instance;
|
||||||
|
|
||||||
|
/* Allocate and return an instance of lz78 compressor
|
||||||
|
cmode: specify compress/decompress mode
|
||||||
|
dsize: specify the size of the dictionary (byte)
|
||||||
|
*/
|
||||||
|
lz78_instance* lz78_new(uint8_t cmode, uint32_t dsize);
|
||||||
|
|
||||||
|
/* Compress the input stream by sending the result to the output stream
|
||||||
|
arg: current instance of compressor obtained by invoking lz78_new()
|
||||||
|
Return: one of defined lz78-level return codes
|
||||||
|
*/
|
||||||
|
uint8_t lz78_compress(lz78_instance* lz78, int fd_in, int fd_out);
|
||||||
|
|
||||||
|
/* Decompress the input stream by sending the result to the output stream
|
||||||
|
arg: current instance of compressor obtained by invoking lz78_new()
|
||||||
|
Return: one of defined lz78-level return codes
|
||||||
|
*/
|
||||||
|
uint8_t lz78_decompress(lz78_instance* lz78, int fd_in, int fd_out);
|
||||||
|
|
||||||
|
|
||||||
|
/* Deallocate current instance */
|
||||||
|
void lz78_destroy(lz78_instance* lz78);
|
||||||
|
|
||||||
|
#endif /* __LZ78_H */
|
291
src/lz78/wrapper.c
Normal file
291
src/lz78/wrapper.c
Normal file
@ -0,0 +1,291 @@
|
|||||||
|
/*
|
||||||
|
* Basic implementation of LZ78 compression algorithm
|
||||||
|
*
|
||||||
|
* Copyright (C) 2010 evilaliv3 <giovanni.pellerano@evilaliv3.org>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "wrapper.h"
|
||||||
|
|
||||||
|
/* Description of one available algorithm */
struct __algorithm {
    /* Name of the algorithm as a string */
    char* name;
    /* Constant identifying the algorithm */
    uint8_t type;
};
|
||||||
|
|
||||||
|
/* Opaque type representing the type of algorithm */
|
||||||
|
typedef struct __algorithm algorithm;
|
||||||
|
|
||||||
|
/* Struct of available algorithms */
|
||||||
|
const algorithm algo_list[] = {
|
||||||
|
{"lz78", LZ78_ALGORITHM},
|
||||||
|
{NULL, UNKNOWN_ALGORITHM}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Wrapper used to drive a compression or decompression run */
struct __wrapper {
    /* Algorithm used to compress or decompress data */
    uint8_t type;
    /* Flag selecting compress or decompress mode */
    uint8_t mode;
    /* Opaque algorithm instance */
    void* data;
};
|
||||||
|
|
||||||
|
/* Global variable representing the current error stored */
|
||||||
|
uint8_t wrapper_cur_err = WRAPPER_SUCCESS;
|
||||||
|
|
||||||
|
/* Associate an algorithm-dependent error to a wrapper-generic error */
|
||||||
|
uint8_t wrapper_return(uint8_t code) {
|
||||||
|
wrapper_cur_err = code;
|
||||||
|
switch (code) {
|
||||||
|
case LZ78_SUCCESS:
|
||||||
|
return WRAPPER_SUCCESS;
|
||||||
|
case LZ78_ERROR_READ:
|
||||||
|
return WRAPPER_ERROR_READ;
|
||||||
|
case LZ78_ERROR_WRITE:
|
||||||
|
return WRAPPER_ERROR_WRITE;
|
||||||
|
case LZ78_ERROR_EAGAIN:
|
||||||
|
return WRAPPER_ERROR_EAGAIN;
|
||||||
|
case LZ78_ERROR_COMPRESS:
|
||||||
|
return WRAPPER_ERROR_COMPRESS;
|
||||||
|
case LZ78_ERROR_DECOMPRESS:
|
||||||
|
return WRAPPER_ERROR_DECOMPRESS;
|
||||||
|
case LZ78_ERROR_DICTIONARY:
|
||||||
|
case LZ78_ERROR_INITIALIZATION:
|
||||||
|
case LZ78_ERROR_MODE:
|
||||||
|
return WRAPPER_ERROR_GENERIC;
|
||||||
|
}
|
||||||
|
return code;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint8_t get_algorithm(char* type) {
|
||||||
|
uint8_t i = 0;
|
||||||
|
while (algo_list[i].name != NULL) {
|
||||||
|
if (strcmp(type, algo_list[i].name) == 0)
|
||||||
|
return algo_list[i].type;
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
return UNKNOWN_ALGORITHM;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parse a size such as "4096", "4K" or "2M" into a number of bytes.
 *
 * The leading decimal number is read with atoi(); a trailing 'K'
 * multiplies it by 1024 and a trailing 'M' by 1048576.
 *
 * Returns 0 for NULL, for an empty string, and for any value that is
 * not positive. The empty string used to index size[-1], and a negative
 * value used to be left-shifted, which is undefined behaviour.
 *
 * NOTE(review): values large enough to overflow int when scaled are not
 * detected.
 */
int byte_size(char* size) {
    size_t len;
    int n;

    if (size == NULL)
        return 0;

    len = strlen(size);
    if (len == 0)
        return 0;

    n = atoi(size);
    if (n <= 0)
        return 0;

    switch (size[len - 1]) {
        case 'K':
            n <<= 10;
            break;

        case 'M':
            n <<= 20;
            break;

        default:
            break;
    }

    return (n < 0) ? 0 : n;
}
|
||||||
|
|
||||||
|
void wrapper_perror() {
|
||||||
|
switch (wrapper_cur_err) {
|
||||||
|
case WRAPPER_SUCCESS:
|
||||||
|
break;
|
||||||
|
|
||||||
|
case WRAPPER_ERROR_ALGORITHM:
|
||||||
|
fprintf(stderr, "Unrecognized compression algorithm\n");
|
||||||
|
break;
|
||||||
|
|
||||||
|
case WRAPPER_ERROR_FILE_IN:
|
||||||
|
fprintf(stderr, "Unable to read input file\n");
|
||||||
|
break;
|
||||||
|
|
||||||
|
case WRAPPER_ERROR_FILE_OUT:
|
||||||
|
fprintf(stderr, "Unable to write output file\n");
|
||||||
|
break;
|
||||||
|
|
||||||
|
case LZ78_SUCCESS:
|
||||||
|
break;
|
||||||
|
|
||||||
|
case LZ78_ERROR_DICTIONARY:
|
||||||
|
fprintf(stderr, "LZ78: unable to allocate dictionaries\n");
|
||||||
|
break;
|
||||||
|
|
||||||
|
case LZ78_ERROR_INITIALIZATION:
|
||||||
|
fprintf(stderr, "LZ78: bad initialization\n");
|
||||||
|
break;
|
||||||
|
|
||||||
|
case LZ78_ERROR_MODE:
|
||||||
|
fprintf(stderr, "LZ78: wrong compression/decompression mode\n");
|
||||||
|
break;
|
||||||
|
|
||||||
|
case LZ78_ERROR_READ:
|
||||||
|
fprintf(stderr, "LZ78: unable to read input data\n");
|
||||||
|
break;
|
||||||
|
|
||||||
|
case LZ78_ERROR_WRITE:
|
||||||
|
fprintf(stderr, "LZ78: unable to write output data\n");
|
||||||
|
break;
|
||||||
|
|
||||||
|
case LZ78_ERROR_EAGAIN:
|
||||||
|
fprintf(stderr, "LZ78: I/O operation would block: retry...\n");
|
||||||
|
break;
|
||||||
|
|
||||||
|
case LZ78_ERROR_COMPRESS:
|
||||||
|
fprintf(stderr, "LZ78: unable to compress input data\n");
|
||||||
|
break;
|
||||||
|
|
||||||
|
case LZ78_ERROR_DECOMPRESS:
|
||||||
|
fprintf(stderr, "LZ78: unable to decompress input data\n");
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
fprintf(stderr, "Unhandled error code %d\n", wrapper_cur_err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
wrapper* wrapper_new(uint8_t w_mode, uint8_t w_type, char* argv) {
|
||||||
|
wrapper* w = malloc(sizeof(wrapper));
|
||||||
|
if (w == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
w->type = w_type;
|
||||||
|
w->mode = w_mode;
|
||||||
|
|
||||||
|
switch (w->type) {
|
||||||
|
case LZ78_ALGORITHM:
|
||||||
|
w->data = lz78_new(w_mode, byte_size(argv));
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
free(w);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (w->data)
|
||||||
|
return w;
|
||||||
|
else {
|
||||||
|
free(w);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Release a wrapper and the algorithm instance it owns. NULL is accepted.
 *
 * The wrapper itself is freed whatever its type is; an unrecognised type
 * used to return early and leak it.
 */
void wrapper_destroy(wrapper* w) {
    if (w == NULL)
        return;

    switch (w->type) {
        case LZ78_ALGORITHM:
            lz78_destroy(w->data);
            break;

        default:
            /* Unknown algorithm: no instance this code knows how to release */
            break;
    }

    free(w);
}
|
||||||
|
|
||||||
|
/* Compress input into output using the wrapper's algorithm.
 *
 * A NULL input selects standard input and a NULL output selects standard
 * output. Both descriptors are closed before returning. The result goes
 * through wrapper_return(), which also records it as the current error.
 */
uint8_t wrapper_compress(wrapper* w, char* input, char* output) {
    int fd_in = STDIN_FILENO;
    int fd_out = STDOUT_FILENO;
    uint8_t ret;

    if (w->type != LZ78_ALGORITHM)
        return wrapper_return(WRAPPER_ERROR_ALGORITHM);

    if (input != NULL) {
        fd_in = open(input, ACCESS_READ);
        if (fd_in == -1)
            return wrapper_return(WRAPPER_ERROR_FILE_IN);
    }

    if (output != NULL) {
        fd_out = open(output, ACCESS_WRITE, 0644);
        if (fd_out == -1) {
            close(fd_in);
            return wrapper_return(WRAPPER_ERROR_FILE_OUT);
        }
    }

    ret = lz78_compress(w->data, fd_in, fd_out);

    close(fd_in);
    close(fd_out);
    return wrapper_return(ret);
}
|
||||||
|
|
||||||
|
/*
 * Run one decompression pass for the wrapper's algorithm.
 *
 *   w       wrapper whose type selects the algorithm
 *   input   path of the file to read, or NULL to read STDIN_FILENO
 *   output  path of the file to write, or NULL to write STDOUT_FILENO
 *
 * Returns whatever wrapper_return() yields for:
 *   WRAPPER_ERROR_FILE_IN    input could not be opened
 *   WRAPPER_ERROR_FILE_OUT   output could not be opened
 *   WRAPPER_ERROR_ALGORITHM  wrapper type is not handled
 *   otherwise the status returned by lz78_decompress()
 *
 * Only descriptors opened here are closed. The standard streams belong
 * to the process: closing them would break later I/O in the caller and
 * would make a retry after an EAGAIN status operate on a closed
 * descriptor.
 */
uint8_t wrapper_decompress(wrapper* w, char* input, char* output) {
    uint8_t ret;
    int fd_in;
    int fd_out;

    switch (w->type) {
        case LZ78_ALGORITHM:
            if (input == NULL) {
                fd_in = STDIN_FILENO;
            } else {
                fd_in = open(input, ACCESS_READ);
                if (fd_in == -1)
                    return wrapper_return(WRAPPER_ERROR_FILE_IN);
            }

            if (output == NULL) {
                fd_out = STDOUT_FILENO;
            } else {
                fd_out = open(output, ACCESS_WRITE, 0644);
                if (fd_out == -1) {
                    /* Do not close stdin when no input file was opened. */
                    if (input != NULL)
                        close(fd_in);
                    return wrapper_return(WRAPPER_ERROR_FILE_OUT);
                }
            }

            ret = lz78_decompress(w->data, fd_in, fd_out);

            /* Close only what this function opened. */
            if (input != NULL)
                close(fd_in);
            if (output != NULL)
                close(fd_out);
            return wrapper_return(ret);

        default:
            return wrapper_return(WRAPPER_ERROR_ALGORITHM);
    }
}
|
||||||
|
|
||||||
|
/*
 * Execute the operation selected by the wrapper's mode.
 *
 * WRAPPER_MODE_COMPRESS dispatches to wrapper_compress(); every other
 * mode dispatches to wrapper_decompress(). The chosen operation is
 * repeated for as long as it reports WRAPPER_ERROR_EAGAIN, and the
 * first other status is returned to the caller.
 */
uint8_t wrapper_exec(wrapper* w, char* input, char* output) {
    /* The mode is read once, before the retry loop starts. */
    uint8_t (*operation)(wrapper*, char*, char*) =
        (w->mode == WRAPPER_MODE_COMPRESS) ? wrapper_compress
                                           : wrapper_decompress;
    uint8_t status;

    do {
        status = operation(w, input, output);
    } while (status == WRAPPER_ERROR_EAGAIN);

    return status;
}
|
86
src/lz78/wrapper.h
Normal file
86
src/lz78/wrapper.h
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
/*
|
||||||
|
* Basic implementation of LZ78 compression algorithm
|
||||||
|
*
|
||||||
|
* Copyright (C) 2010 evilaliv3 <giovanni.pellerano@evilaliv3.org>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __WRAPPER_H
#define __WRAPPER_H

/* uint8_t is used directly in the prototypes below. */
#include <stdint.h>

#include "lz78.h"

/* List of included compression algorithms */
#define UNKNOWN_ALGORITHM           0
#define LZ78_ALGORITHM              1

/* Modes of compression */
#define WRAPPER_MODE_COMPRESS       0
#define WRAPPER_MODE_DECOMPRESS     1

/* List of managed wrapper-level errors */
#define WRAPPER_SUCCESS             20
#define WRAPPER_ERROR_ALGORITHM     21
#define WRAPPER_ERROR_FILE_IN       22
#define WRAPPER_ERROR_FILE_OUT      23
#define WRAPPER_ERROR_READ          24
#define WRAPPER_ERROR_WRITE         25
#define WRAPPER_ERROR_EAGAIN        26
#define WRAPPER_ERROR_COMPRESS      27
#define WRAPPER_ERROR_DECOMPRESS    28
#define WRAPPER_ERROR_GENERIC       29

/* Opaque type representing the wrapper */
typedef struct __wrapper wrapper;

/* Creates a new wrapper:
    w_mode  mode of compression (WRAPPER_MODE_COMPRESS or
            WRAPPER_MODE_DECOMPRESS)
    w_type  type of algorithm (e.g. LZ78_ALGORITHM)
    w_argv  additional parameter
   Returns the new wrapper, or NULL on failure.
*/
wrapper* wrapper_new(uint8_t w_mode, uint8_t w_type, char* w_argv);

/* Deallocates a wrapper */
void wrapper_destroy(wrapper* w);

/* Execute the function associated with the wrapper (compress/decompress)
   Return:
    WRAPPER_SUCCESS           on success
    WRAPPER_ERROR_FILE_IN     unable to open input file
    WRAPPER_ERROR_FILE_OUT    unable to open output file
    WRAPPER_ERROR_READ        unable to read input data
    WRAPPER_ERROR_WRITE       unable to write output data
    WRAPPER_ERROR_EAGAIN      unable to accomplish current operation
    WRAPPER_ERROR_ALGORITHM   type of wrapper unknown
    WRAPPER_ERROR_COMPRESS    unable to compress input data
    WRAPPER_ERROR_DECOMPRESS  unable to decompress input data
    WRAPPER_ERROR_GENERIC     algorithm-dependent error
*/
uint8_t wrapper_exec(wrapper* w, char* in, char* out);

/* Return a positive constant associated to a particular algorithm
   (UNKNOWN_ALGORITHM if it doesn't exist)
*/
uint8_t get_algorithm(char* type);

/* Return an integer representing the given size
   (K = KBytes, M = MBytes)
*/
int byte_size(char* size);

/* Print the last wrapper error that occurred to the standard error stream.
   Declared with (void) so that calls passing arguments are rejected at
   compile time; an empty parameter list would leave them unchecked. */
void wrapper_perror(void);

#endif /* __WRAPPER_H */
|
10
src/util.c
10
src/util.c
@ -17,6 +17,16 @@ void syn_error(char *message)
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Report the size of an open stream without disturbing its position.
 *
 * The current offset is saved with ftell(), the stream is moved to its
 * end to read the end offset, and the saved offset is then restored.
 *
 *   in  open, seekable stream
 *
 * Returns the offset reported by ftell() at end of stream, or -1 when
 * `in` is NULL or any ftell()/fseek() step fails (for example on a
 * stream that cannot seek).
 */
long fsize (FILE *in)
{
    long pos, length;

    if (in == NULL)
        return -1L;

    pos = ftell(in);
    if (pos < 0)
        return -1L;

    if (fseek(in, 0L, SEEK_END) != 0)
        return -1L;

    length = ftell(in);

    /* Restore the caller's position; if that fails the stream is no
       longer where the caller left it, so report an error. */
    if (fseek(in, pos, SEEK_SET) != 0)
        return -1L;

    return length;
}
|
||||||
|
|
||||||
void syn_warn(char *message)
|
void syn_warn(char *message)
|
||||||
{
|
{
|
||||||
printf("%s\n", message);
|
printf("%s\n", message);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user