Almost there, LZ77 file compression algorithm with decompress and compress
This commit is contained in:
parent
19012d6e66
commit
847fccfc66
4
Makefile
4
Makefile
@ -8,9 +8,9 @@ VERSION_EXTRA = \"$(EXTRA)\"
|
||||
PREFIX ?= /usr
|
||||
|
||||
CC ?= gcc
|
||||
CFLAGS += -O2 -pedantic -g -Wall -Wextra --param=ssp-buffer-size=2 -fstack-protector-all
|
||||
CFLAGS += -pedantic -g -Wall -Wextra
|
||||
CPPFLAGS += -DVERSION=$(VERSION) -D_FORTIFY_SOURCE=2
|
||||
LDFLAGS += -Wl,-O1 -Lsrc/libutil
|
||||
LDFLAGS += -Wl,-O1 -lm
|
||||
BIN ?= slidescript
|
||||
|
||||
SRCS=$(wildcard src/*.c)
|
||||
|
BIN
src/comp.1
Normal file
BIN
src/comp.1
Normal file
Binary file not shown.
BIN
src/comp.ss
Normal file
BIN
src/comp.ss
Normal file
Binary file not shown.
230
src/compression.c
Normal file
230
src/compression.c
Normal file
@ -0,0 +1,230 @@
|
||||
// LZ77 compression examples, simple and lightweight
|
||||
// Being quick to process and execute, this will be great
|
||||
// For internal compression on modern machines
|
||||
//
|
||||
// Andy Herbert
|
||||
// lz1 https://github.com/andyherbert/lz1
|
||||
//
|
||||
|
||||
#include "deps.h"
|
||||
#include "compression.h"
|
||||
|
||||
/* Store a 16-bit value at a possibly unaligned address, in host byte order.
 * Tokens start at byte offset 5 of the stream, so a direct uint16_t store
 * through a cast pointer would be a misaligned access. */
static void lz77_store_u16 (uint8_t *dst, uint16_t value)
{
    const uint8_t *src = (const uint8_t *) &value;

    dst[0] = src[0];
    dst[1] = src[1];
}

/* Store a 32-bit value at a possibly unaligned address, in host byte order. */
static void lz77_store_u32 (uint8_t *dst, uint32_t value)
{
    const uint8_t *src = (const uint8_t *) &value;

    dst[0] = src[0];
    dst[1] = src[1];
    dst[2] = src[2];
    dst[3] = src[3];
}

/*
 * LZ77-compress a memory buffer.
 *
 * Stream layout (multi-byte fields are in host byte order):
 *   bytes 0-3 : uncompressed size (uint32_t)
 *   byte  4   : pointer_length_width
 *   then one 3-byte token per step: a 16-bit pointer followed by one literal
 *   byte. The pointer holds the back-reference distance in its upper
 *   (16 - pointer_length_width) bits and (match length - 1) in its lower
 *   pointer_length_width bits; a distance of 0 means "literal only".
 *
 * uncompressed_text    : input bytes
 * uncompressed_size    : number of input bytes
 * compressed_text      : output buffer; every token covers at least one input
 *                        byte, so it must hold 5 + 3 * uncompressed_size bytes
 * pointer_length_width : bits of each pointer used for the match length (0..16)
 *
 * Returns the number of bytes written to compressed_text, or 0 (nothing
 * written) when pointer_length_width is greater than 16.
 *
 * NOTE: the size is tracked in a uint32_t, so inputs large enough that
 * 5 + 3 * uncompressed_size exceeds UINT32_MAX are not supported.
 */
uint32_t lz77_compress (uint8_t *uncompressed_text, uint32_t uncompressed_size, uint8_t *compressed_text, uint8_t pointer_length_width)
{
    uint32_t pointer_pos, temp_pointer_pos, pointer_length, temp_pointer_length;
    uint32_t pointer_pos_max, pointer_length_max;
    uint32_t output_size, coding_pos, output_lookahead_ref, look_behind, look_ahead;
    uint16_t output_pointer;

    /* A pointer is 16 bits wide; a larger length field cannot be encoded. */
    if(pointer_length_width > 16)
        return 0;

    /* Integer shifts instead of pow(): exact, and no overflow at widths 0 and 16. */
    pointer_pos_max = (uint32_t) 1 << (16 - pointer_length_width);
    pointer_length_max = (uint32_t) 1 << pointer_length_width;

    lz77_store_u32(compressed_text, uncompressed_size);
    compressed_text[4] = pointer_length_width;
    output_size = 5;

    for(coding_pos = 0; coding_pos < uncompressed_size; ++coding_pos)
    {
        pointer_pos = 0;
        pointer_length = 0;

        /* Search the window behind coding_pos, nearest distance first; only a
         * strictly longer match replaces the current best. */
        for(temp_pointer_pos = 1; (temp_pointer_pos < pointer_pos_max) && (temp_pointer_pos <= coding_pos); ++temp_pointer_pos)
        {
            look_behind = coding_pos - temp_pointer_pos;
            look_ahead = coding_pos;
            temp_pointer_length = 0;

            /* Bounded by the end of the input so the comparison never reads
             * past uncompressed_text. */
            while((look_ahead < uncompressed_size)
                  && (temp_pointer_length < pointer_length_max)
                  && (uncompressed_text[look_ahead] == uncompressed_text[look_behind]))
            {
                ++look_ahead;
                ++look_behind;
                ++temp_pointer_length;
            }

            if(temp_pointer_length > pointer_length)
            {
                pointer_pos = temp_pointer_pos;
                pointer_length = temp_pointer_length;
                if(pointer_length == pointer_length_max)
                    break;
            }
        }

        coding_pos += pointer_length;

        if((coding_pos == uncompressed_size) && pointer_length)
        {
            /* The match ran to the end of the input: shorten it by one byte so
             * the final input byte can be emitted as this token's literal. */
            output_pointer = (pointer_length == 1) ? 0 : (uint16_t) ((pointer_pos << pointer_length_width) | (pointer_length - 2));
            output_lookahead_ref = coding_pos - 1;
        }
        else
        {
            output_pointer = (uint16_t) ((pointer_pos << pointer_length_width) | (pointer_length ? (pointer_length - 1) : 0));
            output_lookahead_ref = coding_pos;
        }

        lz77_store_u16(compressed_text + output_size, output_pointer);
        compressed_text[output_size + 2] = uncompressed_text[output_lookahead_ref];
        output_size += 3;
    }

    return output_size;
}
|
||||
|
||||
/* Load a 16-bit host-order value from a possibly unaligned address. */
static uint16_t lz77_load_u16 (const uint8_t *src)
{
    uint16_t value;
    uint8_t *dst = (uint8_t *) &value;

    dst[0] = src[0];
    dst[1] = src[1];
    return value;
}

/* Load a 32-bit host-order value from a possibly unaligned address. */
static uint32_t lz77_load_u32 (const uint8_t *src)
{
    uint32_t value;
    uint8_t *dst = (uint8_t *) &value;

    dst[0] = src[0];
    dst[1] = src[1];
    dst[2] = src[2];
    dst[3] = src[3];
    return value;
}

/*
 * Decompress a stream produced by lz77_compress.
 *
 * compressed_text   : stream starting with the 5-byte header (uint32_t
 *                     uncompressed size, then pointer_length_width), followed
 *                     by 3-byte tokens (16-bit pointer + literal byte)
 * uncompressed_text : output buffer; must hold the uncompressed size stored in
 *                     the header
 *
 * Returns the number of bytes produced, which equals the header's size on
 * success. Returns 0 when the stream is rejected: a length width above 16, a
 * back-reference pointing before the start of the output, or a copy that would
 * not leave room for its literal inside the declared size.
 *
 * NOTE(review): the length of compressed_text is not passed in, so reads from
 * a truncated stream cannot be bounded here; callers must supply a complete
 * stream.
 */
uint32_t lz77_decompress (uint8_t *compressed_text, uint8_t *uncompressed_text)
{
    uint8_t pointer_length_width;
    uint16_t input_pointer, pointer_length_mask;
    uint32_t compressed_pointer, coding_pos, pointer_offset, pointer_pos, pointer_length, uncompressed_size;

    uncompressed_size = lz77_load_u32(compressed_text);
    pointer_length_width = compressed_text[4];
    compressed_pointer = 5;

    /* A pointer is only 16 bits wide; a larger width cannot be valid. */
    if(pointer_length_width > 16)
        return 0;

    /* Low pointer_length_width bits of a pointer hold (match length - 1). */
    pointer_length_mask = (uint16_t) (((uint32_t) 1 << pointer_length_width) - 1);

    for(coding_pos = 0; coding_pos < uncompressed_size; ++coding_pos)
    {
        input_pointer = lz77_load_u16(compressed_text + compressed_pointer);
        compressed_pointer += 2;
        pointer_pos = (uint32_t) input_pointer >> pointer_length_width;

        if(pointer_pos)
        {
            pointer_length = (uint32_t) (input_pointer & pointer_length_mask) + 1;

            /* Reject references that start before the output buffer, and copies
             * that would not leave one byte for the literal that follows. */
            if((pointer_pos > coding_pos) || (pointer_length >= uncompressed_size - coding_pos))
                return 0;

            /* Byte-by-byte on purpose: source and destination may overlap
             * (distance smaller than length repeats a short pattern). */
            for(pointer_offset = coding_pos - pointer_pos; pointer_length > 0; --pointer_length)
                uncompressed_text[coding_pos++] = uncompressed_text[pointer_offset++];
        }

        /* Every token ends with one literal byte. */
        uncompressed_text[coding_pos] = compressed_text[compressed_pointer++];
    }

    return coding_pos;
}
|
||||
|
||||
/*
 * Return the size in bytes of an open stream without disturbing its current
 * read/write position.
 *
 * in : open stream (seekable)
 *
 * Returns the offset of the end of the stream as reported by ftell(), or -1L
 * when in is NULL or any ftell()/fseek() call fails.
 */
long fsize (FILE *in)
{
    long pos, length;

    if(in == NULL)
        return -1L;

    /* Remember where the caller was so the position can be restored. */
    pos = ftell(in);
    if(pos < 0)
        return -1L;

    if(fseek(in, 0L, SEEK_END) != 0)
        return -1L;

    length = ftell(in);

    /* Always try to restore the original position, even if ftell failed. */
    if(fseek(in, pos, SEEK_SET) != 0)
        return -1L;

    return length;
}
|
||||
|
||||
/*
 * Compress the file filename_in into filename_out with lz77_compress.
 *
 * filename_in          : path of the file to read
 * filename_out         : path of the file to create or overwrite
 * malloc_size          : requested size of the output buffer; it is raised to
 *                        the worst case (5-byte header + 3 bytes per input
 *                        byte) when smaller, so the encoder cannot overrun it
 * pointer_length_width : bits of each LZ77 pointer used for the match length
 *
 * Returns the number of compressed bytes written, or 0 on any failure:
 * unopenable files, size query or read errors, allocation failure, an input
 * too large for its worst-case output to fit in a uint32_t, a length width
 * above 16, or a short or failed write.
 */
uint32_t ss_compress (const char *filename_in, char *filename_out, size_t malloc_size, uint8_t pointer_length_width)
{
    enum { header_size = 5, token_size = 3 };

    FILE *in = NULL, *out = NULL;
    uint8_t *uncompressed_text = NULL, *compressed_text = NULL;
    uint32_t uncompressed_size, compressed_size, result = 0;
    size_t worst_case;
    long file_size;

    /* A pointer is 16 bits wide; wider length fields cannot be encoded. */
    if(pointer_length_width > 16)
        return 0;

    /* Binary mode: the data is raw bytes, not text. */
    in = fopen(filename_in, "rb");
    if(in == NULL)
        return 0;

    /* Reject size-query errors and inputs whose worst-case output size would
     * not fit in the uint32_t used for compressed sizes. */
    file_size = fsize(in);
    if((file_size < 0) || ((unsigned long) file_size > (UINT32_MAX - header_size) / token_size))
        goto cleanup;
    uncompressed_size = (uint32_t) file_size;

    /* Never request 0 bytes: malloc(0) may legitimately return NULL. */
    uncompressed_text = malloc(uncompressed_size ? uncompressed_size : 1);
    if(uncompressed_text == NULL)
        goto cleanup;
    if(fread(uncompressed_text, 1, uncompressed_size, in) != uncompressed_size)
        goto cleanup;
    fclose(in);
    in = NULL;

    /* Each token covers at least one input byte, so this bounds the output. */
    worst_case = (size_t) header_size + (size_t) token_size * uncompressed_size;
    if(malloc_size < worst_case)
        malloc_size = worst_case;
    compressed_text = malloc(malloc_size);
    if(compressed_text == NULL)
        goto cleanup;

    compressed_size = lz77_compress(uncompressed_text, uncompressed_size, compressed_text, pointer_length_width);
    if(compressed_size == 0)
        goto cleanup;

    out = fopen(filename_out, "wb");
    if(out == NULL)
        goto cleanup;
    if(fwrite(compressed_text, 1, compressed_size, out) != compressed_size)
        goto cleanup;

    /* fclose flushes buffered data, so its failure is a write failure. */
    if(fclose(out) == 0)
        result = compressed_size;
    out = NULL;

cleanup:
    if(in != NULL)
        fclose(in);
    if(out != NULL)
        fclose(out);
    free(compressed_text);
    free(uncompressed_text);

    return result;
}
|
||||
|
||||
/*
 * Decompress the file filename_in (as written by ss_compress) into
 * filename_out.
 *
 * filename_in  : path of the compressed file
 * filename_out : path of the file to create or overwrite
 *
 * Returns the number of uncompressed bytes written, or 0 on any failure:
 * unopenable files, a file shorter than the 5-byte header, size query or read
 * errors, allocation failure, a decoded length that does not match the header,
 * or a short or failed write. An archive of an empty file also yields 0.
 *
 * NOTE(review): lz77_decompress is not told how long the compressed buffer is,
 * so a truncated token stream is not detected here.
 */
uint32_t ss_decompress (char *filename_in, char *filename_out)
{
    enum { header_size = 5 };

    FILE *in = NULL, *out = NULL;
    uint8_t *compressed_text = NULL, *uncompressed_text = NULL;
    uint32_t compressed_size, uncompressed_size, result = 0;
    long file_size;

    /* Binary mode: the data is raw bytes, not text. */
    in = fopen(filename_in, "rb");
    if(in == NULL)
        return 0;

    /* The header must be fully present before it is read below. */
    file_size = fsize(in);
    if((file_size < header_size) || ((unsigned long) file_size > UINT32_MAX))
        goto cleanup;
    compressed_size = (uint32_t) file_size;

    compressed_text = malloc(compressed_size);
    if(compressed_text == NULL)
        goto cleanup;
    if(fread(compressed_text, 1, compressed_size, in) != compressed_size)
        goto cleanup;
    fclose(in);
    in = NULL;

    /* First header field: the uncompressed size, in host byte order. The
     * buffer comes from malloc, so it is suitably aligned for this read. */
    uncompressed_size = *((uint32_t *) compressed_text);

    /* Never request 0 bytes: malloc(0) may legitimately return NULL. */
    uncompressed_text = malloc(uncompressed_size ? uncompressed_size : 1);
    if(uncompressed_text == NULL)
        goto cleanup;

    if(lz77_decompress(compressed_text, uncompressed_text) != uncompressed_size)
        goto cleanup;

    out = fopen(filename_out, "wb");
    if(out == NULL)
        goto cleanup;
    if(fwrite(uncompressed_text, 1, uncompressed_size, out) != uncompressed_size)
        goto cleanup;

    /* fclose flushes buffered data, so its failure is a write failure. */
    if(fclose(out) == 0)
        result = uncompressed_size;
    out = NULL;

cleanup:
    if(in != NULL)
        fclose(in);
    if(out != NULL)
        fclose(out);
    free(compressed_text);
    free(uncompressed_text);

    return result;
}
|
||||
|
||||
/*
|
||||
int main (int argc, char const *argv[])
|
||||
{
|
||||
FILE *in;
|
||||
|
||||
char filename[129];
|
||||
char filedecout[141];
|
||||
|
||||
if(argc < 2)
|
||||
{
|
||||
printf("Please enter a filename: ./comp file.txt");
|
||||
}
|
||||
|
||||
in = fopen(argv[1], "r");
|
||||
if(in == NULL)
|
||||
return 0;
|
||||
|
||||
if(strlen(argv[1]) > 128)
|
||||
{
|
||||
printf("Filename too long");
|
||||
return 1;
|
||||
}
|
||||
|
||||
sprintf(filename, "%s.ss", argv[1]);
|
||||
sprintf(filedecout, "%s.1", argv[1]);
|
||||
|
||||
printf("Original size: %ld\n", fsize(in));
|
||||
fclose(in);
|
||||
for(uint8_t i = 1; i <= 6; ++i)
|
||||
printf("Compressed (%i): %u, decompressed: (%u)\n", i, ss_compress(argv[1], filename, 20000000, i), ss_decompress(filename, filedecout));
|
||||
return 0;
|
||||
}
|
||||
*/
|
3
src/compression.h
Normal file
3
src/compression.h
Normal file
@ -0,0 +1,3 @@
|
||||
#ifndef COMPRESSION_H
#define COMPRESSION_H

/* Included here so the header compiles on its own: the prototypes below use
 * size_t, the fixed-width integer types and FILE. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Compress filename_in into filename_out. malloc_size is the size of the
 * output buffer to allocate and pointer_length_width is the number of bits of
 * each LZ77 pointer used for the match length. Returns the compressed size in
 * bytes, or 0 on failure. */
uint32_t ss_compress (const char *filename_in, char *filename_out, size_t malloc_size, uint8_t pointer_length_width);

/* Decompress filename_in (written by ss_compress) into filename_out. Returns
 * the uncompressed size in bytes, or 0 on failure. */
uint32_t ss_decompress (char *filename_in, char *filename_out);

/* Return the size in bytes of the open stream, restoring its current
 * position afterwards. */
long fsize (FILE *in);

#endif /* COMPRESSION_H */
|
@ -29,6 +29,9 @@
|
||||
#include <sys/stat.h>
|
||||
// For string searching
|
||||
#include <regex.h>
|
||||
// Math for compression algorithm
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
|
||||
#define MAX_VAR_NAME_LEN 512
|
||||
#define MAX_VAR_NAME_BUFSIZE (MAX_VAR_NAME_LEN + 1)
|
||||
|
131
src/functions.c
131
src/functions.c
@ -15,7 +15,9 @@
|
||||
#include "network.h"
|
||||
#include "search.h"
|
||||
#include "inset.h"
|
||||
// For slidescript compression algorithm
|
||||
#include "tar.h"
|
||||
#include "compression.h"
|
||||
|
||||
char *process_line(char *line)
|
||||
{
|
||||
@ -25,11 +27,13 @@ char *process_line(char *line)
|
||||
|
||||
static char concatbuf[MAX_CONCAT_BUF+1];
|
||||
static char filebuf[MAX_READFILE_LEN+1];
|
||||
static char retbuf[MAX_STRING_BUFSIZE];
|
||||
|
||||
// Make sure static buffers are empty before initialize
|
||||
// This was a big bug here for a minute.
|
||||
bzero(filebuf, MAX_READFILE_LEN);
|
||||
bzero(concatbuf, MAX_CONCAT_BUF);
|
||||
bzero(retbuf, MAX_STRING_LEN);
|
||||
|
||||
tok_srch = strtok(line, "=\" |");
|
||||
|
||||
@ -70,15 +74,34 @@ char *process_line(char *line)
|
||||
|
||||
filename = parse_vars(tok_srch);
|
||||
|
||||
FILE *in;
|
||||
|
||||
char origsize[128];
|
||||
char filedecout[MAX_FILENAME_LEN+5];
|
||||
|
||||
in = fopen(filename, "r");
|
||||
if(in == NULL)
|
||||
{
|
||||
syn_warn("ss:warn:compress, failed to open tar for compression");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
sprintf(filedecout, "uncompressed.tar");
|
||||
|
||||
sprintf(origsize, "%ld", fsize(in));
|
||||
|
||||
sprintf(retbuf, "ss: %s: compressed: %s, decompressed: %u", filename, origsize, ss_decompress(filename, filedecout));
|
||||
|
||||
fclose(in);
|
||||
|
||||
// open existing file
|
||||
if ((fd = open(filename, O_RDWR)) < 0) {
|
||||
syn_warn("ss:warn:decompress, failed to open archive");
|
||||
if ((fd = open(filedecout, O_RDWR)) < 0) {
|
||||
syn_warn("ss:warn:decompress, failed to tar open archive");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// read in data
|
||||
if (tar_read(fd, &archive, '1') < 0) {
|
||||
tar_free(archive);
|
||||
close(fd);
|
||||
syn_warn("ss:warn:decompress, failed to read archive");
|
||||
return NULL;
|
||||
@ -92,18 +115,20 @@ char *process_line(char *line)
|
||||
tar_free(archive);
|
||||
close(fd); // don't bother checking for fd < 0
|
||||
|
||||
return NULL;
|
||||
remove(filedecout);
|
||||
|
||||
return retbuf;
|
||||
|
||||
}
|
||||
|
||||
/* Compression function of tar */
|
||||
else if(strncmp("compress",tok_srch,8) == 0)
|
||||
{
|
||||
char files[2][MAX_FILENAME_LEN+1]; // Files to be added into the archive
|
||||
char filename[MAX_FILENAME_LEN+1]; // Files to be added into the archive
|
||||
char passval[3];
|
||||
struct tar_t *archive = NULL;
|
||||
int fd;
|
||||
|
||||
bzero(files[0], MAX_FILENAME_LEN);
|
||||
|
||||
tok_srch = strtok(NULL, "\"");
|
||||
if(tok_srch == NULL)
|
||||
{
|
||||
@ -117,9 +142,43 @@ char *process_line(char *line)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Save tarball filename
|
||||
// Collect how many compress passes we do from first argument
|
||||
// No more than 10 passes needed.
|
||||
if((strlen(parse_vars(tok_srch)) < 3) && (atoi(parse_vars(tok_srch)) <= 10) && (atoi(parse_vars(tok_srch)) >= 5))
|
||||
{
|
||||
sprintf(passval, "%s", parse_vars(tok_srch));
|
||||
}
|
||||
else
|
||||
{
|
||||
syn_warn("ss:warn:compress, pass value too long, 5 to 10 are accepted values.");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tok_srch = strtok(NULL, "\"");
|
||||
if(tok_srch == NULL)
|
||||
{
|
||||
syn_warn("ss:warn:compress syntax error, missing quote?");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tok_srch = strtok(NULL, "\"");
|
||||
if(tok_srch == NULL)
|
||||
{
|
||||
syn_warn("ss:warn:compress syntax error, missing quote?");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if(strcmp(tok_srch, "\n") == 0 || strcmp(tok_srch, " \n") == 0)
|
||||
{
|
||||
syn_warn("ss:warn:compress syntax error, missing archive name...");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Save tarball filename
|
||||
if(strlen(parse_vars(tok_srch)) < MAX_FILENAME_LEN)
|
||||
{
|
||||
sprintf(files[0], "%s", parse_vars(tok_srch));
|
||||
sprintf(filename, "%s", parse_vars(tok_srch));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -127,7 +186,7 @@ char *process_line(char *line)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if ((fd = open(files[0], O_WRONLY | O_TRUNC | O_CREAT, S_IRUSR | S_IWUSR)) == -1){
|
||||
if ((fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT, S_IRUSR | S_IWUSR)) == -1){
|
||||
syn_warn("ss:warn:compress, failed to open new archive");
|
||||
return NULL;
|
||||
}
|
||||
@ -158,29 +217,59 @@ char *process_line(char *line)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if(strlen(parse_vars(tok_srch)) < MAX_FILENAME_LEN)
|
||||
int argc = 0;
|
||||
char *argv[MAX_FILES];
|
||||
|
||||
char *p2 = strtok(tok_srch, " ");
|
||||
while (p2 && argc < MAX_FILES-1)
|
||||
{
|
||||
sprintf(files[1], "%s", parse_vars(tok_srch));
|
||||
}
|
||||
else
|
||||
{
|
||||
syn_warn("ss:warn:comrpress, filename too long!");
|
||||
return NULL;
|
||||
argv[argc++] = p2;
|
||||
p2 = strtok(NULL, " ");
|
||||
}
|
||||
|
||||
const char **tarin = (const char **) &files[1];
|
||||
argv[argc] = 0;
|
||||
|
||||
//printf("%s", files[0]);
|
||||
const char **tarin = (const char **) &argv[0];
|
||||
|
||||
if (tar_write(fd, &archive, 0, tarin, '1') < 0) {
|
||||
syn_warn("ss:warn:compress, failed to create archive");
|
||||
if (tar_write(fd, &archive, argc, tarin, '1') < 0) {
|
||||
syn_warn("ss:warn:compress, failed to create tar archive");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tar_free(archive);
|
||||
close(fd); // don't bother checking for fd < 0
|
||||
|
||||
return NULL;
|
||||
syn_warn("ss:warn:compressing...");
|
||||
|
||||
FILE *in;
|
||||
|
||||
char origsize[128];
|
||||
char file_comp[MAX_FILENAME_LEN+4];
|
||||
char filedecout[MAX_FILENAME_LEN+5];
|
||||
|
||||
in = fopen(filename, "r");
|
||||
if(in == NULL)
|
||||
{
|
||||
syn_warn("ss:warn:compress, failed to open tar for compression");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
sprintf(file_comp, "%s.ss", filename);
|
||||
sprintf(filedecout, "%s.1", filename);
|
||||
|
||||
sprintf(origsize, "%ld", fsize(in));
|
||||
|
||||
fclose(in);
|
||||
for(uint8_t i = 1; i <= atoi(passval); ++i)
|
||||
{
|
||||
sprintf(retbuf, "ss: %s: compressed: %u, decompressed: %u", file_comp, ss_compress(filename, file_comp, 20000000, i), ss_decompress(file_comp, filedecout));
|
||||
}
|
||||
|
||||
// Remove the decompressed version for sanity check
|
||||
remove(filedecout);
|
||||
remove(filename);
|
||||
|
||||
return retbuf;
|
||||
}
|
||||
|
||||
/* mkdir function, and mkfile functions */
|
||||
|
@ -42,7 +42,7 @@ int tar_read(const int fd, struct tar_t ** archive, const char verbosity){
|
||||
char update = 1;
|
||||
|
||||
for(count = 0; ; count++){
|
||||
*tar = malloc(sizeof(struct tar_t));
|
||||
*tar = malloc(sizeof(struct tar_t) + 1);
|
||||
if (update && (read_size(fd, (*tar) -> block, 512) != 512)){
|
||||
V_PRINT(stderr, "Error: Bad read. Stopping");
|
||||
tar_free(*tar);
|
||||
|
Loading…
x
Reference in New Issue
Block a user