Almost there, LZ77 file compression algorithm with decompress and compress

master
Pentium44 2021-04-09 21:12:52 -07:00
parent 19012d6e66
commit 847fccfc66
9 changed files with 349 additions and 24 deletions

View File

@ -8,9 +8,9 @@ VERSION_EXTRA = \"$(EXTRA)\"
PREFIX ?= /usr
CC ?= gcc
CFLAGS += -O2 -pedantic -g -Wall -Wextra --param=ssp-buffer-size=2 -fstack-protector-all
CFLAGS += -pedantic -g -Wall -Wextra
CPPFLAGS += -DVERSION=$(VERSION) -D_FORTIFY_SOURCE=2
LDFLAGS += -Wl,-O1 -Lsrc/libutil
LDFLAGS += -Wl,-O1 -lm
BIN ?= slidescript
SRCS=$(wildcard src/*.c)

BIN
src/comp Executable file

Binary file not shown.

BIN
src/comp.1 Normal file

Binary file not shown.

BIN
src/comp.ss Normal file

Binary file not shown.

230
src/compression.c Normal file
View File

@ -0,0 +1,230 @@
// LZ77 compression example code: simple and lightweight.
// Because it is quick to process and execute, it is well suited
// to internal compression on modern machines.
//
// Andy Herbert
// lz1 https://github.com/andyherbert/lz1
//
#include "deps.h"
#include "compression.h"
#include <string.h> // memcpy
/*
 * lz77_compress - encode a buffer as a stream of LZ77 tokens.
 *
 * Output layout (multi-byte fields are stored in the host's byte order):
 *   bytes 0-3  uncompressed_size (uint32_t)
 *   byte  4    pointer_length_width
 *   then one 3-byte token per step: a 16-bit pointer word followed by one
 *   literal byte. The pointer word carries the back-reference distance in
 *   its upper (16 - pointer_length_width) bits and (match length - 1) in its
 *   lower pointer_length_width bits. A distance of 0 means "literal only".
 *
 * uncompressed_text     input bytes
 * uncompressed_size     number of input bytes
 * compressed_text       output buffer. Its capacity cannot be checked here;
 *                       every token consumes at least one input byte, so
 *                       5 + 3 * uncompressed_size bytes always suffice.
 * pointer_length_width  number of pointer-word bits used for the match
 *                       length (0..16)
 *
 * Returns the number of bytes written to compressed_text, or 0 (nothing
 * written) when pointer_length_width is greater than 16.
 */
uint32_t lz77_compress (uint8_t *uncompressed_text, uint32_t uncompressed_size, uint8_t *compressed_text, uint8_t pointer_length_width)
{
    uint16_t pointer_pos, temp_pointer_pos, output_pointer, pointer_length, temp_pointer_length;
    uint32_t output_size, coding_pos, output_lookahead_ref, look_behind, look_ahead;
    uint16_t pointer_pos_max, pointer_length_max;

    /* A wider length field would not fit in the 16-bit pointer word. */
    if (pointer_length_width > 16)
        return 0;

    /* Integer shifts instead of pow(): exact, and well defined at 0 and 16
     * where the result wraps to 0 and simply disables matching. */
    pointer_pos_max = (uint16_t) (UINT32_C(1) << (16 - pointer_length_width));
    pointer_length_max = (uint16_t) (UINT32_C(1) << pointer_length_width);

    /* memcpy keeps the original host-order layout without unaligned stores. */
    memcpy(compressed_text, &uncompressed_size, sizeof uncompressed_size);
    compressed_text[4] = pointer_length_width;
    output_size = 5;

    for (coding_pos = 0; coding_pos < uncompressed_size; ++coding_pos)
    {
        pointer_pos = 0;
        pointer_length = 0;

        /* Try every distance inside the window and keep the longest match. */
        for (temp_pointer_pos = 1; (temp_pointer_pos < pointer_pos_max) && (temp_pointer_pos <= coding_pos); ++temp_pointer_pos)
        {
            look_behind = coding_pos - temp_pointer_pos;
            look_ahead = coding_pos;
            temp_pointer_length = 0;

            /* Never compare past the end of the input: the match length is
             * capped by both the length field and the remaining data. */
            while ((look_ahead < uncompressed_size)
                   && (temp_pointer_length < pointer_length_max)
                   && (uncompressed_text[look_ahead] == uncompressed_text[look_behind]))
            {
                ++look_ahead;
                ++look_behind;
                ++temp_pointer_length;
            }

            if (temp_pointer_length > pointer_length)
            {
                pointer_pos = temp_pointer_pos;
                pointer_length = temp_pointer_length;
                if (pointer_length == pointer_length_max)
                    break;
            }
        }

        coding_pos += pointer_length;

        if ((coding_pos == uncompressed_size) && pointer_length)
        {
            /* The match swallowed the final byte. Shorten it by one so that
             * the last input byte can be emitted as this token's literal. */
            output_pointer = (pointer_length == 1) ? 0 : (uint16_t) ((pointer_pos << pointer_length_width) | (pointer_length - 2));
            output_lookahead_ref = coding_pos - 1;
        }
        else
        {
            output_pointer = (uint16_t) ((pointer_pos << pointer_length_width) | (pointer_length ? (pointer_length - 1) : 0));
            output_lookahead_ref = coding_pos;
        }

        memcpy(compressed_text + output_size, &output_pointer, sizeof output_pointer);
        output_size += 2;
        compressed_text[output_size++] = uncompressed_text[output_lookahead_ref];
    }

    return output_size;
}
/*
 * lz77_decompress - decode a stream produced by lz77_compress.
 *
 * compressed_text    5-byte header (uint32_t uncompressed size in host byte
 *                    order, then the pointer length width) followed by
 *                    3-byte tokens: a 16-bit pointer word and a literal byte.
 * uncompressed_text  output buffer; must hold at least the uncompressed size
 *                    stored in the header.
 *
 * Returns the number of bytes produced, which equals the size stored in the
 * header for a well-formed stream. Returns 0 when the stream is rejected:
 * a length width above 16, a back-reference that points before the start of
 * the output, or a copy that would run past the declared size.
 *
 * NOTE(review): the length of compressed_text is not passed in, so reads from
 * a truncated input cannot be detected here; callers must validate that.
 */
uint32_t lz77_decompress (uint8_t *compressed_text, uint8_t *uncompressed_text)
{
    uint8_t pointer_length_width;
    uint16_t input_pointer, pointer_length, pointer_pos, pointer_length_mask;
    uint32_t compressed_pointer, coding_pos, pointer_offset, uncompressed_size;

    /* memcpy reads the host-order header without an unaligned load. */
    memcpy(&uncompressed_size, compressed_text, sizeof uncompressed_size);
    pointer_length_width = compressed_text[4];
    if (pointer_length_width > 16)
        return 0;

    compressed_pointer = 5;
    pointer_length_mask = (uint16_t) ((UINT32_C(1) << pointer_length_width) - 1);

    for (coding_pos = 0; coding_pos < uncompressed_size; ++coding_pos)
    {
        memcpy(&input_pointer, compressed_text + compressed_pointer, sizeof input_pointer);
        compressed_pointer += 2;
        pointer_pos = (uint16_t) (input_pointer >> pointer_length_width);

        if (pointer_pos)
        {
            pointer_length = (uint16_t) ((input_pointer & pointer_length_mask) + 1);

            /* Reject tokens that reference data before the output start or
             * that leave no room for the copy plus the trailing literal. */
            if ((pointer_pos > coding_pos) || (pointer_length >= uncompressed_size - coding_pos))
                return 0;

            /* Byte-by-byte forward copy: source and destination may overlap
             * when the distance is shorter than the length. */
            for (pointer_offset = coding_pos - pointer_pos; pointer_length > 0; --pointer_length)
                uncompressed_text[coding_pos++] = uncompressed_text[pointer_offset++];
        }

        uncompressed_text[coding_pos] = compressed_text[compressed_pointer++];
    }

    return coding_pos;
}
/*
 * fsize - report the total length of an open stream in bytes.
 *
 * Seeks to the end to measure the stream, then restores the position the
 * stream had on entry.
 *
 * Returns the length, or -1L if the position cannot be queried or changed
 * (for example when the stream is not seekable).
 */
long fsize (FILE *in)
{
    long pos, length;

    pos = ftell(in);
    if (pos < 0)
        return -1L;

    /* Without this check a failed seek would report the current offset. */
    if (fseek(in, 0L, SEEK_END) != 0)
        return -1L;

    length = ftell(in);

    if (fseek(in, pos, SEEK_SET) != 0)
        return -1L;

    return length;
}
/*
 * ss_compress - LZ77-compress the file filename_in into filename_out.
 *
 * filename_in           path of the file to read
 * filename_out          path of the file to create or overwrite
 * malloc_size           requested capacity of the output buffer in bytes;
 *                       it is raised to the worst-case output size
 *                       (5 + 3 * input size) when it is smaller than that
 * pointer_length_width  forwarded to lz77_compress
 *
 * Returns the compressed size in bytes, or 0 on any failure (unreadable
 * input, input larger than UINT32_MAX bytes, allocation failure, rejected
 * width, or a write error).
 */
uint32_t ss_compress (const char *filename_in, char *filename_out, size_t malloc_size, uint8_t pointer_length_width)
{
    FILE *in = NULL, *out = NULL;
    uint8_t *uncompressed_text = NULL, *compressed_text = NULL;
    uint32_t uncompressed_size, compressed_size, result = 0;
    size_t worst_case;
    long file_len;

    /* Binary mode: the payload is arbitrary bytes, not text. */
    in = fopen(filename_in, "rb");
    if (in == NULL)
        return 0;

    /* The stream header stores the size in 32 bits. */
    file_len = fsize(in);
    if ((file_len < 0) || ((uintmax_t) file_len > UINT32_MAX))
        goto done;
    uncompressed_size = (uint32_t) file_len;

    /* malloc(0) may return NULL, so always request at least one byte. */
    uncompressed_text = malloc(uncompressed_size ? uncompressed_size : 1);
    if (uncompressed_text == NULL)
        goto done;
    if (uncompressed_size != fread(uncompressed_text, 1, uncompressed_size, in))
        goto done;
    fclose(in);
    in = NULL;

    /* The compressor cannot check the output capacity: each token is 3 bytes
     * and covers at least one input byte, plus the 5-byte header. */
    if (uncompressed_size > (SIZE_MAX - 5) / 3)
        goto done;
    worst_case = 5 + 3 * (size_t) uncompressed_size;
    if (malloc_size < worst_case)
        malloc_size = worst_case;

    compressed_text = malloc(malloc_size);
    if (compressed_text == NULL)
        goto done;

    compressed_size = lz77_compress(uncompressed_text, uncompressed_size, compressed_text, pointer_length_width);
    if (compressed_size == 0)
        goto done;

    out = fopen(filename_out, "wb");
    if (out == NULL)
        goto done;
    if (compressed_size == fwrite(compressed_text, 1, compressed_size, out))
        result = compressed_size;
    /* fclose flushes buffered data, so a failure here is a write failure. */
    if (fclose(out) != 0)
        result = 0;

done:
    if (in != NULL)
        fclose(in);
    free(compressed_text);
    free(uncompressed_text);
    return result;
}
/*
 * ss_decompress - expand an LZ77 file written by ss_compress.
 *
 * filename_in   path of the compressed file
 * filename_out  path of the file to create or overwrite
 *
 * Returns the uncompressed size in bytes, or 0 on failure (unreadable or
 * malformed input, allocation failure, decode mismatch, or a write error).
 * A valid archive of an empty file also yields 0.
 *
 * NOTE(review): lz77_decompress is not told how long the compressed buffer
 * is, so the checks here only reject files whose token count cannot match
 * the header; a fully bounded decoder would need that length passed in.
 */
uint32_t ss_decompress (char *filename_in, char *filename_out)
{
    FILE *in = NULL, *out = NULL;
    uint32_t compressed_size, uncompressed_size, token_count, result = 0;
    uint8_t *compressed_text = NULL, *uncompressed_text = NULL;
    long file_len;

    /* Binary mode: the payload is arbitrary bytes, not text. */
    in = fopen(filename_in, "rb");
    if (in == NULL)
        return 0;

    /* A valid stream is a 5-byte header followed by whole 3-byte tokens. */
    file_len = fsize(in);
    if ((file_len < 5) || ((uintmax_t) file_len > UINT32_MAX))
        goto done;
    compressed_size = (uint32_t) file_len;
    if ((compressed_size - 5) % 3 != 0)
        goto done;
    token_count = (compressed_size - 5) / 3;

    compressed_text = malloc(compressed_size);
    if (compressed_text == NULL)
        goto done;
    if (fread(compressed_text, 1, compressed_size, in) != compressed_size)
        goto done;
    fclose(in);
    in = NULL;

    memcpy(&uncompressed_size, compressed_text, sizeof uncompressed_size);

    /* Every token yields at least one output byte, so the header size can
     * neither be smaller than the token count nor non-zero without tokens. */
    if ((token_count > uncompressed_size) || ((token_count == 0) && (uncompressed_size != 0)))
        goto done;

    /* malloc(0) may return NULL, so always request at least one byte. */
    uncompressed_text = malloc(uncompressed_size ? uncompressed_size : 1);
    if (uncompressed_text == NULL)
        goto done;

    if (lz77_decompress(compressed_text, uncompressed_text) != uncompressed_size)
        goto done;

    out = fopen(filename_out, "wb");
    if (out == NULL)
        goto done;
    if (fwrite(uncompressed_text, 1, uncompressed_size, out) == uncompressed_size)
        result = uncompressed_size;
    /* fclose flushes buffered data, so a failure here is a write failure. */
    if (fclose(out) != 0)
        result = 0;

done:
    if (in != NULL)
        fclose(in);
    free(compressed_text);
    free(uncompressed_text);
    return result;
}
/*
int main (int argc, char const *argv[])
{
FILE *in;
char filename[129];
char filedecout[141];
if(argc < 2)
{
printf("Please enter a filename: ./comp file.txt");
}
in = fopen(argv[1], "r");
if(in == NULL)
return 0;
if(strlen(argv[1]) > 128)
{
printf("Filename too long");
return 1;
}
sprintf(filename, "%s.ss", argv[1]);
sprintf(filedecout, "%s.1", argv[1]);
printf("Original size: %ld\n", fsize(in));
fclose(in);
for(uint8_t i = 1; i <= 6; ++i)
printf("Compressed (%i): %u, decompressed: (%u)\n", i, ss_compress(argv[1], filename, 20000000, i), ss_decompress(filename, filedecout));
return 0;
}
*/

3
src/compression.h Normal file
View File

@ -0,0 +1,3 @@
#ifndef COMPRESSION_H
#define COMPRESSION_H

/* Headers for the types used in the prototypes below, so this file can be
 * included on its own and in any order. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Compress the file filename_in into filename_out.
 * malloc_size is the requested capacity, in bytes, of the buffer that holds
 * the compressed output; pointer_length_width is the number of bits of each
 * 16-bit pointer word used to store the match length.
 * Returns the compressed size in bytes, or 0 on failure.
 */
uint32_t ss_compress (const char *filename_in, char *filename_out, size_t malloc_size, uint8_t pointer_length_width);

/*
 * Decompress the file filename_in into filename_out.
 * Returns the uncompressed size in bytes, or 0 on failure.
 */
uint32_t ss_decompress (char *filename_in, char *filename_out);

/*
 * Return the length of the open stream in bytes, measured by seeking to its
 * end; the stream position on entry is restored afterwards.
 */
long fsize (FILE *in);

#endif /* COMPRESSION_H */

View File

@ -29,6 +29,9 @@
#include <sys/stat.h>
// For string searching
#include <regex.h>
// Math for compression algorithm
#include <stdint.h>
#include <math.h>
#define MAX_VAR_NAME_LEN 512
#define MAX_VAR_NAME_BUFSIZE (MAX_VAR_NAME_LEN + 1)

View File

@ -15,7 +15,9 @@
#include "network.h"
#include "search.h"
#include "inset.h"
// For slidescript compression algorithm
#include "tar.h"
#include "compression.h"
char *process_line(char *line)
{
@ -25,11 +27,13 @@ char *process_line(char *line)
static char concatbuf[MAX_CONCAT_BUF+1];
static char filebuf[MAX_READFILE_LEN+1];
static char retbuf[MAX_STRING_BUFSIZE];
// Make sure static buffers are empty before initialize
// This was a big bug here for a minute.
bzero(filebuf, MAX_READFILE_LEN);
bzero(concatbuf, MAX_CONCAT_BUF);
bzero(retbuf, MAX_STRING_LEN);
tok_srch = strtok(line, "=\" |");
@ -70,15 +74,34 @@ char *process_line(char *line)
filename = parse_vars(tok_srch);
FILE *in;
char origsize[128];
char filedecout[MAX_FILENAME_LEN+5];
in = fopen(filename, "r");
if(in == NULL)
{
syn_warn("ss:warn:compress, failed to open tar for compression");
return NULL;
}
sprintf(filedecout, "uncompressed.tar");
sprintf(origsize, "%ld", fsize(in));
sprintf(retbuf, "ss: %s: compressed: %s, decompressed: %u", filename, origsize, ss_decompress(filename, filedecout));
fclose(in);
// open existing file
if ((fd = open(filename, O_RDWR)) < 0) {
syn_warn("ss:warn:decompress, failed to open archive");
if ((fd = open(filedecout, O_RDWR)) < 0) {
syn_warn("ss:warn:decompress, failed to tar open archive");
return NULL;
}
// read in data
if (tar_read(fd, &archive, '1') < 0) {
tar_free(archive);
close(fd);
syn_warn("ss:warn:decompress, failed to read archive");
return NULL;
@ -92,18 +115,20 @@ char *process_line(char *line)
tar_free(archive);
close(fd); // don't bother checking for fd < 0
return NULL;
remove(filedecout);
return retbuf;
}
/* Compression function of tar */
else if(strncmp("compress",tok_srch,8) == 0)
{
char files[2][MAX_FILENAME_LEN+1]; // Files to be added into the archive
char filename[MAX_FILENAME_LEN+1]; // Files to be added into the archive
char passval[3];
struct tar_t *archive = NULL;
int fd;
bzero(files[0], MAX_FILENAME_LEN);
tok_srch = strtok(NULL, "\"");
if(tok_srch == NULL)
{
@ -117,9 +142,43 @@ char *process_line(char *line)
return NULL;
}
// Save tarball filename
// Collect how many compress passes we do from first argument
// No more than 10 passes needed.
if((strlen(parse_vars(tok_srch)) < 3) && (atoi(parse_vars(tok_srch)) <= 10) && (atoi(parse_vars(tok_srch)) >= 5))
{
sprintf(passval, "%s", parse_vars(tok_srch));
}
else
{
syn_warn("ss:warn:compress, pass value too long, 5 to 10 are accepted values.");
return NULL;
}
tok_srch = strtok(NULL, "\"");
if(tok_srch == NULL)
{
syn_warn("ss:warn:compress syntax error, missing quote?");
return NULL;
}
tok_srch = strtok(NULL, "\"");
if(tok_srch == NULL)
{
syn_warn("ss:warn:compress syntax error, missing quote?");
return NULL;
}
if(strcmp(tok_srch, "\n") == 0 || strcmp(tok_srch, " \n") == 0)
{
syn_warn("ss:warn:compress syntax error, missing archive name...");
return NULL;
}
// Save tarball filename
if(strlen(parse_vars(tok_srch)) < MAX_FILENAME_LEN)
{
sprintf(files[0], "%s", parse_vars(tok_srch));
sprintf(filename, "%s", parse_vars(tok_srch));
}
else
{
@ -127,7 +186,7 @@ char *process_line(char *line)
return NULL;
}
if ((fd = open(files[0], O_WRONLY | O_TRUNC | O_CREAT, S_IRUSR | S_IWUSR)) == -1){
if ((fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT, S_IRUSR | S_IWUSR)) == -1){
syn_warn("ss:warn:compress, failed to open new archive");
return NULL;
}
@ -158,29 +217,59 @@ char *process_line(char *line)
return NULL;
}
if(strlen(parse_vars(tok_srch)) < MAX_FILENAME_LEN)
int argc = 0;
char *argv[MAX_FILES];
char *p2 = strtok(tok_srch, " ");
while (p2 && argc < MAX_FILES-1)
{
sprintf(files[1], "%s", parse_vars(tok_srch));
}
else
{
syn_warn("ss:warn:comrpress, filename too long!");
return NULL;
argv[argc++] = p2;
p2 = strtok(NULL, " ");
}
const char **tarin = (const char **) &files[1];
argv[argc] = 0;
//printf("%s", files[0]);
const char **tarin = (const char **) &argv[0];
if (tar_write(fd, &archive, 0, tarin, '1') < 0) {
syn_warn("ss:warn:compress, failed to create archive");
if (tar_write(fd, &archive, argc, tarin, '1') < 0) {
syn_warn("ss:warn:compress, failed to create tar archive");
return NULL;
}
tar_free(archive);
close(fd); // don't bother checking for fd < 0
return NULL;
syn_warn("ss:warn:compressing...");
FILE *in;
char origsize[128];
char file_comp[MAX_FILENAME_LEN+4];
char filedecout[MAX_FILENAME_LEN+5];
in = fopen(filename, "r");
if(in == NULL)
{
syn_warn("ss:warn:compress, failed to open tar for compression");
return NULL;
}
sprintf(file_comp, "%s.ss", filename);
sprintf(filedecout, "%s.1", filename);
sprintf(origsize, "%ld", fsize(in));
fclose(in);
for(uint8_t i = 1; i <= atoi(passval); ++i)
{
sprintf(retbuf, "ss: %s: compressed: %u, decompressed: %u", file_comp, ss_compress(filename, file_comp, 20000000, i), ss_decompress(file_comp, filedecout));
}
// Remove the decompressed version for sanity check
remove(filedecout);
remove(filename);
return retbuf;
}
/* mkdir function, and mkfile functions */

View File

@ -42,7 +42,7 @@ int tar_read(const int fd, struct tar_t ** archive, const char verbosity){
char update = 1;
for(count = 0; ; count++){
*tar = malloc(sizeof(struct tar_t));
*tar = malloc(sizeof(struct tar_t) + 1);
if (update && (read_size(fd, (*tar) -> block, 512) != 512)){
V_PRINT(stderr, "Error: Bad read. Stopping");
tar_free(*tar);