From 9165e97fc69d6a6f0476575b696d5f93f5070ea6 Mon Sep 17 00:00:00 2001 From: Paul Cruz Date: Thu, 13 Jul 2017 13:50:23 -0700 Subject: [PATCH] added some tests for correctness, time, and compression ratio --- contrib/adaptive-compression/Makefile | 15 +- contrib/adaptive-compression/datagencli.c | 129 +++++++++++ .../adaptive-compression/test-correctness.sh | 205 ++++++++++++++++++ .../adaptive-compression/test-performance.sh | 34 +++ 4 files changed, 381 insertions(+), 2 deletions(-) create mode 100644 contrib/adaptive-compression/datagencli.c create mode 100755 contrib/adaptive-compression/test-correctness.sh create mode 100755 contrib/adaptive-compression/test-performance.sh diff --git a/contrib/adaptive-compression/Makefile b/contrib/adaptive-compression/Makefile index ed1a55ad..f2059a19 100644 --- a/contrib/adaptive-compression/Makefile +++ b/contrib/adaptive-compression/Makefile @@ -19,13 +19,24 @@ CFLAGS += $(DEBUGFLAGS) CFLAGS += $(MOREFLAGS) FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -all: adapt +all: adapt datagen adapt: $(ZSTD_FILES) adapt.c $(CC) $(FLAGS) $^ -o $@ +datagen : $(PRGDIR)/datagen.c datagencli.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + +test-adapt-correctness: datagen adapt + @./test-correctness.sh + @echo "test correctness complete" + +test-adapt-performance: datagen adapt + @./test-performance.sh + @echo "test performance complete" + clean: - @$(RM) -f adapt + @$(RM) -f adapt datagen @$(RM) -rf *.dSYM @$(RM) -f tmp* @$(RM) -f tests/*.zst diff --git a/contrib/adaptive-compression/datagencli.c b/contrib/adaptive-compression/datagencli.c new file mode 100644 index 00000000..8a81939d --- /dev/null +++ b/contrib/adaptive-compression/datagencli.c @@ -0,0 +1,129 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + + +/*-************************************ +* Dependencies +**************************************/ +#include "util.h" /* Compiler options */ +#include /* fprintf, stderr */ +#include "datagen.h" /* RDG_generate */ + + +/*-************************************ +* Constants +**************************************/ +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define SIZE_DEFAULT ((64 KB) + 1) +#define SEED_DEFAULT 0 +#define COMPRESSIBILITY_DEFAULT 50 + + +/*-************************************ +* Macros +**************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } +static unsigned displayLevel = 2; + + +/*-******************************************************* +* Command line +*********************************************************/ +static int usage(const char* programName) +{ + DISPLAY( "Compressible data generator\n"); + DISPLAY( "Usage :\n"); + DISPLAY( " %s [args]\n", programName); + DISPLAY( "\n"); + DISPLAY( "Arguments :\n"); + DISPLAY( " -g# : generate # data (default:%i)\n", SIZE_DEFAULT); + DISPLAY( " -s# : Select seed (default:%i)\n", SEED_DEFAULT); + DISPLAY( " -P# : Select compressibility in %% (default:%i%%)\n", + COMPRESSIBILITY_DEFAULT); + DISPLAY( " -h : display help and exit\n"); + return 0; +} + + +int main(int argc, const char** argv) +{ + unsigned probaU32 = COMPRESSIBILITY_DEFAULT; + double litProba = 0.0; + U64 size = SIZE_DEFAULT; + U32 seed = SEED_DEFAULT; + const char* const programName = argv[0]; + + int argNb; + for(argNb=1; argNb='0') && (*argument<='9')) + size *= 10, size += *argument++ - '0'; + if (*argument=='K') { size <<= 10; argument++; } + if (*argument=='M') { size <<= 20; argument++; } + if (*argument=='G') { size <<= 30; argument++; } + if (*argument=='B') { argument++; } + break; + case 's': + argument++; + seed=0; + while ((*argument>='0') && (*argument<='9')) + seed *= 10, seed += *argument++ - '0'; + break; + case 'P': + argument++; + probaU32 = 0; + while ((*argument>='0') && (*argument<='9')) + probaU32 *= 10, probaU32 += *argument++ - '0'; + if (probaU32>100) probaU32 = 100; + break; + case 'L': /* hidden argument : Literal distribution probability */ + argument++; + litProba=0.; + while ((*argument>='0') && (*argument<='9')) + litProba *= 10, litProba += *argument++ - '0'; + if (litProba>100.) litProba=100.; + litProba /= 100.; + break; + case 'v': + displayLevel = 4; + argument++; + break; + default: + return usage(programName); + } + } } } /* for(argNb=1; argNb tmp +./adapt -otmp.zst tmp +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g500MB > tmp +./adapt -otmp.zst tmp +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g250MB > tmp +./adapt -otmp.zst tmp +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g125MB > tmp +./adapt -otmp.zst tmp +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g50MB > tmp +./adapt -otmp.zst tmp +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g25MB > tmp +./adapt -otmp.zst tmp +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g10MB > tmp +./adapt -otmp.zst tmp +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g5MB > tmp +./adapt -otmp.zst tmp +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g500KB > tmp +./adapt -otmp.zst tmp +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +echo -e "\ncorrectness tests -- streaming" +./datagen -g1GB > tmp +cat tmp | ./adapt > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g100MB > tmp +cat tmp | ./adapt > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g10MB > tmp +cat tmp | ./adapt > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g1MB > tmp +cat tmp | ./adapt > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g100KB > tmp +cat tmp | ./adapt > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g10KB > tmp +cat tmp | ./adapt > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +echo -e "\ncorrectness tests -- read limit" +./datagen -g1GB > tmp +pv -L 50m -q tmp | ./adapt > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g100MB > tmp +pv -L 50m -q tmp | ./adapt > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g10MB > tmp +pv -L 50m -q tmp | ./adapt > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g1MB > tmp +pv -L 50m -q tmp | ./adapt > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g100KB > tmp +pv -L 50m -q tmp | ./adapt > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g10KB > tmp +pv -L 50m -q tmp | ./adapt > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +echo -e "\ncorrectness tests -- write limit" +./datagen -g1GB > tmp +pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g100MB > tmp +pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g10MB > tmp +pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g1MB > tmp +pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g100KB > tmp +pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g10KB > tmp +pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +echo -e "\ncorrectness tests -- read and write limits" +./datagen -g1GB > tmp +pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g100MB > tmp +pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g10MB > tmp +pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g1MB > tmp +pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g100KB > tmp +pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + +./datagen -g10KB > tmp +pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst +zstd -d tmp.zst -o tmp2 +diff -q tmp tmp2 +rm tmp* + + +make clean diff --git a/contrib/adaptive-compression/test-performance.sh b/contrib/adaptive-compression/test-performance.sh new file mode 100755 index 00000000..6a88325d --- /dev/null +++ b/contrib/adaptive-compression/test-performance.sh @@ -0,0 +1,34 @@ +echo "testing time" +./datagen -g1GB > tmp +time ./adapt -otmp1.zst tmp +time zstd -1 -o tmp2.zst tmp +rm tmp* + +./datagen -g2GB > tmp +time ./adapt -otmp1.zst tmp +time zstd -1 -o tmp2.zst tmp +rm tmp* + +./datagen -g4GB > tmp +time ./adapt -otmp1.zst tmp +time zstd -1 -o tmp2.zst tmp +rm tmp* + +echo -e "\ntesting compression ratio" +./datagen -g1GB > tmp +time ./adapt -otmp1.zst tmp +time zstd -1 -o tmp2.zst tmp +ls -l tmp1.zst tmp2.zst +rm tmp* + +./datagen -g2GB > tmp +time ./adapt -otmp1.zst tmp +time zstd -1 -o tmp2.zst tmp +ls -l tmp1.zst tmp2.zst +rm tmp* + +./datagen -g4GB > tmp +time ./adapt -otmp1.zst tmp +time zstd -1 -o tmp2.zst tmp +ls -l tmp1.zst tmp2.zst +rm tmp*