Merge pull request #753 from paulcruz74/adapt-approach-3

adaptive compression v1
dev
Yann Collet 2017-07-27 10:00:10 -07:00 committed by GitHub
commit e1222544be
8 changed files with 1573 additions and 2 deletions

View File

@ -0,0 +1,47 @@
ZSTDDIR = ../../lib
PRGDIR = ../../programs
ZSTDCOMMON_FILES := $(ZSTDDIR)/common/*.c
ZSTDCOMP_FILES := $(ZSTDDIR)/compress/*.c
ZSTDDECOMP_FILES := $(ZSTDDIR)/decompress/*.c
ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES)
DEBUGFLAGS= -g -DZSTD_DEBUG=1
CPPFLAGS += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
-I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR)
CFLAGS ?= -O3
CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
-Wstrict-prototypes -Wundef -Wformat-security \
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
-Wredundant-decls
CFLAGS += $(DEBUGFLAGS)
CFLAGS += $(MOREFLAGS)
FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
all: adapt datagen
adapt: $(ZSTD_FILES) adapt.c
$(CC) $(FLAGS) $^ -o $@
adapt-debug: $(ZSTD_FILES) adapt.c
$(CC) $(FLAGS) -DDEBUG_MODE=2 $^ -o adapt
datagen : $(PRGDIR)/datagen.c datagencli.c
$(CC) $(FLAGS) $^ -o $@$(EXT)
test-adapt-correctness: datagen adapt
@./test-correctness.sh
@echo "test correctness complete"
test-adapt-performance: datagen adapt
@./test-performance.sh
@echo "test performance complete"
clean:
@$(RM) -f adapt datagen
@$(RM) -rf *.dSYM
@$(RM) -f tmp*
@$(RM) -f tests/*.zst
@$(RM) -f tests/tmp*
@echo "finished cleaning"

View File

@ -0,0 +1,25 @@
###Summary
`adapt` is a new compression tool targeted at optimizing performance across network connections. The tool aims at sensing network speeds and adapting compression level based on network or pipe speeds.
In situations where the compression level does not appropriately match the network/pipe speed, the compression may be bottlenecking the entire pipeline or the files may not be compressed as much as they potentially could be, therefore losing efficiency. It also becomes quite impractical to manually measure and set compression level, therefore the tool does it for you.
###Using `adapt`
In order to build and use the tool, you can simply run `make adapt` in the `adaptive-compression` directory under `contrib`. This will generate an executable available for use.
###Options
`-oFILE` : write output to `FILE`
`-i#` : provide initial compression level
`-h` : display help/information
`-f` : force the compression level to stay constant
`-c` : force write to `stdout`
`-p` : hide progress bar
`-q` : quiet mode -- do not show progress bar or other information
###Benchmarking / Test results

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,129 @@
/**
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*/
/*-************************************
* Dependencies
**************************************/
#include "util.h" /* Compiler options */
#include <stdio.h> /* fprintf, stderr */
#include "datagen.h" /* RDG_generate */
/*-************************************
* Constants
**************************************/
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
#define SIZE_DEFAULT ((64 KB) + 1)
#define SEED_DEFAULT 0
#define COMPRESSIBILITY_DEFAULT 50
/*-************************************
* Macros
**************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
static unsigned displayLevel = 2;
/*-*******************************************************
* Command line
*********************************************************/
static int usage(const char* programName)
{
DISPLAY( "Compressible data generator\n");
DISPLAY( "Usage :\n");
DISPLAY( " %s [args]\n", programName);
DISPLAY( "\n");
DISPLAY( "Arguments :\n");
DISPLAY( " -g# : generate # data (default:%i)\n", SIZE_DEFAULT);
DISPLAY( " -s# : Select seed (default:%i)\n", SEED_DEFAULT);
DISPLAY( " -P# : Select compressibility in %% (default:%i%%)\n",
COMPRESSIBILITY_DEFAULT);
DISPLAY( " -h : display help and exit\n");
return 0;
}
int main(int argc, const char** argv)
{
unsigned probaU32 = COMPRESSIBILITY_DEFAULT;
double litProba = 0.0;
U64 size = SIZE_DEFAULT;
U32 seed = SEED_DEFAULT;
const char* const programName = argv[0];
int argNb;
for(argNb=1; argNb<argc; argNb++) {
const char* argument = argv[argNb];
if(!argument) continue; /* Protection if argument empty */
/* Handle commands. Aggregated commands are allowed */
if (*argument=='-') {
argument++;
while (*argument!=0) {
switch(*argument)
{
case 'h':
return usage(programName);
case 'g':
argument++;
size=0;
while ((*argument>='0') && (*argument<='9'))
size *= 10, size += *argument++ - '0';
if (*argument=='K') { size <<= 10; argument++; }
if (*argument=='M') { size <<= 20; argument++; }
if (*argument=='G') { size <<= 30; argument++; }
if (*argument=='B') { argument++; }
break;
case 's':
argument++;
seed=0;
while ((*argument>='0') && (*argument<='9'))
seed *= 10, seed += *argument++ - '0';
break;
case 'P':
argument++;
probaU32 = 0;
while ((*argument>='0') && (*argument<='9'))
probaU32 *= 10, probaU32 += *argument++ - '0';
if (probaU32>100) probaU32 = 100;
break;
case 'L': /* hidden argument : Literal distribution probability */
argument++;
litProba=0.;
while ((*argument>='0') && (*argument<='9'))
litProba *= 10, litProba += *argument++ - '0';
if (litProba>100.) litProba=100.;
litProba /= 100.;
break;
case 'v':
displayLevel = 4;
argument++;
break;
default:
return usage(programName);
}
} } } /* for(argNb=1; argNb<argc; argNb++) */
DISPLAYLEVEL(4, "Compressible data Generator \n");
if (probaU32!=COMPRESSIBILITY_DEFAULT)
DISPLAYLEVEL(3, "Compressibility : %i%%\n", probaU32);
DISPLAYLEVEL(3, "Seed = %u \n", seed);
RDG_genStdout(size, (double)probaU32/100, litProba, seed);
DISPLAYLEVEL(1, "\n");
return 0;
}

View File

@ -0,0 +1,245 @@
echo "correctness tests -- general"
./datagen -s1 -g1GB > tmp
./adapt -otmp.zst tmp
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s2 -g500MB > tmp
./adapt -otmp.zst tmp
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s3 -g250MB > tmp
./adapt -otmp.zst tmp
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s4 -g125MB > tmp
./adapt -otmp.zst tmp
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s5 -g50MB > tmp
./adapt -otmp.zst tmp
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s6 -g25MB > tmp
./adapt -otmp.zst tmp
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s7 -g10MB > tmp
./adapt -otmp.zst tmp
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s8 -g5MB > tmp
./adapt -otmp.zst tmp
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s9 -g500KB > tmp
./adapt -otmp.zst tmp
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
echo -e "\ncorrectness tests -- streaming"
./datagen -s10 -g1GB > tmp
cat tmp | ./adapt > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s11 -g100MB > tmp
cat tmp | ./adapt > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s12 -g10MB > tmp
cat tmp | ./adapt > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s13 -g1MB > tmp
cat tmp | ./adapt > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s14 -g100KB > tmp
cat tmp | ./adapt > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s15 -g10KB > tmp
cat tmp | ./adapt > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
echo -e "\ncorrectness tests -- read limit"
./datagen -s16 -g1GB > tmp
pv -L 50m -q tmp | ./adapt > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s17 -g100MB > tmp
pv -L 50m -q tmp | ./adapt > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s18 -g10MB > tmp
pv -L 50m -q tmp | ./adapt > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s19 -g1MB > tmp
pv -L 50m -q tmp | ./adapt > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s20 -g100KB > tmp
pv -L 50m -q tmp | ./adapt > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s21 -g10KB > tmp
pv -L 50m -q tmp | ./adapt > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
echo -e "\ncorrectness tests -- write limit"
./datagen -s22 -g1GB > tmp
pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s23 -g100MB > tmp
pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s24 -g10MB > tmp
pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s25 -g1MB > tmp
pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s26 -g100KB > tmp
pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s27 -g10KB > tmp
pv -q tmp | ./adapt | pv -L 5m -q > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
echo -e "\ncorrectness tests -- read and write limits"
./datagen -s28 -g1GB > tmp
pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s29 -g100MB > tmp
pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s30 -g10MB > tmp
pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s31 -g1MB > tmp
pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s32 -g100KB > tmp
pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s33 -g10KB > tmp
pv -L 50m -q tmp | ./adapt | pv -L 5m -q > tmp.zst
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
echo -e "\ncorrectness tests -- forced compression level"
./datagen -s34 -g1GB > tmp
./adapt tmp -otmp.zst -i11 -f
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s35 -g100MB > tmp
./adapt tmp -otmp.zst -i11 -f
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s36 -g10MB > tmp
./adapt tmp -otmp.zst -i11 -f
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s37 -g1MB > tmp
./adapt tmp -otmp.zst -i11 -f
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s38 -g100KB > tmp
./adapt tmp -otmp.zst -i11 -f
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
./datagen -s39 -g10KB > tmp
./adapt tmp -otmp.zst -i11 -f
zstd -d tmp.zst -o tmp2
diff -s -q tmp tmp2
rm tmp*
echo -e "\ncorrectness tests -- window size test"
./datagen -s39 -g1GB | pv -L 25m -q | ./adapt -i1 | pv -q > tmp.zst
zstd -d tmp.zst
rm tmp*
make clean

View File

@ -0,0 +1,59 @@
echo "testing time -- no limits set"
./datagen -s1 -g1GB > tmp
time ./adapt -otmp1.zst tmp
time zstd -1 -o tmp2.zst tmp
rm tmp*
./datagen -s2 -g2GB > tmp
time ./adapt -otmp1.zst tmp
time zstd -1 -o tmp2.zst tmp
rm tmp*
./datagen -s3 -g4GB > tmp
time ./adapt -otmp1.zst tmp
time zstd -1 -o tmp2.zst tmp
rm tmp*
echo -e "\ntesting compression ratio -- no limits set"
./datagen -s4 -g1GB > tmp
time ./adapt -otmp1.zst tmp
time zstd -1 -o tmp2.zst tmp
ls -l tmp1.zst tmp2.zst
rm tmp*
./datagen -s5 -g2GB > tmp
time ./adapt -otmp1.zst tmp
time zstd -1 -o tmp2.zst tmp
ls -l tmp1.zst tmp2.zst
rm tmp*
./datagen -s6 -g4GB > tmp
time ./adapt -otmp1.zst tmp
time zstd -1 -o tmp2.zst tmp
ls -l tmp1.zst tmp2.zst
rm tmp*
echo e "\ntesting performance at various compression levels -- no limits set"
./datagen -s7 -g1GB > tmp
echo "adapt"
time ./adapt -i5 -f tmp -otmp1.zst
echo "zstdcli"
time zstd -5 tmp -o tmp2.zst
ls -l tmp1.zst tmp2.zst
rm tmp*
./datagen -s8 -g1GB > tmp
echo "adapt"
time ./adapt -i10 -f tmp -otmp1.zst
echo "zstdcli"
time zstd -10 tmp -o tmp2.zst
ls -l tmp1.zst tmp2.zst
rm tmp*
./datagen -s9 -g1GB > tmp
echo "adapt"
time ./adapt -i15 -f tmp -otmp1.zst
echo "zstdcli"
time zstd -15 tmp -o tmp2.zst
ls -l tmp1.zst tmp2.zst
rm tmp*

View File

@ -2992,7 +2992,6 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
return fhSize;
}
size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize)

View File

@ -802,7 +802,6 @@ ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstC
ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
/*-
Buffer-less streaming decompression (synchronous mode)