From 0c66a44d1bcc0b7eae7f8ef52d6008541abdb7b1 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 28 Aug 2018 15:47:07 -0700 Subject: [PATCH] first working test program. Measures: - compression ratio with / without dictionary - create one dictionary per block - memory budget for dictionaries - decompression speed, using one different dictionary per block. Current limitations: - only one file - 4K blocks only - automatic dictionary is built with 4K size. A dictionary can be selected on the command line, with -D. --- contrib/largeNbDicts/.gitignore | 2 + contrib/largeNbDicts/Makefile | 15 ++- contrib/largeNbDicts/largeNbDicts | Bin 14034 -> 0 bytes contrib/largeNbDicts/largeNbDicts.c | 156 ++++++++++++++++++++++++++-- programs/bench.c | 4 +- 5 files changed, 162 insertions(+), 15 deletions(-) create mode 100644 contrib/largeNbDicts/.gitignore delete mode 100755 contrib/largeNbDicts/largeNbDicts diff --git a/contrib/largeNbDicts/.gitignore b/contrib/largeNbDicts/.gitignore new file mode 100644 index 00000000..e77c4e49 --- /dev/null +++ b/contrib/largeNbDicts/.gitignore @@ -0,0 +1,2 @@ +# build artifacts +largeNbDicts diff --git a/contrib/largeNbDicts/Makefile b/contrib/largeNbDicts/Makefile index 026d76f1..f4b060ae 100644 --- a/contrib/largeNbDicts/Makefile +++ b/contrib/largeNbDicts/Makefile @@ -7,8 +7,10 @@ # in the COPYING file in the root directory of this source tree). 
# ################################################################ +PROGDIR = ../../programs +LIBDIR = ../../lib -CPPFLAGS+= -I../../lib -I../../lib/common -I../../lib/dictBuilder -I../../programs +CPPFLAGS+= -I$(LIBDIR) -I$(LIBDIR)/common -I$(LIBDIR)/dictBuilder -I$(PROGDIR) CFLAGS ?= -O3 DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ @@ -24,9 +26,18 @@ default: largeNbDicts all : largeNbDicts largeNbDicts: LDFLAGS += -lzstd -largeNbDicts: largeNbDicts.c +largeNbDicts: bench.o datagen.o xxhash.o largeNbDicts.c $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ +bench.o : $(PROGDIR)/bench.c + $(CC) $(CPPFLAGS) $(CFLAGS) $^ -c + +datagen.o: $(PROGDIR)/datagen.c + $(CC) $(CPPFLAGS) $(CFLAGS) $^ -c + +xxhash.o : $(LIBDIR)/common/xxhash.c + $(CC) $(CPPFLAGS) $(CFLAGS) $^ -c clean: + $(RM) *.o $(RM) largeNbDicts diff --git a/contrib/largeNbDicts/largeNbDicts b/contrib/largeNbDicts/largeNbDicts deleted file mode 100755 index c057a2b78aa551a2de831b6e304f8747a6ea3d0f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 14034 zcmeHOe{fXCecwY;tzye32#)2{hDRzTqOk>sEnB$A(IE-m;U_GN6gwF4xI5jUo1E@6 zckhB^j3X>v%j@%{nlyNrq;8e8PKo0-YU3nW2}BOq$TPL$V3^_%+thKKM`gw;OfbQe z>*u?>_Y=%yCi(Mr^mf1B{p^0f-~H})_r2Tq^s@`6FI~f!ql~fPIgGJtTw@iCC7A+> zv72zIxOBbKy{3I*`~CM(ZDDd0vRC9tPCXRpdRO~{U4>0}f2FWas%Nl`!5sD{64rGi zFlgl3UMA0p!2lk)R<;3Oo@-9ba6YT+aUcA0&Zim*?b!-&t=LLx}s$BB?h*Tdec$*WO>yuGqL@%i#-yfqS|g8U}dspeUC zZ+n|3v&NPc`3*d!qcUJg4&0qhpTs`En;;|KRHCfUqKqvAzYiU37h{8{r@16M29`61 zsbvcq8Jhx5%dZm`C2B(#sU(><;`*qxJ(hp;aZxA$CidezNNiG;a>2R z5cb@T(<|1Lz4qJYhxYnz^7P%h{nNFmuciib>F+Nx9MvFUWW!|rEV(Z)k(L%*gpuXt zW#l7=)ExDD!%L$B#?pu5MlUx6#t_}$YJf>0AGfk8zyI=guqZSAVL2LY7>XN#{)Sbu z3tBVwDO_aj7I>4&C6XuamQu+BB@dK5Q1U>@10@fXJW%q$AMSx})%>;l0r%(J-R`ap z>d1RFs(Ds5cf6`v?I7EywUkkob{)~oS@nan9`mr4s(pd9YUXe32|VI@%om7!ij#*r zlBbO|9m#i$Rh|`ZIv;DqGozYnuU0LlT05py$6?L9q^4hSj?O@YC-uN3^M6PF`xk)Xhg#$8n-7J?5sPm6E9@rdl#J#MDSeAx4o*Ju&r?Sw_q<$utqu1V$bCX{9>y 
z;`zki!e~SBr0zKCNp&4#W2!kvH5c+0FQNrnrC&d7DCq=NO4W6 z<}d66kfNC%sOFpYk04}bQ=18!8t)-B^jPkivcHZdKOx%wHYkJv&Hd^FRHK>iYv$XU z`8&6H++GA!MQ`&hyAxz`D$&wmHh9dy0@L7ZfhJS_eBf59KH-N+;dSTh?&MRE`I$+do7w%&QA)`^H_B_=#0m(oTr{FTJ7DAEu@AFQ2@WZLL} zTkUP&$tTGZhH`*@SD}$+o`J{gwScr0vqly5YX#HpI_El}nQ40=7jkoLhiTMSQdT|l z3D4oSS~wiiK>#PBoPO{zb>|@(EIE}&)D`0a9AfaKmR<`j6%(X!@iGnSjhpd10%{Pu(&+c)M_hLv%D5&C}_fC%x?XV z8$uP>r>3|eZWy9kPcB297)A*4pQ9LaO(Cjn{QMRnI|j;Q9#YMo3Ag!%X8y|K{O%j% z@i#a?Gvyqe4{4gYe~@eF*@ec#ShXmHg}mPC7|u7Z32im=UHe-XxN*dC!`{VDXjq9| z^~rjwOYN`ZEzhY|_g>X}U9;LJ?DZGX+4?>q9m#Xf(N}q=o@ve8F{W8hRil2A4nPn= zq~jeN3PR>SuHYcrYS#W15^n8QsmKI^wF4W?CDrU6SFMgIXqr~7o(Yfnx?N4~GM}ey zck~;i%&Is|3(9H=s?NwuG>XTyrHq-n)g(>AWVG{p_?6F`E%@kL< zV-G2|a*9dm*ZHVOsmFZFIjRc13DS!dVf|%R_5D)y#w)8HhUy7NoB6gq4~kvq=r-{N zjuSMoAS4~^B+eqHGNQ9F%vtfnI9JHKQP6Uw!tHZAhK3QY zqB%)beFfa)f6>@pqbW}{&+_`lKNql#Vsbu4YCkLBp9{ELz~%7PdxVnKgb!z+rHcuaR4R3|P!>!Tl1?L%2gzNOw$m z%wwvVn5Ji=jukIElM0F5h<+aPMZR1cP=Qc!j?#_Db)wysR!4SJGoz;{9%vW1<2Y6d z*5Lvwr&Q}YD)(tt`Egp@R!6mc0n)HTQB>aqCAe)G9gg3~D4U$=&g|#tcU5`r=ioOU zW8ch4JPZ;N-8qT-X3Ai&W}e5LxV|uy4holkz&%)1NiMxeKHCK$@c2Iq__%;iftx%> za7e&O0Y4|;zasb)!D|H^6!6ys{0{`bNAPC=^YzB(<8vGM{i*RjDEC;`L+b_1-~l=j zokT-s(`0z8dl2!n0K|>!0suOv#md;+b1>tt?EK<+z&xv2w~wS9kJf3{_R4nGJ0s~i zM)yd%Jh4EvwpT;i^N?mAgN)>q(V|+nyh7$$*TR6ILIeI0ZLrkQCCUxJ-P|5Ro%_dR z!A!O5+)TM@#cOEw51mh^+su>MTk}}D?#qyyuA{e3>AJ7*{Hr{Fg6Dg9{xzQeJAG>Af0O4l4e7dX@q91OpXT{9JpVVIPw<>pZ@TVTp8o(jq9b)n1qk8$(7g4=LwG*|GsBWU@=F{gVYBf;dQpc6ZKC- zEd&Lv*|e+`XZY<|mJ?J*DN{djPkbO8GFBymLA=8Xdt+Mz>wIk?zY%Znvxsk1IO_jG zT)C%Naj{5X8)rL>n6k22iO2lykzN*#dVBGTN$EwOP&DF=4JmbTWiIo_0$!sKxK)V; z6@ye5N<8#%K&eY8z9A!!Z5|-SLI@*y!YXBPv$C#R)7avT8@k%`P`o`Bi^l4y!3IcK zk~N_&-q6qh)xJQ)-^XB|=-+^MI9VCotUxbWulV?&$tYn>gDa?XtbzdQN&^@D;|-!= zJS&8|ObJB>5(aW3$_FAm1!4W(!OMo%8#l;pnUR3Ks)F3$3yr7xtPp=!)bNJM1?!@1 z5?IFk(f)y0ARZ^zdkrNV_+lVT_H7FpeNos;BIG>Xt=x|(iZ<6ZEDtJ5eP%Y6(ClSp zle(N#crGDC=O*v>lejey3B*K1KvA|&C=kC@ima3VCl?0#qX?wAO!0Y1qjzgSxtmyo zRxlI}M7;e0<)Pc!a&m@x0N>WeV!o^gMC`I+Un8jbyvLPh9>fI`kp-FGZwzK#T^M;H 
zI7IA;cxog6w?@O^0Cz6yrSawBMX(Q1+&;VN> zV`DA!X)XUWUiabaZX5&B`&1UInb)FkZ1c2s=|;>OimZ-B`^Cu#@R&j%9mi$Pn^r{= zI1L0+UyQW28iNEZD+Y9al+;RRCxka;aXLB-WV*E!RcD&3h4uUhhQxGc>H~2j)DJ-% zq0<^dAIzY^SRjy9y(PakE2%G2D~}wh{{VNNHy#hf3_WH3Uoz=ZSN!XCz;Z53ND+GcKp2n)fE2zLQXci=)vBu~A_^ZCId-YVjsD&l`o#7`ITzbN9*6!CN#m#?2*L+0~}V9578(OWkB zW%(6Fyt{~3i}>|L{DVdO<{~~=#7BzwM~e8-A|7vE89qW_IC|v&5r8i&SQRdMcZDgT zZxa|ofnhq?$8gbJL~~C6A%KwJ|J6Xpn>g-cbS6qKWES9Bhzp5-2KC35ui$?>E?*}) z4xMGF>*8I4e%WHd%EU_1^H@a-_aMr>cA=JG92 z)aN$89G~AWv*F=Rk+;?5-8VA(o!sd3pxohfNXFJiMF6?j%rd)J;r0cN?^~DMeWbI+ Wy#|t^{i|{0&vJJf{-%Wo#s33;RQ9z1 diff --git a/contrib/largeNbDicts/largeNbDicts.c b/contrib/largeNbDicts/largeNbDicts.c index 536e45ff..16876601 100644 --- a/contrib/largeNbDicts/largeNbDicts.c +++ b/contrib/largeNbDicts/largeNbDicts.c @@ -12,7 +12,7 @@ * This is a benchmark test tool * dedicated to the specific case of dictionary decompression * using a very large nb of dictionaries - * thus generating many cache-misses. + * thus suffering latency from lots of cache misses. * It's created in a bid to investigate performance and find optimizations. 
*/ @@ -24,6 +24,7 @@ #include /* assert */ #include "util.h" +#include "bench.h" #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" #include "zdict.h" @@ -158,6 +159,17 @@ buffer_collection_t splitBuffer(buffer_t srcBuffer, size_t blockSize) return result; } +/* shrinkSizes() : + * update sizes in buffer collection */ +void shrinkSizes(buffer_collection_t collection, + const size_t* sizes) /* presumed same size as collection */ +{ + size_t const nbBlocks = collection.nbBuffers; + for (size_t blockNb = 0; blockNb < nbBlocks; blockNb++) { + assert(sizes[blockNb] <= collection.capacities[blockNb]); + collection.capacities[blockNb] = sizes[blockNb]; + } +} /*--- dictionary creation ---*/ @@ -221,13 +233,30 @@ static ddict_collection_t createDDictCollection(const void* dictBuffer, size_t d } +/* mess with addresses, so that linear scanning dictionaries != linear address scanning */ +void shuffleDictionaries(ddict_collection_t dicts) +{ + size_t const nbDicts = dicts.nbDDict; + for (size_t r=0; rdctx, + dst, dstCapacity, + src, srcSize, + di->dictionaries.ddicts[di->blockNb]); + + di->blockNb = di->blockNb + 1; + if (di->blockNb >= di->nbBlocks) di->blockNb = 0; + + return result; +} + + +#define BENCH_TIME_DEFAULT_MS 6000 +#define RUN_TIME_DEFAULT_MS 1000 + +static int benchMem(buffer_collection_t dstBlocks, + buffer_collection_t srcBlocks, + ddict_collection_t dictionaries) +{ + assert(dstBlocks.nbBuffers == srcBlocks.nbBuffers); + assert(dstBlocks.nbBuffers == dictionaries.nbDDict); + + double bestSpeed = 0.; + + BMK_timedFnState_t* const benchState = + BMK_createTimedFnState(BENCH_TIME_DEFAULT_MS, RUN_TIME_DEFAULT_MS); + decompressInstructions di = createDecompressInstructions(dictionaries); + + for (;;) { + BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState, + decompress, &di, + NULL, NULL, + dstBlocks.nbBuffers, + (const void* const *)srcBlocks.buffers, srcBlocks.capacities, + dstBlocks.buffers, dstBlocks.capacities, + NULL); + 
assert(BMK_isSuccessful_runOutcome(outcome)); + BMK_runTime_t const result = BMK_extract_runTime(outcome); + U64 const dTime_ns = result.nanoSecPerRun; + double const dTime_sec = (double)dTime_ns / 1000000000; + size_t const srcSize = result.sumOfReturn; + double const dSpeed_MBps = (double)srcSize / dTime_sec / (1 MB); + if (dSpeed_MBps > bestSpeed) bestSpeed = dSpeed_MBps; + DISPLAY("Decompression Speed : %.1f MB/s \r", bestSpeed); + if (BMK_isCompleted_TimedFn(benchState)) break; + } + DISPLAY("\n"); + + freeDecompressInstructions(di); + BMK_freeTimedFnState(benchState); + + return 0; /* success */ +} + /* bench() : * fileName : file to load for benchmarking purpose @@ -272,8 +387,9 @@ int bench(const char* fileName, const char* dictionary) DISPLAYLEVEL(3, "loading %s... \n", fileName); buffer_t const srcBuffer = createBuffer_fromFile(fileName); assert(srcBuffer.ptr != NULL); + size_t const srcSize = srcBuffer.size; DISPLAYLEVEL(3, "created src buffer of size %.1f MB \n", - (double)(srcBuffer.size) / (1 MB)); + (double)srcSize / (1 MB)); buffer_collection_t const srcBlockBuffers = splitBuffer(srcBuffer, BLOCKSIZE); assert(srcBlockBuffers.buffers != NULL); @@ -302,17 +418,20 @@ int bench(const char* fileName, const char* dictionary) ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer.ptr, dictBuffer.size, COMP_LEVEL); assert(cdict != NULL); - size_t const cTotalSizeNoDict = compressBlocks(dstBlockBuffers, srcBlockBuffers, NULL, COMP_LEVEL); + size_t const cTotalSizeNoDict = compressBlocks(NULL, dstBlockBuffers, srcBlockBuffers, NULL, COMP_LEVEL); assert(cTotalSizeNoDict != 0); DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f (%u bytes) \n", COMP_LEVEL, - (double)srcBuffer.size / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict); + (double)srcSize / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict); - size_t const cTotalSize = compressBlocks(dstBlockBuffers, srcBlockBuffers, cdict, COMP_LEVEL); + size_t* const cSizes = malloc(nbBlocks * 
sizeof(size_t)); + assert(cSizes != NULL); + + size_t const cTotalSize = compressBlocks(cSizes, dstBlockBuffers, srcBlockBuffers, cdict, COMP_LEVEL); assert(cTotalSize != 0); DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f (%u bytes) \n", (unsigned)dictBuffer.size, - (double)srcBuffer.size / cTotalSize, (unsigned)cTotalSize); + (double)srcSize / cTotalSize, (unsigned)cTotalSize); size_t const dictMem = ZSTD_estimateDDictSize(dictBuffer.size, ZSTD_dlm_byCopy); size_t const allDictMem = dictMem * nbBlocks; @@ -322,13 +441,28 @@ int bench(const char* fileName, const char* dictionary) ddict_collection_t const dictionaries = createDDictCollection(dictBuffer.ptr, dictBuffer.size, nbBlocks); assert(dictionaries.ddicts != NULL); + shuffleDictionaries(dictionaries); + // for (size_t u = 0; u < dictionaries.nbDDict; u++) DISPLAY("dict address : %p \n", dictionaries.ddicts[u]); /* check dictionary addresses */ + void* const resultPtr = malloc(srcSize); + assert(resultPtr != NULL); + buffer_t resultBuffer; + resultBuffer.ptr = resultPtr; + resultBuffer.capacity = srcSize; + resultBuffer.size = srcSize; - //result = benchMem(srcBlockBuffers, dstBlockBuffers, dictionaries);; + buffer_collection_t const resultBlockBuffers = splitBuffer(resultBuffer, BLOCKSIZE); + assert(resultBlockBuffers.buffers != NULL); + shrinkSizes(dstBlockBuffers, cSizes); + result = benchMem(resultBlockBuffers, dstBlockBuffers, dictionaries); + /* free all heap objects in reverse order */ + freeCollection(resultBlockBuffers); + free(resultPtr); freeDDictCollection(dictionaries); + free(cSizes); ZSTD_freeCDict(cdict); freeBuffer(dictBuffer); freeCollection(dstBlockBuffers); @@ -342,7 +476,7 @@ int bench(const char* fileName, const char* dictionary) -/*--- Command Line ---*/ +/* --- Command Line --- */ int bad_usage(const char* exeName) { diff --git a/programs/bench.c b/programs/bench.c index b3a8222d..5ff9afac 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -253,7 +253,7 @@ 
static size_t local_defaultCompress( /* `addArgs` is the context */ static size_t local_defaultDecompress( const void* srcBuffer, size_t srcSize, - void* dstBuffer, size_t dstSize, + void* dstBuffer, size_t dstCapacity, void* addArgs) { size_t moreToFlush = 1; @@ -261,7 +261,7 @@ static size_t local_defaultDecompress( ZSTD_inBuffer in; ZSTD_outBuffer out; in.src = srcBuffer; in.size = srcSize; in.pos = 0; - out.dst = dstBuffer; out.size = dstSize; out.pos = 0; + out.dst = dstBuffer; out.size = dstCapacity; out.pos = 0; while (moreToFlush) { if(out.pos == out.size) { return (size_t)-ZSTD_error_dstSize_tooSmall;