Merge pull request #1853 from facebook/ahmed_file

--filelist=FILE feature
dev
Yann Collet 2019-11-26 20:57:25 -08:00 committed by GitHub
commit 1d6070463f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
31 changed files with 979 additions and 3937 deletions

View File

@ -1,44 +0,0 @@
ARG :=
CC ?= gcc
CFLAGS ?= -O3
INCLUDES := -I ../randomDictBuilder -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder
RANDOM_FILE := ../randomDictBuilder/random.c
IO_FILE := ../randomDictBuilder/io.c
all: run clean
.PHONY: run
run: benchmark
echo "Benchmarking with $(ARG)"
./benchmark $(ARG)
.PHONY: test
test: benchmarkTest clean
.PHONY: benchmarkTest
benchmarkTest: benchmark test.sh
sh test.sh
benchmark: benchmark.o io.o random.o libzstd.a
$(CC) $(CFLAGS) benchmark.o io.o random.o libzstd.a -o benchmark
benchmark.o: benchmark.c
$(CC) $(CFLAGS) $(INCLUDES) -c benchmark.c
random.o: $(RANDOM_FILE)
$(CC) $(CFLAGS) $(INCLUDES) -c $(RANDOM_FILE)
io.o: $(IO_FILE)
$(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE)
libzstd.a:
$(MAKE) -C ../../../lib libzstd.a
mv ../../../lib/libzstd.a .
.PHONY: clean
clean:
rm -f *.o benchmark libzstd.a
$(MAKE) -C ../../../lib clean
echo "Cleaning is completed"

View File

@ -1,849 +0,0 @@
Benchmarking Dictionary Builder
### Permitted Argument:
Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in="
###Running Test:
make test
###Usage:
Benchmark given input files: make ARG= followed by permitted arguments
### Examples:
make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
###Benchmarking Result:
- First Cover is optimize cover, second Cover uses optimized d and k from first one.
- For every f value of fastCover, the first one is optimize fastCover and the second one uses optimized d and k from first one. This is run for accel values from 1 to 10.
- Fourth column is chosen d and fifth column is chosen k
github:
NODICT 0.000004 2.999642
RANDOM 0.024560 8.791189
LEGACY 0.727109 8.173529
COVER 40.565676 10.652243 8 1298
COVER 3.608284 10.652243 8 1298
FAST f=15 a=1 4.181024 10.570882 8 1154
FAST f=15 a=1 0.040788 10.570882 8 1154
FAST f=15 a=2 3.548352 10.574287 6 1970
FAST f=15 a=2 0.035535 10.574287 6 1970
FAST f=15 a=3 3.287364 10.613950 6 1010
FAST f=15 a=3 0.032182 10.613950 6 1010
FAST f=15 a=4 3.184976 10.573883 6 1058
FAST f=15 a=4 0.029878 10.573883 6 1058
FAST f=15 a=5 3.045513 10.580640 8 1154
FAST f=15 a=5 0.022162 10.580640 8 1154
FAST f=15 a=6 3.003296 10.583677 6 1010
FAST f=15 a=6 0.028091 10.583677 6 1010
FAST f=15 a=7 2.952655 10.622551 6 1106
FAST f=15 a=7 0.02724 10.622551 6 1106
FAST f=15 a=8 2.945674 10.614657 6 1010
FAST f=15 a=8 0.027264 10.614657 6 1010
FAST f=15 a=9 3.153439 10.564018 8 1154
FAST f=15 a=9 0.020635 10.564018 8 1154
FAST f=15 a=10 2.950416 10.511454 6 1010
FAST f=15 a=10 0.026606 10.511454 6 1010
FAST f=16 a=1 3.970029 10.681035 8 1154
FAST f=16 a=1 0.038188 10.681035 8 1154
FAST f=16 a=2 3.422892 10.484978 6 1874
FAST f=16 a=2 0.034702 10.484978 6 1874
FAST f=16 a=3 3.215836 10.632631 8 1154
FAST f=16 a=3 0.026084 10.632631 8 1154
FAST f=16 a=4 3.081353 10.626533 6 1106
FAST f=16 a=4 0.030032 10.626533 6 1106
FAST f=16 a=5 3.041241 10.545027 8 1922
FAST f=16 a=5 0.022882 10.545027 8 1922
FAST f=16 a=6 2.989390 10.638284 6 1874
FAST f=16 a=6 0.028308 10.638284 6 1874
FAST f=16 a=7 3.001581 10.797136 6 1106
FAST f=16 a=7 0.027479 10.797136 6 1106
FAST f=16 a=8 2.984107 10.658356 8 1058
FAST f=16 a=8 0.021099 10.658356 8 1058
FAST f=16 a=9 2.925788 10.523869 6 1010
FAST f=16 a=9 0.026905 10.523869 6 1010
FAST f=16 a=10 2.889605 10.745841 6 1874
FAST f=16 a=10 0.026846 10.745841 6 1874
FAST f=17 a=1 4.031953 10.672080 8 1202
FAST f=17 a=1 0.040658 10.672080 8 1202
FAST f=17 a=2 3.458107 10.589352 8 1106
FAST f=17 a=2 0.02926 10.589352 8 1106
FAST f=17 a=3 3.291189 10.662714 8 1154
FAST f=17 a=3 0.026531 10.662714 8 1154
FAST f=17 a=4 3.154950 10.549456 8 1346
FAST f=17 a=4 0.024991 10.549456 8 1346
FAST f=17 a=5 3.092271 10.541670 6 1202
FAST f=17 a=5 0.038285 10.541670 6 1202
FAST f=17 a=6 3.166146 10.729112 6 1874
FAST f=17 a=6 0.038217 10.729112 6 1874
FAST f=17 a=7 3.035467 10.810485 6 1106
FAST f=17 a=7 0.036655 10.810485 6 1106
FAST f=17 a=8 3.035668 10.530532 6 1058
FAST f=17 a=8 0.037715 10.530532 6 1058
FAST f=17 a=9 2.987917 10.589802 8 1922
FAST f=17 a=9 0.02217 10.589802 8 1922
FAST f=17 a=10 2.981647 10.722579 8 1106
FAST f=17 a=10 0.021948 10.722579 8 1106
FAST f=18 a=1 4.067144 10.634943 8 1154
FAST f=18 a=1 0.041386 10.634943 8 1154
FAST f=18 a=2 3.507377 10.546230 6 1970
FAST f=18 a=2 0.037572 10.546230 6 1970
FAST f=18 a=3 3.323015 10.648061 8 1154
FAST f=18 a=3 0.028306 10.648061 8 1154
FAST f=18 a=4 3.216735 10.705402 6 1010
FAST f=18 a=4 0.030755 10.705402 6 1010
FAST f=18 a=5 3.175794 10.588154 8 1874
FAST f=18 a=5 0.025315 10.588154 8 1874
FAST f=18 a=6 3.127459 10.751104 8 1106
FAST f=18 a=6 0.023897 10.751104 8 1106
FAST f=18 a=7 3.083017 10.780402 6 1106
FAST f=18 a=7 0.029158 10.780402 6 1106
FAST f=18 a=8 3.069700 10.547226 8 1346
FAST f=18 a=8 0.024046 10.547226 8 1346
FAST f=18 a=9 3.056591 10.674759 6 1010
FAST f=18 a=9 0.028496 10.674759 6 1010
FAST f=18 a=10 3.063588 10.737578 8 1106
FAST f=18 a=10 0.023033 10.737578 8 1106
FAST f=19 a=1 4.164041 10.650333 8 1154
FAST f=19 a=1 0.042906 10.650333 8 1154
FAST f=19 a=2 3.585409 10.577066 6 1058
FAST f=19 a=2 0.038994 10.577066 6 1058
FAST f=19 a=3 3.439643 10.639403 8 1154
FAST f=19 a=3 0.028427 10.639403 8 1154
FAST f=19 a=4 3.268869 10.554410 8 1298
FAST f=19 a=4 0.026866 10.554410 8 1298
FAST f=19 a=5 3.238225 10.615109 6 1010
FAST f=19 a=5 0.03078 10.615109 6 1010
FAST f=19 a=6 3.199558 10.609782 6 1874
FAST f=19 a=6 0.030099 10.609782 6 1874
FAST f=19 a=7 3.132395 10.794753 6 1106
FAST f=19 a=7 0.028964 10.794753 6 1106
FAST f=19 a=8 3.148446 10.554842 8 1298
FAST f=19 a=8 0.024277 10.554842 8 1298
FAST f=19 a=9 3.108324 10.668763 6 1010
FAST f=19 a=9 0.02896 10.668763 6 1010
FAST f=19 a=10 3.159863 10.757347 8 1106
FAST f=19 a=10 0.023351 10.757347 8 1106
FAST f=20 a=1 4.462698 10.661788 8 1154
FAST f=20 a=1 0.047174 10.661788 8 1154
FAST f=20 a=2 3.820269 10.678612 6 1106
FAST f=20 a=2 0.040807 10.678612 6 1106
FAST f=20 a=3 3.644955 10.648424 8 1154
FAST f=20 a=3 0.031398 10.648424 8 1154
FAST f=20 a=4 3.546257 10.559756 8 1298
FAST f=20 a=4 0.029856 10.559756 8 1298
FAST f=20 a=5 3.485248 10.646637 6 1010
FAST f=20 a=5 0.033756 10.646637 6 1010
FAST f=20 a=6 3.490438 10.775824 8 1106
FAST f=20 a=6 0.028338 10.775824 8 1106
FAST f=20 a=7 3.631289 10.801795 6 1106
FAST f=20 a=7 0.035228 10.801795 6 1106
FAST f=20 a=8 3.758936 10.545116 8 1346
FAST f=20 a=8 0.027495 10.545116 8 1346
FAST f=20 a=9 3.707024 10.677454 6 1010
FAST f=20 a=9 0.031326 10.677454 6 1010
FAST f=20 a=10 3.586593 10.756017 8 1106
FAST f=20 a=10 0.027122 10.756017 8 1106
FAST f=21 a=1 5.701396 10.655398 8 1154
FAST f=21 a=1 0.067744 10.655398 8 1154
FAST f=21 a=2 5.270542 10.650743 6 1106
FAST f=21 a=2 0.052999 10.650743 6 1106
FAST f=21 a=3 4.945294 10.652380 8 1154
FAST f=21 a=3 0.052678 10.652380 8 1154
FAST f=21 a=4 4.894079 10.543185 8 1298
FAST f=21 a=4 0.04997 10.543185 8 1298
FAST f=21 a=5 4.785417 10.630321 6 1010
FAST f=21 a=5 0.045294 10.630321 6 1010
FAST f=21 a=6 4.789381 10.664477 6 1874
FAST f=21 a=6 0.046578 10.664477 6 1874
FAST f=21 a=7 4.302955 10.805179 6 1106
FAST f=21 a=7 0.041205 10.805179 6 1106
FAST f=21 a=8 4.034630 10.551211 8 1298
FAST f=21 a=8 0.040121 10.551211 8 1298
FAST f=21 a=9 4.523868 10.799114 6 1010
FAST f=21 a=9 0.043592 10.799114 6 1010
FAST f=21 a=10 4.760736 10.750255 8 1106
FAST f=21 a=10 0.043483 10.750255 8 1106
FAST f=22 a=1 6.743064 10.640537 8 1154
FAST f=22 a=1 0.086967 10.640537 8 1154
FAST f=22 a=2 6.121739 10.626638 6 1970
FAST f=22 a=2 0.066337 10.626638 6 1970
FAST f=22 a=3 5.248851 10.640688 8 1154
FAST f=22 a=3 0.054935 10.640688 8 1154
FAST f=22 a=4 5.436579 10.588333 8 1298
FAST f=22 a=4 0.064113 10.588333 8 1298
FAST f=22 a=5 5.812815 10.652653 6 1010
FAST f=22 a=5 0.058189 10.652653 6 1010
FAST f=22 a=6 5.745472 10.666437 6 1874
FAST f=22 a=6 0.057188 10.666437 6 1874
FAST f=22 a=7 5.716393 10.806911 6 1106
FAST f=22 a=7 0.056 10.806911 6 1106
FAST f=22 a=8 5.698799 10.530784 8 1298
FAST f=22 a=8 0.0583 10.530784 8 1298
FAST f=22 a=9 5.710533 10.777391 6 1010
FAST f=22 a=9 0.054945 10.777391 6 1010
FAST f=22 a=10 5.685395 10.745023 8 1106
FAST f=22 a=10 0.056526 10.745023 8 1106
FAST f=23 a=1 7.836923 10.638828 8 1154
FAST f=23 a=1 0.099522 10.638828 8 1154
FAST f=23 a=2 6.627834 10.631061 6 1970
FAST f=23 a=2 0.066769 10.631061 6 1970
FAST f=23 a=3 5.602533 10.647288 8 1154
FAST f=23 a=3 0.064513 10.647288 8 1154
FAST f=23 a=4 6.005580 10.568747 8 1298
FAST f=23 a=4 0.062022 10.568747 8 1298
FAST f=23 a=5 5.481816 10.676921 6 1010
FAST f=23 a=5 0.058959 10.676921 6 1010
FAST f=23 a=6 5.460444 10.666194 6 1874
FAST f=23 a=6 0.057687 10.666194 6 1874
FAST f=23 a=7 5.659822 10.800377 6 1106
FAST f=23 a=7 0.06783 10.800377 6 1106
FAST f=23 a=8 6.826940 10.522167 8 1298
FAST f=23 a=8 0.070533 10.522167 8 1298
FAST f=23 a=9 6.804757 10.577799 8 1682
FAST f=23 a=9 0.069949 10.577799 8 1682
FAST f=23 a=10 6.774933 10.742093 8 1106
FAST f=23 a=10 0.068395 10.742093 8 1106
FAST f=24 a=1 8.444110 10.632783 8 1154
FAST f=24 a=1 0.094357 10.632783 8 1154
FAST f=24 a=2 7.289578 10.631061 6 1970
FAST f=24 a=2 0.098515 10.631061 6 1970
FAST f=24 a=3 8.619780 10.646289 8 1154
FAST f=24 a=3 0.098041 10.646289 8 1154
FAST f=24 a=4 8.508455 10.555199 8 1298
FAST f=24 a=4 0.093885 10.555199 8 1298
FAST f=24 a=5 8.471145 10.674363 6 1010
FAST f=24 a=5 0.088676 10.674363 6 1010
FAST f=24 a=6 8.426727 10.667228 6 1874
FAST f=24 a=6 0.087247 10.667228 6 1874
FAST f=24 a=7 8.356826 10.803027 6 1106
FAST f=24 a=7 0.085835 10.803027 6 1106
FAST f=24 a=8 6.756811 10.522049 8 1298
FAST f=24 a=8 0.07107 10.522049 8 1298
FAST f=24 a=9 6.548169 10.571882 8 1682
FAST f=24 a=9 0.0713 10.571882 8 1682
FAST f=24 a=10 8.238079 10.736453 8 1106
FAST f=24 a=10 0.07004 10.736453 8 1106
hg-commands:
NODICT 0.000005 2.425276
RANDOM 0.046332 3.490331
LEGACY 0.720351 3.911682
COVER 45.507731 4.132653 8 386
COVER 1.868810 4.132653 8 386
FAST f=15 a=1 4.561427 3.866894 8 1202
FAST f=15 a=1 0.048946 3.866894 8 1202
FAST f=15 a=2 3.574462 3.892119 8 1538
FAST f=15 a=2 0.033677 3.892119 8 1538
FAST f=15 a=3 3.230227 3.888791 6 1346
FAST f=15 a=3 0.034312 3.888791 6 1346
FAST f=15 a=4 3.042388 3.899739 8 1010
FAST f=15 a=4 0.024307 3.899739 8 1010
FAST f=15 a=5 2.800148 3.896220 8 818
FAST f=15 a=5 0.022331 3.896220 8 818
FAST f=15 a=6 2.706518 3.882039 8 578
FAST f=15 a=6 0.020955 3.882039 8 578
FAST f=15 a=7 2.701820 3.885430 6 866
FAST f=15 a=7 0.026074 3.885430 6 866
FAST f=15 a=8 2.604445 3.906932 8 1826
FAST f=15 a=8 0.021789 3.906932 8 1826
FAST f=15 a=9 2.598568 3.870324 6 1682
FAST f=15 a=9 0.026004 3.870324 6 1682
FAST f=15 a=10 2.575920 3.920783 8 1442
FAST f=15 a=10 0.020228 3.920783 8 1442
FAST f=16 a=1 4.630623 4.001430 8 770
FAST f=16 a=1 0.047497 4.001430 8 770
FAST f=16 a=2 3.674721 3.974431 8 1874
FAST f=16 a=2 0.035761 3.974431 8 1874
FAST f=16 a=3 3.338384 3.978703 8 1010
FAST f=16 a=3 0.029436 3.978703 8 1010
FAST f=16 a=4 3.004412 3.983035 8 1010
FAST f=16 a=4 0.025744 3.983035 8 1010
FAST f=16 a=5 2.881892 3.987710 8 770
FAST f=16 a=5 0.023211 3.987710 8 770
FAST f=16 a=6 2.807410 3.952717 8 1298
FAST f=16 a=6 0.023199 3.952717 8 1298
FAST f=16 a=7 2.819623 3.994627 8 770
FAST f=16 a=7 0.021806 3.994627 8 770
FAST f=16 a=8 2.740092 3.954032 8 1826
FAST f=16 a=8 0.0226 3.954032 8 1826
FAST f=16 a=9 2.682564 3.969879 6 1442
FAST f=16 a=9 0.026324 3.969879 6 1442
FAST f=16 a=10 2.657959 3.969755 8 674
FAST f=16 a=10 0.020413 3.969755 8 674
FAST f=17 a=1 4.729228 4.046000 8 530
FAST f=17 a=1 0.049703 4.046000 8 530
FAST f=17 a=2 3.764510 3.991519 8 1970
FAST f=17 a=2 0.038195 3.991519 8 1970
FAST f=17 a=3 3.416992 4.006296 6 914
FAST f=17 a=3 0.036244 4.006296 6 914
FAST f=17 a=4 3.145626 3.979182 8 1970
FAST f=17 a=4 0.028676 3.979182 8 1970
FAST f=17 a=5 2.995070 4.050070 8 770
FAST f=17 a=5 0.025707 4.050070 8 770
FAST f=17 a=6 2.911833 4.040024 8 770
FAST f=17 a=6 0.02453 4.040024 8 770
FAST f=17 a=7 2.894796 4.015884 8 818
FAST f=17 a=7 0.023956 4.015884 8 818
FAST f=17 a=8 2.789962 4.039303 8 530
FAST f=17 a=8 0.023219 4.039303 8 530
FAST f=17 a=9 2.787625 3.996762 8 1634
FAST f=17 a=9 0.023651 3.996762 8 1634
FAST f=17 a=10 2.754796 4.005059 8 1058
FAST f=17 a=10 0.022537 4.005059 8 1058
FAST f=18 a=1 4.779117 4.038214 8 242
FAST f=18 a=1 0.048814 4.038214 8 242
FAST f=18 a=2 3.829753 4.045768 8 722
FAST f=18 a=2 0.036541 4.045768 8 722
FAST f=18 a=3 3.495053 4.021497 8 770
FAST f=18 a=3 0.032648 4.021497 8 770
FAST f=18 a=4 3.221395 4.039623 8 770
FAST f=18 a=4 0.027818 4.039623 8 770
FAST f=18 a=5 3.059369 4.050414 8 530
FAST f=18 a=5 0.026296 4.050414 8 530
FAST f=18 a=6 3.019292 4.010714 6 962
FAST f=18 a=6 0.031104 4.010714 6 962
FAST f=18 a=7 2.949322 4.031439 6 770
FAST f=18 a=7 0.030745 4.031439 6 770
FAST f=18 a=8 2.876425 4.032088 6 386
FAST f=18 a=8 0.027407 4.032088 6 386
FAST f=18 a=9 2.850958 4.053372 8 674
FAST f=18 a=9 0.023799 4.053372 8 674
FAST f=18 a=10 2.884352 4.020148 8 1730
FAST f=18 a=10 0.024401 4.020148 8 1730
FAST f=19 a=1 4.815669 4.061203 8 674
FAST f=19 a=1 0.051425 4.061203 8 674
FAST f=19 a=2 3.951356 4.013822 8 1442
FAST f=19 a=2 0.039968 4.013822 8 1442
FAST f=19 a=3 3.554682 4.050425 8 722
FAST f=19 a=3 0.032725 4.050425 8 722
FAST f=19 a=4 3.242585 4.054677 8 722
FAST f=19 a=4 0.028194 4.054677 8 722
FAST f=19 a=5 3.105909 4.064524 8 818
FAST f=19 a=5 0.02675 4.064524 8 818
FAST f=19 a=6 3.059901 4.036857 8 1250
FAST f=19 a=6 0.026396 4.036857 8 1250
FAST f=19 a=7 3.016151 4.068234 6 770
FAST f=19 a=7 0.031501 4.068234 6 770
FAST f=19 a=8 2.962902 4.077509 8 530
FAST f=19 a=8 0.023333 4.077509 8 530
FAST f=19 a=9 2.899607 4.067328 8 530
FAST f=19 a=9 0.024553 4.067328 8 530
FAST f=19 a=10 2.950978 4.059901 8 434
FAST f=19 a=10 0.023852 4.059901 8 434
FAST f=20 a=1 5.259834 4.027579 8 1634
FAST f=20 a=1 0.061123 4.027579 8 1634
FAST f=20 a=2 4.382150 4.025093 8 1634
FAST f=20 a=2 0.048009 4.025093 8 1634
FAST f=20 a=3 4.104323 4.060842 8 530
FAST f=20 a=3 0.040965 4.060842 8 530
FAST f=20 a=4 3.853340 4.023504 6 914
FAST f=20 a=4 0.041072 4.023504 6 914
FAST f=20 a=5 3.728841 4.018089 6 1634
FAST f=20 a=5 0.037469 4.018089 6 1634
FAST f=20 a=6 3.683045 4.069138 8 578
FAST f=20 a=6 0.028011 4.069138 8 578
FAST f=20 a=7 3.726973 4.063160 8 722
FAST f=20 a=7 0.028437 4.063160 8 722
FAST f=20 a=8 3.555073 4.057690 8 386
FAST f=20 a=8 0.027588 4.057690 8 386
FAST f=20 a=9 3.551095 4.067253 8 482
FAST f=20 a=9 0.025976 4.067253 8 482
FAST f=20 a=10 3.490127 4.068518 8 530
FAST f=20 a=10 0.025971 4.068518 8 530
FAST f=21 a=1 7.343816 4.064945 8 770
FAST f=21 a=1 0.085035 4.064945 8 770
FAST f=21 a=2 5.930894 4.048206 8 386
FAST f=21 a=2 0.067349 4.048206 8 386
FAST f=21 a=3 6.770775 4.063417 8 578
FAST f=21 a=3 0.077104 4.063417 8 578
FAST f=21 a=4 6.889409 4.066761 8 626
FAST f=21 a=4 0.0717 4.066761 8 626
FAST f=21 a=5 6.714896 4.051813 8 914
FAST f=21 a=5 0.071026 4.051813 8 914
FAST f=21 a=6 6.539890 4.047263 8 1922
FAST f=21 a=6 0.07127 4.047263 8 1922
FAST f=21 a=7 6.511052 4.068373 8 482
FAST f=21 a=7 0.065467 4.068373 8 482
FAST f=21 a=8 6.458788 4.071597 8 482
FAST f=21 a=8 0.063817 4.071597 8 482
FAST f=21 a=9 6.377591 4.052905 8 434
FAST f=21 a=9 0.063112 4.052905 8 434
FAST f=21 a=10 6.360752 4.047773 8 530
FAST f=21 a=10 0.063606 4.047773 8 530
FAST f=22 a=1 10.523471 4.040812 8 962
FAST f=22 a=1 0.14214 4.040812 8 962
FAST f=22 a=2 9.454758 4.059396 8 914
FAST f=22 a=2 0.118343 4.059396 8 914
FAST f=22 a=3 9.043197 4.043019 8 1922
FAST f=22 a=3 0.109798 4.043019 8 1922
FAST f=22 a=4 8.716261 4.044819 8 770
FAST f=22 a=4 0.099687 4.044819 8 770
FAST f=22 a=5 8.529472 4.070576 8 530
FAST f=22 a=5 0.093127 4.070576 8 530
FAST f=22 a=6 8.424241 4.070565 8 722
FAST f=22 a=6 0.093703 4.070565 8 722
FAST f=22 a=7 8.403391 4.070591 8 578
FAST f=22 a=7 0.089763 4.070591 8 578
FAST f=22 a=8 8.285221 4.089171 8 530
FAST f=22 a=8 0.087716 4.089171 8 530
FAST f=22 a=9 8.282506 4.047470 8 722
FAST f=22 a=9 0.089773 4.047470 8 722
FAST f=22 a=10 8.241809 4.064151 8 818
FAST f=22 a=10 0.090413 4.064151 8 818
FAST f=23 a=1 12.389208 4.051635 6 530
FAST f=23 a=1 0.147796 4.051635 6 530
FAST f=23 a=2 11.300910 4.042835 6 914
FAST f=23 a=2 0.133178 4.042835 6 914
FAST f=23 a=3 10.879455 4.047415 8 626
FAST f=23 a=3 0.129571 4.047415 8 626
FAST f=23 a=4 10.522718 4.038269 6 914
FAST f=23 a=4 0.118121 4.038269 6 914
FAST f=23 a=5 10.348043 4.066884 8 434
FAST f=23 a=5 0.112098 4.066884 8 434
FAST f=23 a=6 10.238630 4.048635 8 1010
FAST f=23 a=6 0.120281 4.048635 8 1010
FAST f=23 a=7 10.213255 4.061809 8 530
FAST f=23 a=7 0.1121 4.061809 8 530
FAST f=23 a=8 10.107879 4.074104 8 818
FAST f=23 a=8 0.116544 4.074104 8 818
FAST f=23 a=9 10.063424 4.064811 8 674
FAST f=23 a=9 0.109045 4.064811 8 674
FAST f=23 a=10 10.035801 4.054918 8 530
FAST f=23 a=10 0.108735 4.054918 8 530
FAST f=24 a=1 14.963878 4.073490 8 722
FAST f=24 a=1 0.206344 4.073490 8 722
FAST f=24 a=2 13.833472 4.036100 8 962
FAST f=24 a=2 0.17486 4.036100 8 962
FAST f=24 a=3 13.404631 4.026281 6 1106
FAST f=24 a=3 0.153961 4.026281 6 1106
FAST f=24 a=4 13.041164 4.065448 8 674
FAST f=24 a=4 0.155509 4.065448 8 674
FAST f=24 a=5 12.879412 4.054636 8 674
FAST f=24 a=5 0.148282 4.054636 8 674
FAST f=24 a=6 12.773736 4.081376 8 530
FAST f=24 a=6 0.142563 4.081376 8 530
FAST f=24 a=7 12.711310 4.059834 8 770
FAST f=24 a=7 0.149321 4.059834 8 770
FAST f=24 a=8 12.635459 4.052050 8 1298
FAST f=24 a=8 0.15095 4.052050 8 1298
FAST f=24 a=9 12.558104 4.076516 8 722
FAST f=24 a=9 0.144361 4.076516 8 722
FAST f=24 a=10 10.661348 4.062137 8 818
FAST f=24 a=10 0.108232 4.062137 8 818
hg-changelog:
NODICT 0.000017 1.377590
RANDOM 0.186171 2.097487
LEGACY 1.670867 2.058907
COVER 173.561948 2.189685 8 98
COVER 4.811180 2.189685 8 98
FAST f=15 a=1 18.685906 2.129682 8 434
FAST f=15 a=1 0.173376 2.129682 8 434
FAST f=15 a=2 12.928259 2.131890 8 482
FAST f=15 a=2 0.102582 2.131890 8 482
FAST f=15 a=3 11.132343 2.128027 8 386
FAST f=15 a=3 0.077122 2.128027 8 386
FAST f=15 a=4 10.120683 2.125797 8 434
FAST f=15 a=4 0.065175 2.125797 8 434
FAST f=15 a=5 9.479092 2.127697 8 386
FAST f=15 a=5 0.057905 2.127697 8 386
FAST f=15 a=6 9.159523 2.127132 8 1682
FAST f=15 a=6 0.058604 2.127132 8 1682
FAST f=15 a=7 8.724003 2.129914 8 434
FAST f=15 a=7 0.0493 2.129914 8 434
FAST f=15 a=8 8.595001 2.127137 8 338
FAST f=15 a=8 0.0474 2.127137 8 338
FAST f=15 a=9 8.356405 2.125512 8 482
FAST f=15 a=9 0.046126 2.125512 8 482
FAST f=15 a=10 8.207111 2.126066 8 338
FAST f=15 a=10 0.043292 2.126066 8 338
FAST f=16 a=1 18.464436 2.144040 8 242
FAST f=16 a=1 0.172156 2.144040 8 242
FAST f=16 a=2 12.844825 2.148171 8 194
FAST f=16 a=2 0.099619 2.148171 8 194
FAST f=16 a=3 11.082568 2.140837 8 290
FAST f=16 a=3 0.079165 2.140837 8 290
FAST f=16 a=4 10.066749 2.144405 8 386
FAST f=16 a=4 0.068411 2.144405 8 386
FAST f=16 a=5 9.501121 2.140720 8 386
FAST f=16 a=5 0.061316 2.140720 8 386
FAST f=16 a=6 9.179332 2.139478 8 386
FAST f=16 a=6 0.056322 2.139478 8 386
FAST f=16 a=7 8.849438 2.142412 8 194
FAST f=16 a=7 0.050493 2.142412 8 194
FAST f=16 a=8 8.810919 2.143454 8 434
FAST f=16 a=8 0.051304 2.143454 8 434
FAST f=16 a=9 8.553900 2.140339 8 194
FAST f=16 a=9 0.047285 2.140339 8 194
FAST f=16 a=10 8.398027 2.143130 8 386
FAST f=16 a=10 0.046386 2.143130 8 386
FAST f=17 a=1 18.644657 2.157192 8 98
FAST f=17 a=1 0.173884 2.157192 8 98
FAST f=17 a=2 13.071242 2.159830 8 146
FAST f=17 a=2 0.10388 2.159830 8 146
FAST f=17 a=3 11.332366 2.153654 6 194
FAST f=17 a=3 0.08983 2.153654 6 194
FAST f=17 a=4 10.362413 2.156813 8 242
FAST f=17 a=4 0.070389 2.156813 8 242
FAST f=17 a=5 9.808159 2.155098 6 338
FAST f=17 a=5 0.072661 2.155098 6 338
FAST f=17 a=6 9.451165 2.153845 6 146
FAST f=17 a=6 0.064959 2.153845 6 146
FAST f=17 a=7 9.163097 2.155424 6 242
FAST f=17 a=7 0.064323 2.155424 6 242
FAST f=17 a=8 9.047276 2.156640 8 242
FAST f=17 a=8 0.053382 2.156640 8 242
FAST f=17 a=9 8.807671 2.152396 8 146
FAST f=17 a=9 0.049617 2.152396 8 146
FAST f=17 a=10 8.649827 2.152370 8 146
FAST f=17 a=10 0.047849 2.152370 8 146
FAST f=18 a=1 18.809502 2.168116 8 98
FAST f=18 a=1 0.175226 2.168116 8 98
FAST f=18 a=2 13.756502 2.170870 6 242
FAST f=18 a=2 0.119507 2.170870 6 242
FAST f=18 a=3 12.059748 2.163094 6 98
FAST f=18 a=3 0.093912 2.163094 6 98
FAST f=18 a=4 11.410294 2.172372 8 98
FAST f=18 a=4 0.073048 2.172372 8 98
FAST f=18 a=5 10.560297 2.166388 8 98
FAST f=18 a=5 0.065136 2.166388 8 98
FAST f=18 a=6 10.071390 2.162672 8 98
FAST f=18 a=6 0.059402 2.162672 8 98
FAST f=18 a=7 10.084214 2.166624 6 194
FAST f=18 a=7 0.073276 2.166624 6 194
FAST f=18 a=8 9.953226 2.167454 8 98
FAST f=18 a=8 0.053659 2.167454 8 98
FAST f=18 a=9 8.982461 2.161593 6 146
FAST f=18 a=9 0.05955 2.161593 6 146
FAST f=18 a=10 8.986092 2.164373 6 242
FAST f=18 a=10 0.059135 2.164373 6 242
FAST f=19 a=1 18.908277 2.176021 8 98
FAST f=19 a=1 0.177316 2.176021 8 98
FAST f=19 a=2 13.471313 2.176103 8 98
FAST f=19 a=2 0.106344 2.176103 8 98
FAST f=19 a=3 11.571406 2.172812 8 98
FAST f=19 a=3 0.083293 2.172812 8 98
FAST f=19 a=4 10.632775 2.177770 6 146
FAST f=19 a=4 0.079864 2.177770 6 146
FAST f=19 a=5 10.030190 2.175574 6 146
FAST f=19 a=5 0.07223 2.175574 6 146
FAST f=19 a=6 9.717818 2.169997 8 98
FAST f=19 a=6 0.060049 2.169997 8 98
FAST f=19 a=7 9.397531 2.172770 8 146
FAST f=19 a=7 0.057188 2.172770 8 146
FAST f=19 a=8 9.281061 2.175822 8 98
FAST f=19 a=8 0.053711 2.175822 8 98
FAST f=19 a=9 9.165242 2.169849 6 146
FAST f=19 a=9 0.059898 2.169849 6 146
FAST f=19 a=10 9.048763 2.173394 8 98
FAST f=19 a=10 0.049757 2.173394 8 98
FAST f=20 a=1 21.166917 2.183923 6 98
FAST f=20 a=1 0.205425 2.183923 6 98
FAST f=20 a=2 15.642753 2.182349 6 98
FAST f=20 a=2 0.135957 2.182349 6 98
FAST f=20 a=3 14.053730 2.173544 6 98
FAST f=20 a=3 0.11266 2.173544 6 98
FAST f=20 a=4 15.270019 2.183656 8 98
FAST f=20 a=4 0.107892 2.183656 8 98
FAST f=20 a=5 15.497927 2.174661 6 98
FAST f=20 a=5 0.100305 2.174661 6 98
FAST f=20 a=6 13.973505 2.172391 8 98
FAST f=20 a=6 0.087565 2.172391 8 98
FAST f=20 a=7 14.083296 2.172443 8 98
FAST f=20 a=7 0.078062 2.172443 8 98
FAST f=20 a=8 12.560048 2.175581 8 98
FAST f=20 a=8 0.070282 2.175581 8 98
FAST f=20 a=9 13.078645 2.173975 6 146
FAST f=20 a=9 0.081041 2.173975 6 146
FAST f=20 a=10 12.823328 2.177778 8 98
FAST f=20 a=10 0.074522 2.177778 8 98
FAST f=21 a=1 29.825370 2.183057 6 98
FAST f=21 a=1 0.334453 2.183057 6 98
FAST f=21 a=2 29.476474 2.182752 8 98
FAST f=21 a=2 0.286602 2.182752 8 98
FAST f=21 a=3 25.937186 2.175867 8 98
FAST f=21 a=3 0.17626 2.175867 8 98
FAST f=21 a=4 20.413865 2.179780 8 98
FAST f=21 a=4 0.206085 2.179780 8 98
FAST f=21 a=5 20.541889 2.178328 6 146
FAST f=21 a=5 0.199157 2.178328 6 146
FAST f=21 a=6 21.090670 2.174443 6 146
FAST f=21 a=6 0.190645 2.174443 6 146
FAST f=21 a=7 20.221569 2.177384 6 146
FAST f=21 a=7 0.184278 2.177384 6 146
FAST f=21 a=8 20.322357 2.179456 6 98
FAST f=21 a=8 0.178458 2.179456 6 98
FAST f=21 a=9 20.683912 2.174396 6 146
FAST f=21 a=9 0.190829 2.174396 6 146
FAST f=21 a=10 20.840865 2.174905 8 98
FAST f=21 a=10 0.172515 2.174905 8 98
FAST f=22 a=1 36.822827 2.181612 6 98
FAST f=22 a=1 0.437389 2.181612 6 98
FAST f=22 a=2 30.616902 2.183142 8 98
FAST f=22 a=2 0.324284 2.183142 8 98
FAST f=22 a=3 28.472482 2.178130 8 98
FAST f=22 a=3 0.236538 2.178130 8 98
FAST f=22 a=4 25.847028 2.181878 8 98
FAST f=22 a=4 0.263744 2.181878 8 98
FAST f=22 a=5 27.095881 2.180775 8 98
FAST f=22 a=5 0.24988 2.180775 8 98
FAST f=22 a=6 25.939172 2.170916 8 98
FAST f=22 a=6 0.240033 2.170916 8 98
FAST f=22 a=7 27.064194 2.177849 8 98
FAST f=22 a=7 0.242383 2.177849 8 98
FAST f=22 a=8 25.140221 2.178216 8 98
FAST f=22 a=8 0.237601 2.178216 8 98
FAST f=22 a=9 25.505283 2.177455 6 146
FAST f=22 a=9 0.223217 2.177455 6 146
FAST f=22 a=10 24.529362 2.176705 6 98
FAST f=22 a=10 0.222876 2.176705 6 98
FAST f=23 a=1 39.127310 2.183006 6 98
FAST f=23 a=1 0.417338 2.183006 6 98
FAST f=23 a=2 32.468161 2.183524 6 98
FAST f=23 a=2 0.351645 2.183524 6 98
FAST f=23 a=3 31.577620 2.172604 6 98
FAST f=23 a=3 0.319659 2.172604 6 98
FAST f=23 a=4 30.129247 2.183932 6 98
FAST f=23 a=4 0.307239 2.183932 6 98
FAST f=23 a=5 29.103376 2.183529 6 146
FAST f=23 a=5 0.285533 2.183529 6 146
FAST f=23 a=6 29.776045 2.174367 8 98
FAST f=23 a=6 0.276846 2.174367 8 98
FAST f=23 a=7 28.940407 2.178022 6 146
FAST f=23 a=7 0.274082 2.178022 6 146
FAST f=23 a=8 29.256009 2.179462 6 98
FAST f=23 a=8 0.26949 2.179462 6 98
FAST f=23 a=9 29.347312 2.170407 8 98
FAST f=23 a=9 0.265034 2.170407 8 98
FAST f=23 a=10 29.140081 2.171762 8 98
FAST f=23 a=10 0.259183 2.171762 8 98
FAST f=24 a=1 44.871179 2.182115 6 98
FAST f=24 a=1 0.509433 2.182115 6 98
FAST f=24 a=2 38.694867 2.180549 8 98
FAST f=24 a=2 0.406695 2.180549 8 98
FAST f=24 a=3 38.363769 2.172821 8 98
FAST f=24 a=3 0.359581 2.172821 8 98
FAST f=24 a=4 36.580797 2.184142 8 98
FAST f=24 a=4 0.340614 2.184142 8 98
FAST f=24 a=5 33.125701 2.183301 8 98
FAST f=24 a=5 0.324874 2.183301 8 98
FAST f=24 a=6 34.776068 2.173019 6 146
FAST f=24 a=6 0.340397 2.173019 6 146
FAST f=24 a=7 34.417625 2.176561 6 146
FAST f=24 a=7 0.308223 2.176561 6 146
FAST f=24 a=8 35.470291 2.182161 6 98
FAST f=24 a=8 0.307724 2.182161 6 98
FAST f=24 a=9 34.927252 2.172682 6 146
FAST f=24 a=9 0.300598 2.172682 6 146
FAST f=24 a=10 33.238355 2.173395 6 98
FAST f=24 a=10 0.249916 2.173395 6 98
hg-manifest:
NODICT 0.000004 1.866377
RANDOM 0.696346 2.309436
LEGACY 7.064527 2.506977
COVER 876.312865 2.582528 8 434
COVER 35.684533 2.582528 8 434
FAST f=15 a=1 76.618201 2.404013 8 1202
FAST f=15 a=1 0.700722 2.404013 8 1202
FAST f=15 a=2 49.213058 2.409248 6 1826
FAST f=15 a=2 0.473393 2.409248 6 1826
FAST f=15 a=3 41.753197 2.409677 8 1490
FAST f=15 a=3 0.336848 2.409677 8 1490
FAST f=15 a=4 38.648295 2.407996 8 1538
FAST f=15 a=4 0.283952 2.407996 8 1538
FAST f=15 a=5 36.144936 2.402895 8 1874
FAST f=15 a=5 0.270128 2.402895 8 1874
FAST f=15 a=6 35.484675 2.394873 8 1586
FAST f=15 a=6 0.251637 2.394873 8 1586
FAST f=15 a=7 34.280599 2.397311 8 1778
FAST f=15 a=7 0.23984 2.397311 8 1778
FAST f=15 a=8 32.122572 2.396089 6 1490
FAST f=15 a=8 0.251508 2.396089 6 1490
FAST f=15 a=9 29.909842 2.390092 6 1970
FAST f=15 a=9 0.251233 2.390092 6 1970
FAST f=15 a=10 30.102938 2.400086 6 1682
FAST f=15 a=10 0.23688 2.400086 6 1682
FAST f=16 a=1 67.750401 2.475460 6 1346
FAST f=16 a=1 0.796035 2.475460 6 1346
FAST f=16 a=2 52.812027 2.480860 6 1730
FAST f=16 a=2 0.480384 2.480860 6 1730
FAST f=16 a=3 44.179259 2.469304 8 1970
FAST f=16 a=3 0.332657 2.469304 8 1970
FAST f=16 a=4 37.612728 2.478208 6 1970
FAST f=16 a=4 0.32498 2.478208 6 1970
FAST f=16 a=5 35.056222 2.475568 6 1298
FAST f=16 a=5 0.302824 2.475568 6 1298
FAST f=16 a=6 34.713012 2.486079 8 1730
FAST f=16 a=6 0.24755 2.486079 8 1730
FAST f=16 a=7 33.713687 2.477180 6 1682
FAST f=16 a=7 0.280358 2.477180 6 1682
FAST f=16 a=8 31.571412 2.475418 8 1538
FAST f=16 a=8 0.241241 2.475418 8 1538
FAST f=16 a=9 31.608069 2.478263 8 1922
FAST f=16 a=9 0.241764 2.478263 8 1922
FAST f=16 a=10 31.358002 2.472263 8 1442
FAST f=16 a=10 0.221661 2.472263 8 1442
FAST f=17 a=1 66.185775 2.536085 6 1346
FAST f=17 a=1 0.713549 2.536085 6 1346
FAST f=17 a=2 50.365000 2.546105 8 1298
FAST f=17 a=2 0.467846 2.546105 8 1298
FAST f=17 a=3 42.712843 2.536250 8 1298
FAST f=17 a=3 0.34047 2.536250 8 1298
FAST f=17 a=4 39.514227 2.535555 8 1442
FAST f=17 a=4 0.302989 2.535555 8 1442
FAST f=17 a=5 35.189292 2.524925 8 1202
FAST f=17 a=5 0.273451 2.524925 8 1202
FAST f=17 a=6 35.791683 2.523466 8 1202
FAST f=17 a=6 0.268261 2.523466 8 1202
FAST f=17 a=7 37.416136 2.526625 6 1010
FAST f=17 a=7 0.277558 2.526625 6 1010
FAST f=17 a=8 37.084707 2.533274 6 1250
FAST f=17 a=8 0.285104 2.533274 6 1250
FAST f=17 a=9 34.183814 2.532765 8 1298
FAST f=17 a=9 0.235133 2.532765 8 1298
FAST f=17 a=10 31.149235 2.528722 8 1346
FAST f=17 a=10 0.232679 2.528722 8 1346
FAST f=18 a=1 72.942176 2.559857 6 386
FAST f=18 a=1 0.718618 2.559857 6 386
FAST f=18 a=2 51.690440 2.559572 8 290
FAST f=18 a=2 0.403978 2.559572 8 290
FAST f=18 a=3 45.344908 2.561040 8 962
FAST f=18 a=3 0.357205 2.561040 8 962
FAST f=18 a=4 39.804522 2.558446 8 1010
FAST f=18 a=4 0.310526 2.558446 8 1010
FAST f=18 a=5 38.134888 2.561811 8 626
FAST f=18 a=5 0.273743 2.561811 8 626
FAST f=18 a=6 35.091890 2.555518 8 722
FAST f=18 a=6 0.260135 2.555518 8 722
FAST f=18 a=7 34.639523 2.562938 8 290
FAST f=18 a=7 0.234294 2.562938 8 290
FAST f=18 a=8 36.076431 2.563567 8 1586
FAST f=18 a=8 0.274075 2.563567 8 1586
FAST f=18 a=9 36.376433 2.560950 8 722
FAST f=18 a=9 0.240106 2.560950 8 722
FAST f=18 a=10 32.624790 2.559340 8 578
FAST f=18 a=10 0.234704 2.559340 8 578
FAST f=19 a=1 70.513761 2.572441 8 194
FAST f=19 a=1 0.726112 2.572441 8 194
FAST f=19 a=2 59.263032 2.574560 8 482
FAST f=19 a=2 0.451554 2.574560 8 482
FAST f=19 a=3 51.509594 2.571546 6 194
FAST f=19 a=3 0.393014 2.571546 6 194
FAST f=19 a=4 55.393906 2.573386 8 482
FAST f=19 a=4 0.38819 2.573386 8 482
FAST f=19 a=5 43.201736 2.567589 8 674
FAST f=19 a=5 0.292155 2.567589 8 674
FAST f=19 a=6 42.911687 2.572666 6 434
FAST f=19 a=6 0.303988 2.572666 6 434
FAST f=19 a=7 44.687591 2.573613 6 290
FAST f=19 a=7 0.308721 2.573613 6 290
FAST f=19 a=8 37.372868 2.571039 6 194
FAST f=19 a=8 0.287137 2.571039 6 194
FAST f=19 a=9 36.074230 2.566473 6 482
FAST f=19 a=9 0.280721 2.566473 6 482
FAST f=19 a=10 33.731720 2.570306 8 194
FAST f=19 a=10 0.224073 2.570306 8 194
FAST f=20 a=1 79.670634 2.581146 6 290
FAST f=20 a=1 0.899986 2.581146 6 290
FAST f=20 a=2 58.827141 2.579782 8 386
FAST f=20 a=2 0.602288 2.579782 8 386
FAST f=20 a=3 51.289004 2.579627 8 722
FAST f=20 a=3 0.446091 2.579627 8 722
FAST f=20 a=4 47.711068 2.581508 8 722
FAST f=20 a=4 0.473007 2.581508 8 722
FAST f=20 a=5 47.402929 2.578062 6 434
FAST f=20 a=5 0.497131 2.578062 6 434
FAST f=20 a=6 54.797102 2.577365 8 482
FAST f=20 a=6 0.515061 2.577365 8 482
FAST f=20 a=7 51.370877 2.583050 8 386
FAST f=20 a=7 0.402878 2.583050 8 386
FAST f=20 a=8 51.437931 2.574875 6 242
FAST f=20 a=8 0.453094 2.574875 6 242
FAST f=20 a=9 44.105456 2.576700 6 242
FAST f=20 a=9 0.456633 2.576700 6 242
FAST f=20 a=10 44.447580 2.578305 8 338
FAST f=20 a=10 0.409121 2.578305 8 338
FAST f=21 a=1 113.031686 2.582449 6 242
FAST f=21 a=1 1.456971 2.582449 6 242
FAST f=21 a=2 97.700932 2.582124 8 194
FAST f=21 a=2 1.072078 2.582124 8 194
FAST f=21 a=3 96.563648 2.585479 8 434
FAST f=21 a=3 0.949528 2.585479 8 434
FAST f=21 a=4 90.597813 2.582366 6 386
FAST f=21 a=4 0.76944 2.582366 6 386
FAST f=21 a=5 86.815980 2.579043 8 434
FAST f=21 a=5 0.858167 2.579043 8 434
FAST f=21 a=6 91.235820 2.578378 8 530
FAST f=21 a=6 0.684274 2.578378 8 530
FAST f=21 a=7 84.392788 2.581243 8 386
FAST f=21 a=7 0.814386 2.581243 8 386
FAST f=21 a=8 82.052310 2.582547 8 338
FAST f=21 a=8 0.822633 2.582547 8 338
FAST f=21 a=9 74.696074 2.579319 8 194
FAST f=21 a=9 0.811028 2.579319 8 194
FAST f=21 a=10 76.211170 2.578766 8 290
FAST f=21 a=10 0.809715 2.578766 8 290
FAST f=22 a=1 138.976871 2.580478 8 194
FAST f=22 a=1 1.748932 2.580478 8 194
FAST f=22 a=2 120.164097 2.583633 8 386
FAST f=22 a=2 1.333239 2.583633 8 386
FAST f=22 a=3 111.986474 2.582566 6 194
FAST f=22 a=3 1.305734 2.582566 6 194
FAST f=22 a=4 108.548148 2.583068 6 194
FAST f=22 a=4 1.314026 2.583068 6 194
FAST f=22 a=5 103.173017 2.583495 6 290
FAST f=22 a=5 1.228664 2.583495 6 290
FAST f=22 a=6 108.421262 2.582349 8 530
FAST f=22 a=6 1.076773 2.582349 8 530
FAST f=22 a=7 103.284127 2.581022 8 386
FAST f=22 a=7 1.112117 2.581022 8 386
FAST f=22 a=8 96.330279 2.581073 8 290
FAST f=22 a=8 1.109303 2.581073 8 290
FAST f=22 a=9 97.651348 2.580075 6 194
FAST f=22 a=9 0.933032 2.580075 6 194
FAST f=22 a=10 101.660621 2.584886 8 194
FAST f=22 a=10 0.796823 2.584886 8 194
FAST f=23 a=1 159.322978 2.581474 6 242
FAST f=23 a=1 2.015878 2.581474 6 242
FAST f=23 a=2 134.331775 2.581619 8 194
FAST f=23 a=2 1.545845 2.581619 8 194
FAST f=23 a=3 127.724552 2.579888 6 338
FAST f=23 a=3 1.444496 2.579888 6 338
FAST f=23 a=4 126.077675 2.578137 6 242
FAST f=23 a=4 1.364394 2.578137 6 242
FAST f=23 a=5 124.914027 2.580843 8 338
FAST f=23 a=5 1.116059 2.580843 8 338
FAST f=23 a=6 122.874153 2.577637 6 338
FAST f=23 a=6 1.164584 2.577637 6 338
FAST f=23 a=7 123.099257 2.582715 6 386
FAST f=23 a=7 1.354042 2.582715 6 386
FAST f=23 a=8 122.026753 2.577681 8 194
FAST f=23 a=8 1.210966 2.577681 8 194
FAST f=23 a=9 121.164312 2.584599 6 290
FAST f=23 a=9 1.174859 2.584599 6 290
FAST f=23 a=10 117.462222 2.580358 8 194
FAST f=23 a=10 1.075258 2.580358 8 194
FAST f=24 a=1 169.539659 2.581642 6 194
FAST f=24 a=1 1.916804 2.581642 6 194
FAST f=24 a=2 160.539270 2.580421 6 290
FAST f=24 a=2 1.71087 2.580421 6 290
FAST f=24 a=3 155.455874 2.580449 6 242
FAST f=24 a=3 1.60307 2.580449 6 242
FAST f=24 a=4 147.630320 2.582953 6 338
FAST f=24 a=4 1.396364 2.582953 6 338
FAST f=24 a=5 133.767428 2.580589 6 290
FAST f=24 a=5 1.19933 2.580589 6 290
FAST f=24 a=6 146.437535 2.579453 8 194
FAST f=24 a=6 1.385405 2.579453 8 194
FAST f=24 a=7 147.227507 2.584155 8 386
FAST f=24 a=7 1.48942 2.584155 8 386
FAST f=24 a=8 138.005773 2.584115 8 194
FAST f=24 a=8 1.352 2.584115 8 194
FAST f=24 a=9 141.442625 2.582902 8 290
FAST f=24 a=9 1.39647 2.582902 8 290
FAST f=24 a=10 142.157446 2.582701 8 434
FAST f=24 a=10 1.498889 2.582701 8 434

View File

@ -1,442 +0,0 @@
#include <stdio.h> /* fprintf */
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* strcmp, strlen */
#include <errno.h> /* errno */
#include <ctype.h>
#include <time.h>
#include "random.h"
#include "dictBuilder.h"
#include "zstd_internal.h" /* includes zstd.h */
#include "io.h"
#include "util.h"
#include "zdict.h"
/*-*************************************
* Console display
***************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
if (displayLevel>=4) fflush(stderr); } } }
/*-*************************************
* Exceptions
***************************************/
#ifndef DEBUG
# define DEBUG 0
#endif
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
#define EXM_THROW(error, ...) \
{ \
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
DISPLAY("Error %i : ", error); \
DISPLAY(__VA_ARGS__); \
DISPLAY("\n"); \
exit(error); \
}
/*-*************************************
* Constants
***************************************/
static const unsigned g_defaultMaxDictSize = 110 KB;
#define DEFAULT_CLEVEL 3
#define DEFAULT_DISPLAYLEVEL 2
/*-*************************************
* Struct
***************************************/
typedef struct {
const void* dictBuffer;
size_t dictSize;
} dictInfo;
/*-*************************************
* Dictionary related operations
***************************************/
/** createDictFromFiles() :
* Based on type of param given, train dictionary using the corresponding algorithm
* @return dictInfo containing dictionary buffer and dictionary size
*/
dictInfo* createDictFromFiles(sampleInfo *info, unsigned maxDictSize,
ZDICT_random_params_t *randomParams, ZDICT_cover_params_t *coverParams,
ZDICT_legacy_params_t *legacyParams, ZDICT_fastCover_params_t *fastParams) {
unsigned const displayLevel = randomParams ? randomParams->zParams.notificationLevel :
coverParams ? coverParams->zParams.notificationLevel :
legacyParams ? legacyParams->zParams.notificationLevel :
fastParams ? fastParams->zParams.notificationLevel :
DEFAULT_DISPLAYLEVEL; /* no dict */
void* const dictBuffer = malloc(maxDictSize);
dictInfo* dInfo = NULL;
/* Checks */
if (!dictBuffer)
EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
{ size_t dictSize;
if(randomParams) {
dictSize = ZDICT_trainFromBuffer_random(dictBuffer, maxDictSize, info->srcBuffer,
info->samplesSizes, info->nbSamples, *randomParams);
}else if(coverParams) {
/* Run the optimize version if either k or d is not provided */
if (!coverParams->d || !coverParams->k){
dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize, info->srcBuffer,
info->samplesSizes, info->nbSamples, coverParams);
} else {
dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, info->srcBuffer,
info->samplesSizes, info->nbSamples, *coverParams);
}
} else if(legacyParams) {
dictSize = ZDICT_trainFromBuffer_legacy(dictBuffer, maxDictSize, info->srcBuffer,
info->samplesSizes, info->nbSamples, *legacyParams);
} else if(fastParams) {
/* Run the optimize version if either k or d is not provided */
if (!fastParams->d || !fastParams->k) {
dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
info->samplesSizes, info->nbSamples, fastParams);
} else {
dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
info->samplesSizes, info->nbSamples, *fastParams);
}
} else {
dictSize = 0;
}
if (ZDICT_isError(dictSize)) {
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
free(dictBuffer);
return dInfo;
}
dInfo = (dictInfo *)malloc(sizeof(dictInfo));
dInfo->dictBuffer = dictBuffer;
dInfo->dictSize = dictSize;
}
return dInfo;
}
/** compressWithDict() :
* Compress samples from sample buffer given dictionary stored on dictionary buffer and compression level
* @return compression ratio
*/
double compressWithDict(sampleInfo *srcInfo, dictInfo* dInfo, int compressionLevel, int displayLevel) {
/* Local variables */
size_t totalCompressedSize = 0;
size_t totalOriginalSize = 0;
const unsigned hasDict = dInfo->dictSize > 0 ? 1 : 0;
double cRatio;
size_t dstCapacity;
int i;
/* Pointers */
ZSTD_CDict *cdict = NULL;
ZSTD_CCtx* cctx = NULL;
size_t *offsets = NULL;
void* dst = NULL;
/* Allocate dst with enough space to compress the maximum sized sample */
{
size_t maxSampleSize = 0;
for (i = 0; i < srcInfo->nbSamples; i++) {
maxSampleSize = MAX(srcInfo->samplesSizes[i], maxSampleSize);
}
dstCapacity = ZSTD_compressBound(maxSampleSize);
dst = malloc(dstCapacity);
}
/* Calculate offset for each sample */
offsets = (size_t *)malloc((srcInfo->nbSamples + 1) * sizeof(size_t));
offsets[0] = 0;
for (i = 1; i <= srcInfo->nbSamples; i++) {
offsets[i] = offsets[i - 1] + srcInfo->samplesSizes[i - 1];
}
/* Create the cctx */
cctx = ZSTD_createCCtx();
if(!cctx || !dst) {
cRatio = -1;
goto _cleanup;
}
/* Create CDict if there's a dictionary stored on buffer */
if (hasDict) {
cdict = ZSTD_createCDict(dInfo->dictBuffer, dInfo->dictSize, compressionLevel);
if(!cdict) {
cRatio = -1;
goto _cleanup;
}
}
/* Compress each sample and sum their sizes*/
const BYTE *const samples = (const BYTE *)srcInfo->srcBuffer;
for (i = 0; i < srcInfo->nbSamples; i++) {
size_t compressedSize;
if(hasDict) {
compressedSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity, samples + offsets[i], srcInfo->samplesSizes[i], cdict);
} else {
compressedSize = ZSTD_compressCCtx(cctx, dst, dstCapacity,samples + offsets[i], srcInfo->samplesSizes[i], compressionLevel);
}
if (ZSTD_isError(compressedSize)) {
cRatio = -1;
goto _cleanup;
}
totalCompressedSize += compressedSize;
}
/* Sum original sizes */
for (i = 0; i<srcInfo->nbSamples; i++) {
totalOriginalSize += srcInfo->samplesSizes[i];
}
/* Calculate compression ratio */
DISPLAYLEVEL(2, "original size is %lu\n", totalOriginalSize);
DISPLAYLEVEL(2, "compressed size is %lu\n", totalCompressedSize);
cRatio = (double)totalOriginalSize/(double)totalCompressedSize;
_cleanup:
free(dst);
free(offsets);
ZSTD_freeCCtx(cctx);
ZSTD_freeCDict(cdict);
return cRatio;
}
/** FreeDictInfo() :
* Free memory allocated for dictInfo
*/
void freeDictInfo(dictInfo* info) {
if (!info) return;
if (info->dictBuffer) free((void*)(info->dictBuffer));
free(info);
}
/*-********************************************************
* Benchmarking functions
**********************************************************/
/** benchmarkDictBuilder() :
* Measure how long a dictionary builder takes and compression ratio with the dictionary built
* @return 0 if benchmark successfully, 1 otherwise
*/
int benchmarkDictBuilder(sampleInfo *srcInfo, unsigned maxDictSize, ZDICT_random_params_t *randomParam,
ZDICT_cover_params_t *coverParam, ZDICT_legacy_params_t *legacyParam,
ZDICT_fastCover_params_t *fastParam) {
/* Local variables */
const unsigned displayLevel = randomParam ? randomParam->zParams.notificationLevel :
coverParam ? coverParam->zParams.notificationLevel :
legacyParam ? legacyParam->zParams.notificationLevel :
fastParam ? fastParam->zParams.notificationLevel:
DEFAULT_DISPLAYLEVEL; /* no dict */
const char* name = randomParam ? "RANDOM" :
coverParam ? "COVER" :
legacyParam ? "LEGACY" :
fastParam ? "FAST":
"NODICT"; /* no dict */
const unsigned cLevel = randomParam ? randomParam->zParams.compressionLevel :
coverParam ? coverParam->zParams.compressionLevel :
legacyParam ? legacyParam->zParams.compressionLevel :
fastParam ? fastParam->zParams.compressionLevel:
DEFAULT_CLEVEL; /* no dict */
int result = 0;
/* Calculate speed */
const UTIL_time_t begin = UTIL_getTime();
dictInfo* dInfo = createDictFromFiles(srcInfo, maxDictSize, randomParam, coverParam, legacyParam, fastParam);
const U64 timeMicro = UTIL_clockSpanMicro(begin);
const double timeSec = timeMicro / (double)SEC_TO_MICRO;
if (!dInfo) {
DISPLAYLEVEL(1, "%s does not train successfully\n", name);
result = 1;
goto _cleanup;
}
DISPLAYLEVEL(1, "%s took %f seconds to execute \n", name, timeSec);
/* Calculate compression ratio */
const double cRatio = compressWithDict(srcInfo, dInfo, cLevel, displayLevel);
if (cRatio < 0) {
DISPLAYLEVEL(1, "Compressing with %s dictionary does not work\n", name);
result = 1;
goto _cleanup;
}
DISPLAYLEVEL(1, "Compression ratio with %s dictionary is %f\n", name, cRatio);
_cleanup:
freeDictInfo(dInfo);
return result;
}
int main(int argCount, const char* argv[])
{
const int displayLevel = DEFAULT_DISPLAYLEVEL;
const char* programName = argv[0];
int result = 0;
/* Initialize arguments to default values */
unsigned k = 200;
unsigned d = 8;
unsigned f;
unsigned accel;
unsigned i;
const unsigned cLevel = DEFAULT_CLEVEL;
const unsigned dictID = 0;
const unsigned maxDictSize = g_defaultMaxDictSize;
/* Initialize table to store input files */
const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
unsigned filenameIdx = 0;
char* fileNamesBuf = NULL;
unsigned fileNamesNb = filenameIdx;
const int followLinks = 0;
const char** extendedFileList = NULL;
/* Parse arguments */
for (i = 1; i < argCount; i++) {
const char* argument = argv[i];
if (longCommandWArg(&argument, "in=")) {
filenameTable[filenameIdx] = argument;
filenameIdx++;
continue;
}
DISPLAYLEVEL(1, "benchmark: Incorrect parameters\n");
return 1;
}
/* Get the list of all files recursively (because followLinks==0)*/
extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
&fileNamesNb, followLinks);
if (extendedFileList) {
unsigned u;
for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
free((void*)filenameTable);
filenameTable = extendedFileList;
filenameIdx = fileNamesNb;
}
/* get sampleInfo */
size_t blockSize = 0;
sampleInfo* srcInfo= getSampleInfo(filenameTable,
filenameIdx, blockSize, maxDictSize, displayLevel);
/* set up zParams */
ZDICT_params_t zParams;
zParams.compressionLevel = cLevel;
zParams.notificationLevel = displayLevel;
zParams.dictID = dictID;
/* with no dict */
{
const int noDictResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, NULL);
if(noDictResult) {
result = 1;
goto _cleanup;
}
}
/* for random */
{
ZDICT_random_params_t randomParam;
randomParam.zParams = zParams;
randomParam.k = k;
const int randomResult = benchmarkDictBuilder(srcInfo, maxDictSize, &randomParam, NULL, NULL, NULL);
DISPLAYLEVEL(2, "k=%u\n", randomParam.k);
if(randomResult) {
result = 1;
goto _cleanup;
}
}
/* for legacy */
{
ZDICT_legacy_params_t legacyParam;
legacyParam.zParams = zParams;
legacyParam.selectivityLevel = 9;
const int legacyResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, &legacyParam, NULL);
DISPLAYLEVEL(2, "selectivityLevel=%u\n", legacyParam.selectivityLevel);
if(legacyResult) {
result = 1;
goto _cleanup;
}
}
/* for cover */
{
/* for cover (optimizing k and d) */
ZDICT_cover_params_t coverParam;
memset(&coverParam, 0, sizeof(coverParam));
coverParam.zParams = zParams;
coverParam.splitPoint = 1.0;
coverParam.steps = 40;
coverParam.nbThreads = 1;
const int coverOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL, NULL);
DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParam.k, coverParam.d, coverParam.steps, (unsigned)(coverParam.splitPoint * 100));
if(coverOptResult) {
result = 1;
goto _cleanup;
}
/* for cover (with k and d provided) */
const int coverResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL, NULL);
DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParam.k, coverParam.d, coverParam.steps, (unsigned)(coverParam.splitPoint * 100));
if(coverResult) {
result = 1;
goto _cleanup;
}
}
/* for fastCover */
for (f = 15; f < 25; f++){
DISPLAYLEVEL(2, "current f is %u\n", f);
for (accel = 1; accel < 11; accel++) {
DISPLAYLEVEL(2, "current accel is %u\n", accel);
/* for fastCover (optimizing k and d) */
ZDICT_fastCover_params_t fastParam;
memset(&fastParam, 0, sizeof(fastParam));
fastParam.zParams = zParams;
fastParam.f = f;
fastParam.steps = 40;
fastParam.nbThreads = 1;
fastParam.accel = accel;
const int fastOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100), fastParam.accel);
if(fastOptResult) {
result = 1;
goto _cleanup;
}
/* for fastCover (with k and d provided) */
for (i = 0; i < 5; i++) {
const int fastResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100), fastParam.accel);
if(fastResult) {
result = 1;
goto _cleanup;
}
}
}
}
/* Free allocated memory */
_cleanup:
UTIL_freeFileList(extendedFileList, fileNamesBuf);
freeSampleInfo(srcInfo);
return result;
}

View File

@ -1,6 +0,0 @@
/* ZDICT_trainFromBuffer_legacy() :
* issue : samplesBuffer need to be followed by a noisy guard band.
* work around : duplicate the buffer, and add the noise */
size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_legacy_params_t params);

View File

@ -1,2 +0,0 @@
echo "Benchmark with in=../../lib/common"
./benchmark in=../../../lib/common

View File

@ -1,54 +0,0 @@
ARG :=
CC ?= gcc
CFLAGS ?= -O3 -g
INCLUDES := -I ../../../programs -I ../randomDictBuilder -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder
IO_FILE := ../randomDictBuilder/io.c
TEST_INPUT := ../../../lib
TEST_OUTPUT := fastCoverDict
all: main run clean
.PHONY: test
test: main testrun testshell clean
.PHONY: run
run:
echo "Building a fastCover dictionary with given arguments"
./main $(ARG)
main: main.o io.o fastCover.o libzstd.a
$(CC) $(CFLAGS) main.o io.o fastCover.o libzstd.a -o main
main.o: main.c
$(CC) $(CFLAGS) $(INCLUDES) -c main.c
fastCover.o: fastCover.c
$(CC) $(CFLAGS) $(INCLUDES) -c fastCover.c
io.o: $(IO_FILE)
$(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE)
libzstd.a:
$(MAKE) MOREFLAGS=-g -C ../../../lib libzstd.a
mv ../../../lib/libzstd.a .
.PHONY: testrun
testrun: main
echo "Run with $(TEST_INPUT) and $(TEST_OUTPUT) "
./main in=$(TEST_INPUT) out=$(TEST_OUTPUT)
zstd -be3 -D $(TEST_OUTPUT) -r $(TEST_INPUT) -q
rm -f $(TEST_OUTPUT)
.PHONY: testshell
testshell: test.sh
sh test.sh
echo "Finish running test.sh"
.PHONY: clean
clean:
rm -f *.o main libzstd.a
$(MAKE) -C ../../../lib clean
echo "Cleaning is completed"

View File

@ -1,24 +0,0 @@
FastCover Dictionary Builder
### Permitted Arguments:
Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in="
Output Dictionary (out=dictName): if not provided, default to fastCoverDict
Dictionary ID (dictID=#): nonnegative number; if not provided, default to 0
Maximum Dictionary Size (maxdict=#): positive number; in bytes, if not provided, default to 110KB
Size of Selected Segment (k=#): positive number; in bytes; if not provided, default to 200
Size of Dmer (d=#): either 6 or 8; if not provided, default to 8
Number of steps (steps=#): positive number, if not provided, default to 32
Percentage of samples used for training(split=#): positive number; if not provided, default to 100
###Running Test:
make test
###Usage:
To build a FASTCOVER dictionary with the provided arguments: make ARG= followed by arguments
If k or d is not provided, the optimize version of FASTCOVER is run.
### Examples:
make ARG="in=../../../lib/dictBuilder out=dict100 dictID=520"
make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"

View File

@ -1,809 +0,0 @@
/*-*************************************
* Dependencies
***************************************/
#include <stdio.h> /* fprintf */
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memset */
#include <time.h> /* clock */
#include "mem.h" /* read */
#include "pool.h"
#include "threading.h"
#include "fastCover.h"
#include "zstd_internal.h" /* includes zstd.h */
#include "zdict.h"
/*-*************************************
* Constants
***************************************/
#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
#define FASTCOVER_MAX_F 32
#define DEFAULT_SPLITPOINT 1.0
/*-*************************************
* Console display
***************************************/
static int g_displayLevel = 2;
#define DISPLAY(...) \
{ \
fprintf(stderr, __VA_ARGS__); \
fflush(stderr); \
}
#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
if (displayLevel >= l) { \
DISPLAY(__VA_ARGS__); \
} /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
if (displayLevel >= l) { \
if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
g_time = clock(); \
DISPLAY(__VA_ARGS__); \
} \
}
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
static clock_t g_time = 0;
/*-*************************************
* Hash Functions
***************************************/
static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
/**
* Hash the d-byte value pointed to by p and mod 2^f
*/
static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 h, unsigned d) {
if (d == 6) {
return ZSTD_hash6Ptr(p, h) & ((1 << h) - 1);
}
return ZSTD_hash8Ptr(p, h) & ((1 << h) - 1);
}
/*-*************************************
* Context
***************************************/
typedef struct {
const BYTE *samples;
size_t *offsets;
const size_t *samplesSizes;
size_t nbSamples;
size_t nbTrainSamples;
size_t nbTestSamples;
size_t nbDmers;
U32 *freqs;
U16 *segmentFreqs;
unsigned d;
} FASTCOVER_ctx_t;
/*-*************************************
* Helper functions
***************************************/
/**
* Returns the sum of the sample sizes.
*/
static size_t FASTCOVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
size_t sum = 0;
unsigned i;
for (i = 0; i < nbSamples; ++i) {
sum += samplesSizes[i];
}
return sum;
}
/*-*************************************
* fast functions
***************************************/
/**
* A segment is a range in the source as well as the score of the segment.
*/
typedef struct {
U32 begin;
U32 end;
U32 score;
} FASTCOVER_segment_t;
/**
* Selects the best segment in an epoch.
* Segments of are scored according to the function:
*
* Let F(d) be the frequency of all dmers with hash value d.
* Let S_i be hash value of the dmer at position i of segment S which has length k.
*
* Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
*
* Once the dmer with hash value d is in the dictionary we set F(d) = F(d)/2.
*/
static FASTCOVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
U32 *freqs, U32 begin,U32 end,
ZDICT_fastCover_params_t parameters) {
/* Constants */
const U32 k = parameters.k;
const U32 d = parameters.d;
const U32 dmersInK = k - d + 1;
/* Try each segment (activeSegment) and save the best (bestSegment) */
FASTCOVER_segment_t bestSegment = {0, 0, 0};
FASTCOVER_segment_t activeSegment;
/* Reset the activeDmers in the segment */
/* The activeSegment starts at the beginning of the epoch. */
activeSegment.begin = begin;
activeSegment.end = begin;
activeSegment.score = 0;
{
/* Slide the activeSegment through the whole epoch.
* Save the best segment in bestSegment.
*/
while (activeSegment.end < end) {
/* Get hash value of current dmer */
const size_t index = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, parameters.f, ctx->d);
/* Add frequency of this index to score if this is the first occurrence of index in active segment */
if (ctx->segmentFreqs[index] == 0) {
activeSegment.score += freqs[index];
}
ctx->segmentFreqs[index] += 1;
/* Increment end of segment */
activeSegment.end += 1;
/* If the window is now too large, drop the first position */
if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
/* Get hash value of the dmer to be eliminated from active segment */
const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, parameters.f, ctx->d);
ctx->segmentFreqs[delIndex] -= 1;
/* Subtract frequency of this index from score if this is the last occurrence of this index in active segment */
if (ctx->segmentFreqs[delIndex] == 0) {
activeSegment.score -= freqs[delIndex];
}
/* Increment start of segment */
activeSegment.begin += 1;
}
/* If this segment is the best so far save it */
if (activeSegment.score > bestSegment.score) {
bestSegment = activeSegment;
}
}
/* Zero out rest of segmentFreqs array */
while (activeSegment.begin < end) {
const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, parameters.f, ctx->d);
ctx->segmentFreqs[delIndex] -= 1;
activeSegment.begin += 1;
}
}
{
/* Trim off the zero frequency head and tail from the segment. */
U32 newBegin = bestSegment.end;
U32 newEnd = bestSegment.begin;
U32 pos;
for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
const size_t index = FASTCOVER_hashPtrToIndex(ctx->samples + pos, parameters.f, ctx->d);
U32 freq = freqs[index];
if (freq != 0) {
newBegin = MIN(newBegin, pos);
newEnd = pos + 1;
}
}
bestSegment.begin = newBegin;
bestSegment.end = newEnd;
}
{
/* Zero the frequency of hash value of each dmer covered by the chosen segment. */
U32 pos;
for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
const size_t i = FASTCOVER_hashPtrToIndex(ctx->samples + pos, parameters.f, ctx->d);
freqs[i] = 0;
}
}
return bestSegment;
}
/**
* Check the validity of the parameters.
* Returns non-zero if the parameters are valid and 0 otherwise.
*/
static int FASTCOVER_checkParameters(ZDICT_fastCover_params_t parameters,
size_t maxDictSize) {
/* k, d, and f are required parameters */
if (parameters.d == 0 || parameters.k == 0 || parameters.f == 0) {
return 0;
}
/* d has to be 6 or 8 */
if (parameters.d != 6 && parameters.d != 8) {
return 0;
}
/* 0 < f <= FASTCOVER_MAX_F */
if (parameters.f > FASTCOVER_MAX_F) {
return 0;
}
/* k <= maxDictSize */
if (parameters.k > maxDictSize) {
return 0;
}
/* d <= k */
if (parameters.d > parameters.k) {
return 0;
}
/* 0 < splitPoint <= 1 */
if (parameters.splitPoint <= 0 || parameters.splitPoint > 1) {
return 0;
}
return 1;
}
/**
* Clean up a context initialized with `FASTCOVER_ctx_init()`.
*/
static void FASTCOVER_ctx_destroy(FASTCOVER_ctx_t *ctx) {
if (!ctx) {
return;
}
if (ctx->segmentFreqs) {
free(ctx->segmentFreqs);
ctx->segmentFreqs = NULL;
}
if (ctx->freqs) {
free(ctx->freqs);
ctx->freqs = NULL;
}
if (ctx->offsets) {
free(ctx->offsets);
ctx->offsets = NULL;
}
}
/**
* Calculate for frequency of hash value of each dmer in ctx->samples
*/
static void FASTCOVER_computeFrequency(U32 *freqs, unsigned f, FASTCOVER_ctx_t *ctx){
size_t start; /* start of current dmer */
for (unsigned i = 0; i < ctx->nbTrainSamples; i++) {
size_t currSampleStart = ctx->offsets[i];
size_t currSampleEnd = ctx->offsets[i+1];
start = currSampleStart;
while (start + ctx->d <= currSampleEnd) {
const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, ctx->d);
freqs[dmerIndex]++;
start++;
}
}
}
/**
* Prepare a context for dictionary building.
* The context is only dependent on the parameter `d` and can used multiple
* times.
* Returns 1 on success or zero on error.
* The context must be destroyed with `FASTCOVER_ctx_destroy()`.
*/
static int FASTCOVER_ctx_init(FASTCOVER_ctx_t *ctx, const void *samplesBuffer,
const size_t *samplesSizes, unsigned nbSamples,
unsigned d, double splitPoint, unsigned f) {
const BYTE *const samples = (const BYTE *)samplesBuffer;
const size_t totalSamplesSize = FASTCOVER_sum(samplesSizes, nbSamples);
/* Split samples into testing and training sets */
const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
const size_t trainingSamplesSize = splitPoint < 1.0 ? FASTCOVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
const size_t testSamplesSize = splitPoint < 1.0 ? FASTCOVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;
/* Checks */
if (totalSamplesSize < MAX(d, sizeof(U64)) ||
totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
(U32)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
return 0;
}
/* Check if there are at least 5 training samples */
if (nbTrainSamples < 5) {
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
return 0;
}
/* Check if there's testing sample */
if (nbTestSamples < 1) {
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
return 0;
}
/* Zero the context */
memset(ctx, 0, sizeof(*ctx));
DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
(U32)trainingSamplesSize);
DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
(U32)testSamplesSize);
ctx->samples = samples;
ctx->samplesSizes = samplesSizes;
ctx->nbSamples = nbSamples;
ctx->nbTrainSamples = nbTrainSamples;
ctx->nbTestSamples = nbTestSamples;
ctx->nbDmers = trainingSamplesSize - d + 1;
ctx->d = d;
/* The offsets of each file */
ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t));
if (!ctx->offsets) {
DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
FASTCOVER_ctx_destroy(ctx);
return 0;
}
/* Fill offsets from the samplesSizes */
{
U32 i;
ctx->offsets[0] = 0;
for (i = 1; i <= nbSamples; ++i) {
ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
}
}
/* Initialize frequency array of size 2^f */
ctx->freqs = (U32 *)calloc((1 << f), sizeof(U32));
ctx->segmentFreqs = (U16 *)calloc((1 << f), sizeof(U16));
DISPLAYLEVEL(2, "Computing frequencies\n");
FASTCOVER_computeFrequency(ctx->freqs, f, ctx);
return 1;
}
/**
* Given the prepared context build the dictionary.
*/
static size_t FASTCOVER_buildDictionary(const FASTCOVER_ctx_t *ctx, U32 *freqs,
void *dictBuffer,
size_t dictBufferCapacity,
ZDICT_fastCover_params_t parameters){
BYTE *const dict = (BYTE *)dictBuffer;
size_t tail = dictBufferCapacity;
/* Divide the data up into epochs of equal size.
* We will select at least one segment from each epoch.
*/
const U32 epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k));
const U32 epochSize = (U32)(ctx->nbDmers / epochs);
size_t epoch;
DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
epochSize);
/* Loop through the epochs until there are no more segments or the dictionary
* is full.
*/
for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
const U32 epochBegin = (U32)(epoch * epochSize);
const U32 epochEnd = epochBegin + epochSize;
size_t segmentSize;
/* Select a segment */
FASTCOVER_segment_t segment = FASTCOVER_selectSegment(
ctx, freqs, epochBegin, epochEnd, parameters);
/* If the segment covers no dmers, then we are out of content */
if (segment.score == 0) {
break;
}
/* Trim the segment if necessary and if it is too small then we are done */
segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
if (segmentSize < parameters.d) {
break;
}
/* We fill the dictionary from the back to allow the best segments to be
* referenced with the smallest offsets.
*/
tail -= segmentSize;
memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
DISPLAYUPDATE(
2, "\r%u%% ",
(U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
}
DISPLAYLEVEL(2, "\r%79s\r", "");
return tail;
}
/**
* FASTCOVER_best_t is used for two purposes:
* 1. Synchronizing threads.
* 2. Saving the best parameters and dictionary.
*
* All of the methods except FASTCOVER_best_init() are thread safe if zstd is
* compiled with multithreaded support.
*/
typedef struct fast_best_s {
ZSTD_pthread_mutex_t mutex;
ZSTD_pthread_cond_t cond;
size_t liveJobs;
void *dict;
size_t dictSize;
ZDICT_fastCover_params_t parameters;
size_t compressedSize;
} FASTCOVER_best_t;
/**
* Initialize the `FASTCOVER_best_t`.
*/
static void FASTCOVER_best_init(FASTCOVER_best_t *best) {
if (best==NULL) return; /* compatible with init on NULL */
(void)ZSTD_pthread_mutex_init(&best->mutex, NULL);
(void)ZSTD_pthread_cond_init(&best->cond, NULL);
best->liveJobs = 0;
best->dict = NULL;
best->dictSize = 0;
best->compressedSize = (size_t)-1;
memset(&best->parameters, 0, sizeof(best->parameters));
}
/**
* Wait until liveJobs == 0.
*/
static void FASTCOVER_best_wait(FASTCOVER_best_t *best) {
if (!best) {
return;
}
ZSTD_pthread_mutex_lock(&best->mutex);
while (best->liveJobs != 0) {
ZSTD_pthread_cond_wait(&best->cond, &best->mutex);
}
ZSTD_pthread_mutex_unlock(&best->mutex);
}
/**
* Call FASTCOVER_best_wait() and then destroy the FASTCOVER_best_t.
*/
static void FASTCOVER_best_destroy(FASTCOVER_best_t *best) {
if (!best) {
return;
}
FASTCOVER_best_wait(best);
if (best->dict) {
free(best->dict);
}
ZSTD_pthread_mutex_destroy(&best->mutex);
ZSTD_pthread_cond_destroy(&best->cond);
}
/**
* Called when a thread is about to be launched.
* Increments liveJobs.
*/
static void FASTCOVER_best_start(FASTCOVER_best_t *best) {
if (!best) {
return;
}
ZSTD_pthread_mutex_lock(&best->mutex);
++best->liveJobs;
ZSTD_pthread_mutex_unlock(&best->mutex);
}
/**
* Called when a thread finishes executing, both on error or success.
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
* If this dictionary is the best so far save it and its parameters.
*/
static void FASTCOVER_best_finish(FASTCOVER_best_t *best, size_t compressedSize,
ZDICT_fastCover_params_t parameters, void *dict,
size_t dictSize) {
if (!best) {
return;
}
{
size_t liveJobs;
ZSTD_pthread_mutex_lock(&best->mutex);
--best->liveJobs;
liveJobs = best->liveJobs;
/* If the new dictionary is better */
if (compressedSize < best->compressedSize) {
/* Allocate space if necessary */
if (!best->dict || best->dictSize < dictSize) {
if (best->dict) {
free(best->dict);
}
best->dict = malloc(dictSize);
if (!best->dict) {
best->compressedSize = ERROR(GENERIC);
best->dictSize = 0;
return;
}
}
/* Save the dictionary, parameters, and size */
memcpy(best->dict, dict, dictSize);
best->dictSize = dictSize;
best->parameters = parameters;
best->compressedSize = compressedSize;
}
ZSTD_pthread_mutex_unlock(&best->mutex);
if (liveJobs == 0) {
ZSTD_pthread_cond_broadcast(&best->cond);
}
}
}
/**
* Parameters for FASTCOVER_tryParameters().
*/
typedef struct FASTCOVER_tryParameters_data_s {
const FASTCOVER_ctx_t *ctx;
FASTCOVER_best_t *best;
size_t dictBufferCapacity;
ZDICT_fastCover_params_t parameters;
} FASTCOVER_tryParameters_data_t;
/**
* Tries a set of parameters and updates the FASTCOVER_best_t with the results.
* This function is thread safe if zstd is compiled with multithreaded support.
* It takes its parameters as an *OWNING* opaque pointer to support threading.
*/
static void FASTCOVER_tryParameters(void *opaque) {
/* Save parameters as local variables */
FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t *)opaque;
const FASTCOVER_ctx_t *const ctx = data->ctx;
const ZDICT_fastCover_params_t parameters = data->parameters;
size_t dictBufferCapacity = data->dictBufferCapacity;
size_t totalCompressedSize = ERROR(GENERIC);
/* Allocate space for hash table, dict, and freqs */
BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
U32 *freqs = (U32*) malloc((1 << parameters.f) * sizeof(U32));
if (!dict || !freqs) {
DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
goto _cleanup;
}
/* Copy the frequencies because we need to modify them */
memcpy(freqs, ctx->freqs, (1 << parameters.f) * sizeof(U32));
/* Build the dictionary */
{
const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict,
dictBufferCapacity, parameters);
dictBufferCapacity = ZDICT_finalizeDictionary(
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples,
parameters.zParams);
if (ZDICT_isError(dictBufferCapacity)) {
DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
goto _cleanup;
}
}
/* Check total compressed size */
{
/* Pointers */
ZSTD_CCtx *cctx;
ZSTD_CDict *cdict;
void *dst;
/* Local variables */
size_t dstCapacity;
size_t i;
/* Allocate dst with enough space to compress the maximum sized sample */
{
size_t maxSampleSize = 0;
i = parameters.splitPoint < 1.0 ? ctx->nbTrainSamples : 0;
for (; i < ctx->nbSamples; ++i) {
maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize);
}
dstCapacity = ZSTD_compressBound(maxSampleSize);
dst = malloc(dstCapacity);
}
/* Create the cctx and cdict */
cctx = ZSTD_createCCtx();
cdict = ZSTD_createCDict(dict, dictBufferCapacity,
parameters.zParams.compressionLevel);
if (!dst || !cctx || !cdict) {
goto _compressCleanup;
}
/* Compress each sample and sum their sizes (or error) */
totalCompressedSize = dictBufferCapacity;
i = parameters.splitPoint < 1.0 ? ctx->nbTrainSamples : 0;
for (; i < ctx->nbSamples; ++i) {
const size_t size = ZSTD_compress_usingCDict(
cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
ctx->samplesSizes[i], cdict);
if (ZSTD_isError(size)) {
totalCompressedSize = ERROR(GENERIC);
goto _compressCleanup;
}
totalCompressedSize += size;
}
_compressCleanup:
ZSTD_freeCCtx(cctx);
ZSTD_freeCDict(cdict);
if (dst) {
free(dst);
}
}
_cleanup:
FASTCOVER_best_finish(data->best, totalCompressedSize, parameters, dict,
dictBufferCapacity);
free(data);
if (dict) {
free(dict);
}
if (freqs) {
free(freqs);
}
}
ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
const size_t *samplesSizes, unsigned nbSamples, ZDICT_fastCover_params_t parameters) {
BYTE* const dict = (BYTE*)dictBuffer;
FASTCOVER_ctx_t ctx;
parameters.splitPoint = 1.0;
/* Initialize global data */
g_displayLevel = parameters.zParams.notificationLevel;
/* Checks */
if (!FASTCOVER_checkParameters(parameters, dictBufferCapacity)) {
DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
return ERROR(GENERIC);
}
if (nbSamples == 0) {
DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
return ERROR(GENERIC);
}
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
ZDICT_DICTSIZE_MIN);
return ERROR(dstSize_tooSmall);
}
/* Initialize context */
if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
parameters.d, parameters.splitPoint, parameters.f)) {
DISPLAYLEVEL(1, "Failed to initialize context\n");
return ERROR(GENERIC);
}
/* Build the dictionary */
DISPLAYLEVEL(2, "Building dictionary\n");
{
const size_t tail = FASTCOVER_buildDictionary(&ctx, ctx.freqs, dictBuffer,
dictBufferCapacity, parameters);
const size_t dictionarySize = ZDICT_finalizeDictionary(
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
samplesBuffer, samplesSizes, (unsigned)ctx.nbTrainSamples,
parameters.zParams);
if (!ZSTD_isError(dictionarySize)) {
DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
(U32)dictionarySize);
}
FASTCOVER_ctx_destroy(&ctx);
return dictionarySize;
}
}
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
const size_t *samplesSizes, unsigned nbSamples,
ZDICT_fastCover_params_t *parameters) {
/* constants */
const unsigned nbThreads = parameters->nbThreads;
const double splitPoint =
parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
const unsigned kIterations =
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
const unsigned f = parameters->f == 0 ? 23 : parameters->f;
/* Local variables */
const int displayLevel = parameters->zParams.notificationLevel;
unsigned iteration = 1;
unsigned d;
unsigned k;
FASTCOVER_best_t best;
POOL_ctx *pool = NULL;
/* Checks */
if (splitPoint <= 0 || splitPoint > 1) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
return ERROR(GENERIC);
}
if (kMinK < kMaxD || kMaxK < kMinK) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");
return ERROR(GENERIC);
}
if (nbSamples == 0) {
DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
return ERROR(GENERIC);
}
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
ZDICT_DICTSIZE_MIN);
return ERROR(dstSize_tooSmall);
}
if (nbThreads > 1) {
pool = POOL_create(nbThreads, 1);
if (!pool) {
return ERROR(memory_allocation);
}
}
/* Initialization */
FASTCOVER_best_init(&best);
/* Turn down global display level to clean up display at level 2 and below */
g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1;
/* Loop through d first because each new value needs a new context */
LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
kIterations);
for (d = kMinD; d <= kMaxD; d += 2) {
/* Initialize the context for this value of d */
FASTCOVER_ctx_t ctx;
LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f)) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
FASTCOVER_best_destroy(&best);
POOL_free(pool);
return ERROR(GENERIC);
}
/* Loop through k reusing the same context */
for (k = kMinK; k <= kMaxK; k += kStepSize) {
/* Prepare the arguments */
FASTCOVER_tryParameters_data_t *data = (FASTCOVER_tryParameters_data_t *)malloc(
sizeof(FASTCOVER_tryParameters_data_t));
LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
if (!data) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
FASTCOVER_best_destroy(&best);
FASTCOVER_ctx_destroy(&ctx);
POOL_free(pool);
return ERROR(GENERIC);
}
data->ctx = &ctx;
data->best = &best;
data->dictBufferCapacity = dictBufferCapacity;
data->parameters = *parameters;
data->parameters.k = k;
data->parameters.d = d;
data->parameters.f = f;
data->parameters.splitPoint = splitPoint;
data->parameters.steps = kSteps;
data->parameters.zParams.notificationLevel = g_displayLevel;
/* Check the parameters */
if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity)) {
DISPLAYLEVEL(1, "fastCover parameters incorrect\n");
free(data);
continue;
}
/* Call the function and pass ownership of data to it */
FASTCOVER_best_start(&best);
if (pool) {
POOL_add(pool, &FASTCOVER_tryParameters, data);
} else {
FASTCOVER_tryParameters(data);
}
/* Print status */
LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
(U32)((iteration * 100) / kIterations));
++iteration;
}
FASTCOVER_best_wait(&best);
FASTCOVER_ctx_destroy(&ctx);
}
LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
/* Fill the output buffer and parameters with output of the best parameters */
{
const size_t dictSize = best.dictSize;
if (ZSTD_isError(best.compressedSize)) {
const size_t compressedSize = best.compressedSize;
FASTCOVER_best_destroy(&best);
POOL_free(pool);
return compressedSize;
}
*parameters = best.parameters;
memcpy(dictBuffer, best.dict, dictSize);
FASTCOVER_best_destroy(&best);
POOL_free(pool);
return dictSize;
}
}

View File

@ -1,57 +0,0 @@
#include <stdio.h> /* fprintf */
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memset */
#include <time.h> /* clock */
#include "mem.h" /* read */
#include "pool.h"
#include "threading.h"
#include "zstd_internal.h" /* includes zstd.h */
#ifndef ZDICT_STATIC_LINKING_ONLY
#define ZDICT_STATIC_LINKING_ONLY
#endif
#include "zdict.h"
typedef struct {
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
unsigned f; /* log of size of frequency array */
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
double splitPoint; /* Percentage of samples used for training: the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
ZDICT_params_t zParams;
} ZDICT_fastCover_params_t;
/*! ZDICT_optimizeTrainFromBuffer_fastCover():
* Train a dictionary from an array of samples using a modified version of the COVER algorithm.
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
* The resulting dictionary will be saved into `dictBuffer`.
* All of the parameters except for f are optional.
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
* if steps is zero it defaults to its default value.
* If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
*
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
* or an error code, which can be tested with ZDICT_isError().
* On success `*parameters` contains the parameters selected.
*/
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
const size_t *samplesSizes, unsigned nbSamples,
ZDICT_fastCover_params_t *parameters);
/*! ZDICT_trainFromBuffer_fastCover():
* Train a dictionary from an array of samples using a modified version of the COVER algorithm.
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
* The resulting dictionary will be saved into `dictBuffer`.
* d, k, and f are required.
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
* or an error code, which can be tested with ZDICT_isError().
*/
ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
const size_t *samplesSizes, unsigned nbSamples, ZDICT_fastCover_params_t parameters);

View File

@ -1,183 +0,0 @@
#include <stdio.h> /* fprintf */
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* strcmp, strlen */
#include <errno.h> /* errno */
#include <ctype.h>
#include "fastCover.h"
#include "io.h"
#include "util.h"
#include "zdict.h"
/*-*************************************
* Console display
***************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
if (displayLevel>=4) fflush(stderr); } } }
/*-*************************************
* Exceptions
***************************************/
#ifndef DEBUG
# define DEBUG 0
#endif
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
#define EXM_THROW(error, ...) \
{ \
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
DISPLAY("Error %i : ", error); \
DISPLAY(__VA_ARGS__); \
DISPLAY("\n"); \
exit(error); \
}
/*-*************************************
* Constants
***************************************/
static const unsigned g_defaultMaxDictSize = 110 KB;
#define DEFAULT_CLEVEL 3
/*-*************************************
* FASTCOVER
***************************************/
int FASTCOVER_trainFromFiles(const char* dictFileName, sampleInfo *info,
unsigned maxDictSize,
ZDICT_fastCover_params_t *params) {
unsigned const displayLevel = params->zParams.notificationLevel;
void* const dictBuffer = malloc(maxDictSize);
int result = 0;
/* Checks */
if (!dictBuffer)
EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
{ size_t dictSize;
/* Run the optimize version if either k or d is not provided */
if (!params->d || !params->k) {
dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
info->samplesSizes, info->nbSamples, params);
} else {
dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
info->samplesSizes, info->nbSamples, *params);
}
DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint*100));
if (ZDICT_isError(dictSize)) {
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
result = 1;
goto _done;
}
/* save dict */
DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (U32)dictSize, dictFileName);
saveDict(dictFileName, dictBuffer, dictSize);
}
/* clean up */
_done:
free(dictBuffer);
return result;
}
int main(int argCount, const char* argv[])
{
int displayLevel = 2;
const char* programName = argv[0];
int operationResult = 0;
/* Initialize arguments to default values */
unsigned k = 0;
unsigned d = 0;
unsigned f = 23;
unsigned steps = 32;
unsigned nbThreads = 1;
unsigned split = 100;
const char* outputFile = "fastCoverDict";
unsigned dictID = 0;
unsigned maxDictSize = g_defaultMaxDictSize;
/* Initialize table to store input files */
const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
unsigned filenameIdx = 0;
char* fileNamesBuf = NULL;
unsigned fileNamesNb = filenameIdx;
int followLinks = 0; /* follow directory recursively */
const char** extendedFileList = NULL;
/* Parse arguments */
for (int i = 1; i < argCount; i++) {
const char* argument = argv[i];
if (longCommandWArg(&argument, "k=")) { k = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "d=")) { d = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "f=")) { f = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "steps=")) { steps = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "split=")) { split = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "dictID=")) { dictID = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "in=")) {
filenameTable[filenameIdx] = argument;
filenameIdx++;
continue;
}
if (longCommandWArg(&argument, "out=")) {
outputFile = argument;
continue;
}
DISPLAYLEVEL(1, "Incorrect parameters\n");
operationResult = 1;
return operationResult;
}
/* Get the list of all files recursively (because followLinks==0)*/
extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
&fileNamesNb, followLinks);
if (extendedFileList) {
unsigned u;
for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
free((void*)filenameTable);
filenameTable = extendedFileList;
filenameIdx = fileNamesNb;
}
size_t blockSize = 0;
/* Set up zParams */
ZDICT_params_t zParams;
zParams.compressionLevel = DEFAULT_CLEVEL;
zParams.notificationLevel = displayLevel;
zParams.dictID = dictID;
/* Set up fastCover params */
ZDICT_fastCover_params_t params;
params.zParams = zParams;
params.k = k;
params.d = d;
params.f = f;
params.steps = steps;
params.nbThreads = nbThreads;
params.splitPoint = (double)split/100;
/* Build dictionary */
sampleInfo* info = getSampleInfo(filenameTable,
filenameIdx, blockSize, maxDictSize, zParams.notificationLevel);
operationResult = FASTCOVER_trainFromFiles(outputFile, info, maxDictSize, &params);
/* Free allocated memory */
UTIL_freeFileList(extendedFileList, fileNamesBuf);
freeSampleInfo(info);
return operationResult;
}

View File

@ -1,15 +0,0 @@
echo "Building fastCover dictionary with in=../../lib/common f=20 out=dict1"
./main in=../../../lib/common f=20 out=dict1
zstd -be3 -D dict1 -r ../../../lib/common -q
echo "Building fastCover dictionary with in=../../lib/common k=500 d=6 f=24 out=dict2 dictID=100 maxdict=140000"
./main in=../../../lib/common k=500 d=6 f=24 out=dict2 dictID=100 maxdict=140000
zstd -be3 -D dict2 -r ../../../lib/common -q
echo "Building fastCover dictionary with 2 sample sources"
./main in=../../../lib/common in=../../../lib/compress out=dict3
zstd -be3 -D dict3 -r ../../../lib/common -q
echo "Removing dict1 dict2 dict3"
rm -f dict1 dict2 dict3
echo "Testing with invalid parameters, should fail"
! ./main in=../../../lib/common r=10
! ./main in=../../../lib/common d=10

View File

@ -1,52 +0,0 @@
ARG :=
CC ?= gcc
CFLAGS ?= -O3
INCLUDES := -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder
TEST_INPUT := ../../../lib
TEST_OUTPUT := randomDict
all: main run clean
.PHONY: test
test: main testrun testshell clean
.PHONY: run
run:
echo "Building a random dictionary with given arguments"
./main $(ARG)
main: main.o io.o random.o libzstd.a
$(CC) $(CFLAGS) main.o io.o random.o libzstd.a -o main
main.o: main.c
$(CC) $(CFLAGS) $(INCLUDES) -c main.c
random.o: random.c
$(CC) $(CFLAGS) $(INCLUDES) -c random.c
io.o: io.c
$(CC) $(CFLAGS) $(INCLUDES) -c io.c
libzstd.a:
$(MAKE) -C ../../../lib libzstd.a
mv ../../../lib/libzstd.a .
.PHONY: testrun
testrun: main
echo "Run with $(TEST_INPUT) and $(TEST_OUTPUT) "
./main in=$(TEST_INPUT) out=$(TEST_OUTPUT)
zstd -be3 -D $(TEST_OUTPUT) -r $(TEST_INPUT) -q
rm -f $(TEST_OUTPUT)
.PHONY: testshell
testshell: test.sh
sh test.sh
echo "Finish running test.sh"
.PHONY: clean
clean:
rm -f *.o main libzstd.a
$(MAKE) -C ../../../lib clean
echo "Cleaning is completed"

View File

@ -1,20 +0,0 @@
Random Dictionary Builder
### Permitted Arguments:
Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in="
Output Dictionary (out=dictName): if not provided, default to defaultDict
Dictionary ID (dictID=#): nonnegative number; if not provided, default to 0
Maximum Dictionary Size (maxdict=#): positive number; in bytes, if not provided, default to 110KB
Size of Randomly Selected Segment (k=#): positive number; in bytes; if not provided, default to 200
###Running Test:
make test
###Usage:
To build a random dictionary with the provided arguments: make ARG= followed by arguments
### Examples:
make ARG="in=../../../lib/dictBuilder out=dict100 dictID=520"
make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"

View File

@ -1,284 +0,0 @@
#include <stdio.h> /* fprintf */
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* strcmp, strlen */
#include <errno.h> /* errno */
#include <ctype.h>
#include "io.h"
#include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */
#include "platform.h" /* Large Files support */
#include "util.h"
#include "zdict.h"
/*-*************************************
* Console display
***************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
if (displayLevel>=4) fflush(stderr); } } }
/*-*************************************
* Exceptions
***************************************/
#ifndef DEBUG
# define DEBUG 0
#endif
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
#define EXM_THROW(error, ...) \
{ \
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
DISPLAY("Error %i : ", error); \
DISPLAY(__VA_ARGS__); \
DISPLAY("\n"); \
exit(error); \
}
/*-*************************************
* Constants
***************************************/
#define SAMPLESIZE_MAX (128 KB)
#define RANDOM_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
#define RANDOM_MEMMULT 9
static const size_t g_maxMemory = (sizeof(size_t) == 4) ?
(2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t));
#define NOISELENGTH 32
/*-*************************************
* Commandline related functions
***************************************/
unsigned readU32FromChar(const char** stringPtr){
const char errorMsg[] = "error: numeric value too large";
unsigned result = 0;
while ((**stringPtr >='0') && (**stringPtr <='9')) {
unsigned const max = (((unsigned)(-1)) / 10) - 1;
if (result > max) exit(1);
result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
}
if ((**stringPtr=='K') || (**stringPtr=='M')) {
unsigned const maxK = ((unsigned)(-1)) >> 10;
if (result > maxK) exit(1);
result <<= 10;
if (**stringPtr=='M') {
if (result > maxK) exit(1);
result <<= 10;
}
(*stringPtr)++; /* skip `K` or `M` */
if (**stringPtr=='i') (*stringPtr)++;
if (**stringPtr=='B') (*stringPtr)++;
}
return result;
}
unsigned longCommandWArg(const char** stringPtr, const char* longCommand){
size_t const comSize = strlen(longCommand);
int const result = !strncmp(*stringPtr, longCommand, comSize);
if (result) *stringPtr += comSize;
return result;
}
/* ********************************************************
* File related operations
**********************************************************/
/** loadFiles() :
* load samples from files listed in fileNamesTable into buffer.
* works even if buffer is too small to load all samples.
* Also provides the size of each sample into sampleSizes table
* which must be sized correctly, using DiB_fileStats().
* @return : nb of samples effectively loaded into `buffer`
* *bufferSizePtr is modified, it provides the amount data loaded within buffer.
* sampleSizes is filled with the size of each sample.
*/
static unsigned loadFiles(void* buffer, size_t* bufferSizePtr, size_t* sampleSizes,
unsigned sstSize, const char** fileNamesTable, unsigned nbFiles,
size_t targetChunkSize, unsigned displayLevel) {
char* const buff = (char*)buffer;
size_t pos = 0;
unsigned nbLoadedChunks = 0, fileIndex;
for (fileIndex=0; fileIndex<nbFiles; fileIndex++) {
const char* const fileName = fileNamesTable[fileIndex];
unsigned long long const fs64 = UTIL_getFileSize(fileName);
unsigned long long remainingToLoad = (fs64 == UTIL_FILESIZE_UNKNOWN) ? 0 : fs64;
U32 const nbChunks = targetChunkSize ? (U32)((fs64 + (targetChunkSize-1)) / targetChunkSize) : 1;
U64 const chunkSize = targetChunkSize ? MIN(targetChunkSize, fs64) : fs64;
size_t const maxChunkSize = (size_t)MIN(chunkSize, SAMPLESIZE_MAX);
U32 cnb;
FILE* const f = fopen(fileName, "rb");
if (f==NULL) EXM_THROW(10, "zstd: dictBuilder: %s %s ", fileName, strerror(errno));
DISPLAYUPDATE(2, "Loading %s... \r", fileName);
for (cnb=0; cnb<nbChunks; cnb++) {
size_t const toLoad = (size_t)MIN(maxChunkSize, remainingToLoad);
if (toLoad > *bufferSizePtr-pos) break;
{ size_t const readSize = fread(buff+pos, 1, toLoad, f);
if (readSize != toLoad) EXM_THROW(11, "Pb reading %s", fileName);
pos += readSize;
sampleSizes[nbLoadedChunks++] = toLoad;
remainingToLoad -= targetChunkSize;
if (nbLoadedChunks == sstSize) { /* no more space left in sampleSizes table */
fileIndex = nbFiles; /* stop there */
break;
}
if (toLoad < targetChunkSize) {
fseek(f, (long)(targetChunkSize - toLoad), SEEK_CUR);
} } }
fclose(f);
}
DISPLAYLEVEL(2, "\r%79s\r", "");
*bufferSizePtr = pos;
DISPLAYLEVEL(4, "loaded : %u KB \n", (U32)(pos >> 10))
return nbLoadedChunks;
}
#define rotl32(x,r) ((x << r) | (x >> (32 - r)))
static U32 getRand(U32* src)
{
static const U32 prime1 = 2654435761U;
static const U32 prime2 = 2246822519U;
U32 rand32 = *src;
rand32 *= prime1;
rand32 ^= prime2;
rand32 = rotl32(rand32, 13);
*src = rand32;
return rand32 >> 5;
}
/* shuffle() :
* shuffle a table of file names in a semi-random way
* It improves dictionary quality by reducing "locality" impact, so if sample set is very large,
* it will load random elements from it, instead of just the first ones. */
static void shuffle(const char** fileNamesTable, unsigned nbFiles) {
U32 seed = 0xFD2FB528;
unsigned i;
for (i = nbFiles - 1; i > 0; --i) {
unsigned const j = getRand(&seed) % (i + 1);
const char* const tmp = fileNamesTable[j];
fileNamesTable[j] = fileNamesTable[i];
fileNamesTable[i] = tmp;
}
}
/*-********************************************************
* Dictionary training functions
**********************************************************/
size_t findMaxMem(unsigned long long requiredMem) {
size_t const step = 8 MB;
void* testmem = NULL;
requiredMem = (((requiredMem >> 23) + 1) << 23);
requiredMem += step;
if (requiredMem > g_maxMemory) requiredMem = g_maxMemory;
while (!testmem) {
testmem = malloc((size_t)requiredMem);
requiredMem -= step;
}
free(testmem);
return (size_t)requiredMem;
}
void saveDict(const char* dictFileName,
const void* buff, size_t buffSize) {
FILE* const f = fopen(dictFileName, "wb");
if (f==NULL) EXM_THROW(3, "cannot open %s ", dictFileName);
{ size_t const n = fwrite(buff, 1, buffSize, f);
if (n!=buffSize) EXM_THROW(4, "%s : write error", dictFileName) }
{ size_t const n = (size_t)fclose(f);
if (n!=0) EXM_THROW(5, "%s : flush error", dictFileName) }
}
/*! getFileStats() :
* Given a list of files, and a chunkSize (0 == no chunk, whole files)
* provides the amount of data to be loaded and the resulting nb of samples.
* This is useful primarily for allocation purpose => sample buffer, and sample sizes table.
*/
static fileStats getFileStats(const char** fileNamesTable, unsigned nbFiles,
size_t chunkSize, unsigned displayLevel) {
fileStats fs;
unsigned n;
memset(&fs, 0, sizeof(fs));
for (n=0; n<nbFiles; n++) {
U64 const fileSize = UTIL_getFileSize(fileNamesTable[n]);
U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? 0 : fileSize;
U32 const nbSamples = (U32)(chunkSize ? (srcSize + (chunkSize-1)) / chunkSize : 1);
U64 const chunkToLoad = chunkSize ? MIN(chunkSize, srcSize) : srcSize;
size_t const cappedChunkSize = (size_t)MIN(chunkToLoad, SAMPLESIZE_MAX);
fs.totalSizeToLoad += cappedChunkSize * nbSamples;
fs.oneSampleTooLarge |= (chunkSize > 2*SAMPLESIZE_MAX);
fs.nbSamples += nbSamples;
}
DISPLAYLEVEL(4, "Preparing to load : %u KB \n", (U32)(fs.totalSizeToLoad >> 10));
return fs;
}
sampleInfo* getSampleInfo(const char** fileNamesTable, unsigned nbFiles, size_t chunkSize,
unsigned maxDictSize, const unsigned displayLevel) {
fileStats const fs = getFileStats(fileNamesTable, nbFiles, chunkSize, displayLevel);
size_t* const sampleSizes = (size_t*)malloc(fs.nbSamples * sizeof(size_t));
size_t const memMult = RANDOM_MEMMULT;
size_t const maxMem = findMaxMem(fs.totalSizeToLoad * memMult) / memMult;
size_t loadedSize = (size_t) MIN ((unsigned long long)maxMem, fs.totalSizeToLoad);
void* const srcBuffer = malloc(loadedSize+NOISELENGTH);
/* Checks */
if ((!sampleSizes) || (!srcBuffer))
EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
if (fs.oneSampleTooLarge) {
DISPLAYLEVEL(2, "! Warning : some sample(s) are very large \n");
DISPLAYLEVEL(2, "! Note that dictionary is only useful for small samples. \n");
DISPLAYLEVEL(2, "! As a consequence, only the first %u bytes of each sample are loaded \n", SAMPLESIZE_MAX);
}
if (fs.nbSamples < 5) {
DISPLAYLEVEL(2, "! Warning : nb of samples too low for proper processing ! \n");
DISPLAYLEVEL(2, "! Please provide _one file per sample_. \n");
DISPLAYLEVEL(2, "! Alternatively, split files into fixed-size blocks representative of samples, with -B# \n");
EXM_THROW(14, "nb of samples too low"); /* we now clearly forbid this case */
}
if (fs.totalSizeToLoad < (unsigned long long)(8 * maxDictSize)) {
DISPLAYLEVEL(2, "! Warning : data size of samples too small for target dictionary size \n");
DISPLAYLEVEL(2, "! Samples should be about 100x larger than target dictionary size \n");
}
/* init */
if (loadedSize < fs.totalSizeToLoad)
DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(loadedSize >> 20));
/* Load input buffer */
DISPLAYLEVEL(3, "Shuffling input files\n");
shuffle(fileNamesTable, nbFiles);
nbFiles = loadFiles(srcBuffer, &loadedSize, sampleSizes, fs.nbSamples,
fileNamesTable, nbFiles, chunkSize, displayLevel);
sampleInfo *info = (sampleInfo *)malloc(sizeof(sampleInfo));
info->nbSamples = fs.nbSamples;
info->samplesSizes = sampleSizes;
info->srcBuffer = srcBuffer;
return info;
}
void freeSampleInfo(sampleInfo *info) {
if (!info) return;
if (info->samplesSizes) free((void*)(info->samplesSizes));
if (info->srcBuffer) free((void*)(info->srcBuffer));
free(info);
}

View File

@ -1,60 +0,0 @@
#include <stdio.h> /* fprintf */
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* strcmp, strlen */
#include <errno.h> /* errno */
#include <ctype.h>
#include "zstd_internal.h" /* includes zstd.h */
#include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */
#include "platform.h" /* Large Files support */
#include "util.h"
#include "zdict.h"
/*-*************************************
* Structs
***************************************/
typedef struct {
U64 totalSizeToLoad;
unsigned oneSampleTooLarge;
unsigned nbSamples;
} fileStats;
typedef struct {
const void* srcBuffer;
const size_t *samplesSizes;
size_t nbSamples;
}sampleInfo;
/*! getSampleInfo():
* Load from input files and add samples to buffer
* @return: a sampleInfo struct containing infomation about buffer where samples are stored,
* size of each sample, and total number of samples
*/
sampleInfo* getSampleInfo(const char** fileNamesTable, unsigned nbFiles, size_t chunkSize,
unsigned maxDictSize, const unsigned displayLevel);
/*! freeSampleInfo():
* Free memory allocated for info
*/
void freeSampleInfo(sampleInfo *info);
/*! saveDict():
* Save data stored on buff to dictFileName
*/
void saveDict(const char* dictFileName, const void* buff, size_t buffSize);
unsigned readU32FromChar(const char** stringPtr);
/** longCommandWArg() :
* check if *stringPtr is the same as longCommand.
* If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
* @return 0 and doesn't modify *stringPtr otherwise.
*/
unsigned longCommandWArg(const char** stringPtr, const char* longCommand);

View File

@ -1,161 +0,0 @@
#include <stdio.h> /* fprintf */
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* strcmp, strlen */
#include <errno.h> /* errno */
#include <ctype.h>
#include "random.h"
#include "io.h"
#include "util.h"
#include "zdict.h"
/*-*************************************
* Console display
***************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
if (displayLevel>=4) fflush(stderr); } } }
/*-*************************************
* Exceptions
***************************************/
#ifndef DEBUG
# define DEBUG 0
#endif
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
#define EXM_THROW(error, ...) \
{ \
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
DISPLAY("Error %i : ", error); \
DISPLAY(__VA_ARGS__); \
DISPLAY("\n"); \
exit(error); \
}
/*-*************************************
* Constants
***************************************/
static const unsigned g_defaultMaxDictSize = 110 KB;
#define DEFAULT_CLEVEL 3
#define DEFAULT_k 200
#define DEFAULT_OUTPUTFILE "defaultDict"
#define DEFAULT_DICTID 0
/*-*************************************
* RANDOM
***************************************/
int RANDOM_trainFromFiles(const char* dictFileName, sampleInfo *info,
unsigned maxDictSize,
ZDICT_random_params_t *params) {
unsigned const displayLevel = params->zParams.notificationLevel;
void* const dictBuffer = malloc(maxDictSize);
int result = 0;
/* Checks */
if (!dictBuffer)
EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
{ size_t dictSize;
dictSize = ZDICT_trainFromBuffer_random(dictBuffer, maxDictSize, info->srcBuffer,
info->samplesSizes, info->nbSamples, *params);
DISPLAYLEVEL(2, "k=%u\n", params->k);
if (ZDICT_isError(dictSize)) {
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
result = 1;
goto _done;
}
/* save dict */
DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (U32)dictSize, dictFileName);
saveDict(dictFileName, dictBuffer, dictSize);
}
/* clean up */
_done:
free(dictBuffer);
return result;
}
int main(int argCount, const char* argv[])
{
int displayLevel = 2;
const char* programName = argv[0];
int operationResult = 0;
/* Initialize arguments to default values */
unsigned k = DEFAULT_k;
const char* outputFile = DEFAULT_OUTPUTFILE;
unsigned dictID = DEFAULT_DICTID;
unsigned maxDictSize = g_defaultMaxDictSize;
/* Initialize table to store input files */
const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
unsigned filenameIdx = 0;
/* Parse arguments */
for (int i = 1; i < argCount; i++) {
const char* argument = argv[i];
if (longCommandWArg(&argument, "k=")) { k = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "dictID=")) { dictID = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "in=")) {
filenameTable[filenameIdx] = argument;
filenameIdx++;
continue;
}
if (longCommandWArg(&argument, "out=")) {
outputFile = argument;
continue;
}
DISPLAYLEVEL(1, "Incorrect parameters\n");
operationResult = 1;
return operationResult;
}
char* fileNamesBuf = NULL;
unsigned fileNamesNb = filenameIdx;
int followLinks = 0; /* follow directory recursively */
const char** extendedFileList = NULL;
extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
&fileNamesNb, followLinks);
if (extendedFileList) {
unsigned u;
for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
free((void*)filenameTable);
filenameTable = extendedFileList;
filenameIdx = fileNamesNb;
}
size_t blockSize = 0;
ZDICT_random_params_t params;
ZDICT_params_t zParams;
zParams.compressionLevel = DEFAULT_CLEVEL;
zParams.notificationLevel = displayLevel;
zParams.dictID = dictID;
params.zParams = zParams;
params.k = k;
sampleInfo* info = getSampleInfo(filenameTable,
filenameIdx, blockSize, maxDictSize, zParams.notificationLevel);
operationResult = RANDOM_trainFromFiles(outputFile, info, maxDictSize, &params);
/* Free allocated memory */
UTIL_freeFileList(extendedFileList, fileNamesBuf);
freeSampleInfo(info);
return operationResult;
}

View File

@ -1,163 +0,0 @@
/*-*************************************
* Dependencies
***************************************/
#include <stdio.h> /* fprintf */
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memset */
#include <time.h> /* clock */
#include "random.h"
#include "util.h" /* UTIL_getFileSize, UTIL_getTotalFileSize */
#ifndef ZDICT_STATIC_LINKING_ONLY
#define ZDICT_STATIC_LINKING_ONLY
#endif
#include "zdict.h"
/*-*************************************
* Console display
***************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
if (displayLevel >= l) { \
if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
g_time = clock(); \
DISPLAY(__VA_ARGS__); \
} \
}
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(displayLevel, l, __VA_ARGS__)
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
static clock_t g_time = 0;
/* ********************************************************
* Random Dictionary Builder
**********************************************************/
/**
* Returns the sum of the sample sizes.
*/
static size_t RANDOM_sum(const size_t *samplesSizes, unsigned nbSamples) {
size_t sum = 0;
unsigned i;
for (i = 0; i < nbSamples; ++i) {
sum += samplesSizes[i];
}
return sum;
}
/**
* A segment is an inclusive range in the source.
*/
typedef struct {
U32 begin;
U32 end;
} RANDOM_segment_t;
/**
* Selects a random segment from totalSamplesSize - k + 1 possible segments
*/
static RANDOM_segment_t RANDOM_selectSegment(const size_t totalSamplesSize,
ZDICT_random_params_t parameters) {
const U32 k = parameters.k;
RANDOM_segment_t segment;
unsigned index;
/* Randomly generate a number from 0 to sampleSizes - k */
index = rand()%(totalSamplesSize - k + 1);
/* inclusive */
segment.begin = index;
segment.end = index + k - 1;
return segment;
}
/**
* Check the validity of the parameters.
* Returns non-zero if the parameters are valid and 0 otherwise.
*/
static int RANDOM_checkParameters(ZDICT_random_params_t parameters,
size_t maxDictSize) {
/* k is a required parameter */
if (parameters.k == 0) {
return 0;
}
/* k <= maxDictSize */
if (parameters.k > maxDictSize) {
return 0;
}
return 1;
}
/**
* Given the prepared context build the dictionary.
*/
static size_t RANDOM_buildDictionary(const size_t totalSamplesSize, const BYTE *samples,
void *dictBuffer, size_t dictBufferCapacity,
ZDICT_random_params_t parameters) {
BYTE *const dict = (BYTE *)dictBuffer;
size_t tail = dictBufferCapacity;
const int displayLevel = parameters.zParams.notificationLevel;
while (tail > 0) {
/* Select a segment */
RANDOM_segment_t segment = RANDOM_selectSegment(totalSamplesSize, parameters);
size_t segmentSize;
segmentSize = MIN(segment.end - segment.begin + 1, tail);
tail -= segmentSize;
memcpy(dict + tail, samples + segment.begin, segmentSize);
DISPLAYUPDATE(
2, "\r%u%% ",
(U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
}
return tail;
}
ZDICTLIB_API size_t ZDICT_trainFromBuffer_random(
void *dictBuffer, size_t dictBufferCapacity,
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
ZDICT_random_params_t parameters) {
const int displayLevel = parameters.zParams.notificationLevel;
BYTE* const dict = (BYTE*)dictBuffer;
/* Checks */
if (!RANDOM_checkParameters(parameters, dictBufferCapacity)) {
DISPLAYLEVEL(1, "k is incorrect\n");
return ERROR(GENERIC);
}
if (nbSamples == 0) {
DISPLAYLEVEL(1, "Random must have at least one input file\n");
return ERROR(GENERIC);
}
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
ZDICT_DICTSIZE_MIN);
return ERROR(dstSize_tooSmall);
}
const size_t totalSamplesSize = RANDOM_sum(samplesSizes, nbSamples);
const BYTE *const samples = (const BYTE *)samplesBuffer;
DISPLAYLEVEL(2, "Building dictionary\n");
{
const size_t tail = RANDOM_buildDictionary(totalSamplesSize, samples,
dictBuffer, dictBufferCapacity, parameters);
const size_t dictSize = ZDICT_finalizeDictionary(
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
if (!ZSTD_isError(dictSize)) {
DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
(U32)dictSize);
}
return dictSize;
}
}

View File

@ -1,29 +0,0 @@
#include <stdio.h> /* fprintf */
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memset */
#include <time.h> /* clock */
#include "zstd_internal.h" /* includes zstd.h */
#ifndef ZDICT_STATIC_LINKING_ONLY
#define ZDICT_STATIC_LINKING_ONLY
#endif
#include "zdict.h"
typedef struct {
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+]; Default to 200 */
ZDICT_params_t zParams;
} ZDICT_random_params_t;
/*! ZDICT_trainFromBuffer_random():
* Train a dictionary from an array of samples.
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
* The resulting dictionary will be saved into `dictBuffer`.
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
* or an error code, which can be tested with ZDICT_isError().
*/
ZDICTLIB_API size_t ZDICT_trainFromBuffer_random( void *dictBuffer, size_t dictBufferCapacity,
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
ZDICT_random_params_t parameters);

View File

@ -1,14 +0,0 @@
echo "Building random dictionary with in=../../lib/common k=200 out=dict1"
./main in=../../../lib/common k=200 out=dict1
zstd -be3 -D dict1 -r ../../../lib/common -q
echo "Building random dictionary with in=../../lib/common k=500 out=dict2 dictID=100 maxdict=140000"
./main in=../../../lib/common k=500 out=dict2 dictID=100 maxdict=140000
zstd -be3 -D dict2 -r ../../../lib/common -q
echo "Building random dictionary with 2 sample sources"
./main in=../../../lib/common in=../../../lib/compress out=dict3
zstd -be3 -D dict3 -r ../../../lib/common -q
echo "Removing dict1 dict2 dict3"
rm -f dict1 dict2 dict3
echo "Testing with invalid parameters, should fail"
! ./main r=10

View File

@ -429,14 +429,14 @@ void shuffleDictionaries(ddict_collection_t dicts)
{
size_t const nbDicts = dicts.nbDDict;
for (size_t r=0; r<nbDicts; r++) {
size_t const d = rand() % nbDicts;
size_t const d = (size_t)rand() % nbDicts;
ZSTD_DDict* tmpd = dicts.ddicts[d];
dicts.ddicts[d] = dicts.ddicts[r];
dicts.ddicts[r] = tmpd;
}
for (size_t r=0; r<nbDicts; r++) {
size_t const d1 = rand() % nbDicts;
size_t const d2 = rand() % nbDicts;
size_t const d1 = (size_t)rand() % nbDicts;
size_t const d2 = (size_t)rand() % nbDicts;
ZSTD_DDict* tmpd = dicts.ddicts[d1];
dicts.ddicts[d1] = dicts.ddicts[d2];
dicts.ddicts[d2] = tmpd;
@ -528,7 +528,7 @@ size_t decompress(const void* src, size_t srcSize, void* dst, size_t dstCapacity
static int benchMem(slice_collection_t dstBlocks,
slice_collection_t srcBlocks,
ddict_collection_t dictionaries,
int nbRounds)
unsigned nbRounds)
{
assert(dstBlocks.nbSlices == srcBlocks.nbSlices);
@ -586,7 +586,7 @@ int bench(const char** fileNameTable, unsigned nbFiles,
const char* dictionary,
size_t blockSize, int clevel,
unsigned nbDictMax, unsigned nbBlocks,
int nbRounds)
unsigned nbRounds)
{
int result = 0;
@ -707,7 +707,7 @@ static unsigned readU32FromChar(const char** stringPtr)
while ((**stringPtr >='0') && (**stringPtr <='9')) {
unsigned const max = (((unsigned)(-1)) / 10) - 1;
assert(result <= max); /* check overflow */
result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
result *= 10, result += (unsigned)**stringPtr - '0', (*stringPtr)++ ;
}
if ((**stringPtr=='K') || (**stringPtr=='M')) {
unsigned const maxK = ((unsigned)(-1)) >> 10;
@ -729,7 +729,7 @@ static unsigned readU32FromChar(const char** stringPtr)
* If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
* @return 0 and doesn't modify *stringPtr otherwise.
*/
static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
static int longCommandWArg(const char** stringPtr, const char* longCommand)
{
size_t const comSize = strlen(longCommand);
int const result = !strncmp(*stringPtr, longCommand, comSize);
@ -765,12 +765,12 @@ int bad_usage(const char* exeName)
int main (int argc, const char** argv)
{
int recursiveMode = 0;
int nbRounds = BENCH_TIME_DEFAULT_S;
unsigned nbRounds = BENCH_TIME_DEFAULT_S;
const char* const exeName = argv[0];
if (argc < 2) return bad_usage(exeName);
const char** nameTable = (const char**)malloc(argc * sizeof(const char*));
const char** nameTable = (const char**)malloc((size_t)argc * sizeof(const char*));
assert(nameTable != NULL);
unsigned nameIdx = 0;
@ -791,26 +791,27 @@ int main (int argc, const char** argv)
if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--nbDicts=")) { nbDicts = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--nbBlocks=")) { nbBlocks = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--clevel=")) { cLevel = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "-")) { cLevel = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--clevel=")) { cLevel = (int)readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "-")) { cLevel = (int)readU32FromChar(&argument); continue; }
/* anything that's not a command is a filename */
nameTable[nameIdx++] = argument;
}
const char** filenameTable = nameTable;
unsigned nbFiles = nameIdx;
char* buffer_containing_filenames = NULL;
FileNamesTable* filenameTable;
if (recursiveMode) {
#ifndef UTIL_HAS_CREATEFILELIST
assert(0); /* missing capability, do not run */
#endif
filenameTable = UTIL_createFileList(nameTable, nameIdx, &buffer_containing_filenames, &nbFiles, 1 /* follow_links */);
filenameTable = UTIL_createExpandedFNT(nameTable, nameIdx, 1 /* follow_links */);
} else {
filenameTable = UTIL_assembleFileNamesTable(nameTable, nameIdx, NULL);
nameTable = NULL; /* UTIL_createFileNamesTable() takes ownership of nameTable */
}
int result = bench(filenameTable, nbFiles, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds);
int result = bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds);
free(buffer_containing_filenames);
UTIL_freeFileNamesTable(filenameTable);
free(nameTable);
return result;

View File

@ -337,23 +337,19 @@ Options::Status Options::parse(int argc, const char **argv) {
// Translate input files/directories into files to (de)compress
if (recursive) {
char *scratchBuffer = nullptr;
unsigned numFiles = 0;
const char **files =
UTIL_createFileList(localInputFiles.data(), localInputFiles.size(),
&scratchBuffer, &numFiles, followLinks);
FileNamesTable* const files = UTIL_createExpandedFNT(localInputFiles.data(), localInputFiles.size(), followLinks);
if (files == nullptr) {
std::fprintf(stderr, "Error traversing directories\n");
return Status::Failure;
}
auto guard =
makeScopeGuard([&] { UTIL_freeFileList(files, scratchBuffer); });
if (numFiles == 0) {
makeScopeGuard([&] { UTIL_freeFileNamesTable(files); });
if (files->tableSize == 0) {
std::fprintf(stderr, "No files found\n");
return Status::Failure;
}
inputFiles.resize(numFiles);
std::copy(files, files + numFiles, inputFiles.begin());
inputFiles.resize(files->tableSize);
std::copy(files->fileNames, files->fileNames + files->tableSize, inputFiles.begin());
} else {
inputFiles.resize(localInputFiles.size());
std::copy(localInputFiles.begin(), localInputFiles.end(),

View File

@ -17,22 +17,83 @@ extern "C" {
* Dependencies
******************************************/
#include "util.h" /* note : ensure that platform.h is included first ! */
#include <stdlib.h> /* malloc, realloc, free */
#include <stdio.h> /* fprintf */
#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC, nanosleep */
#include <errno.h>
#include <assert.h>
#if defined(_WIN32)
# include <sys/utime.h> /* utime */
# include <io.h> /* _chmod */
#else
# include <unistd.h> /* chown, stat */
# if PLATFORM_POSIX_VERSION < 200809L
# include <utime.h> /* utime */
# else
# include <fcntl.h> /* AT_FDCWD */
# include <sys/stat.h> /* utimensat */
# endif
#endif
#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__)
#include <direct.h> /* needed for _mkdir in windows */
#endif
#if defined(__linux__) || (PLATFORM_POSIX_VERSION >= 200112L) /* opendir, readdir require POSIX.1-2001 */
# include <dirent.h> /* opendir, readdir */
# include <string.h> /* strerror, memcpy */
#endif /* #ifdef _WIN32 */
/*-****************************************
* Internal Macros
******************************************/
/* CONTROL is almost like an assert(), but is never disabled.
* It's designed for failures that may happen rarely,
* but we don't want to maintain a specific error code path for them,
* such as a malloc() returning NULL for example.
* Since it's always active, this macro can trigger side effects.
*/
#define CONTROL(c) { \
if (!(c)) { \
UTIL_DISPLAYLEVEL(1, "Error : %s, %i : %s", \
__FILE__, __LINE__, #c); \
exit(1); \
} }
/* console log */
#define UTIL_DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define UTIL_DISPLAYLEVEL(l, ...) { if (g_utilDisplayLevel>=l) { UTIL_DISPLAY(__VA_ARGS__); } }
/* A modified version of realloc().
* If UTIL_realloc() fails the original block is freed.
*/
UTIL_STATIC void* UTIL_realloc(void *ptr, size_t size)
{
void *newptr = realloc(ptr, size);
if (newptr) return newptr;
free(ptr);
return NULL;
}
#if defined(_MSC_VER)
#define chmod _chmod
#endif
/*-****************************************
* Console log
******************************************/
int g_utilDisplayLevel;
/*-*************************************
* Constants
***************************************/
#define LIST_SIZE_INCREASE (8*1024)
#define MAX_FILE_OF_FILE_NAMES_SIZE (1<<20)*50
/*-*************************************
@ -194,28 +255,218 @@ U64 UTIL_getFileSize(const char* infilename)
}
U64 UTIL_getTotalFileSize(const char* const * const fileNamesTable, unsigned nbFiles)
U64 UTIL_getTotalFileSize(const char* const * fileNamesTable, unsigned nbFiles)
{
U64 total = 0;
int error = 0;
unsigned n;
for (n=0; n<nbFiles; n++) {
U64 const size = UTIL_getFileSize(fileNamesTable[n]);
error |= (size == UTIL_FILESIZE_UNKNOWN);
if (size == UTIL_FILESIZE_UNKNOWN) return UTIL_FILESIZE_UNKNOWN;
total += size;
}
return error ? UTIL_FILESIZE_UNKNOWN : total;
return total;
}
/* condition : @file must be valid, and not have reached its end.
* @return : length of line written into @buf, ended with `\0` instead of '\n',
* or 0, if there is no new line */
static size_t readLineFromFile(char* buf, size_t len, FILE* file)
{
assert(!feof(file));
CONTROL( fgets(buf, (int) len, file) == buf ); /* requires success */
{ size_t linelen = strlen(buf);
if (strlen(buf)==0) return 0;
if (buf[linelen-1] == '\n') linelen--;
buf[linelen] = '\0';
return linelen+1;
}
}
/* Conditions :
* size of @inputFileName file must be < @dstCapacity
* @dst must be initialized
* @return : nb of lines
* or -1 if there's an error
*/
static int
readLinesFromFile(void* dst, size_t dstCapacity,
const char* inputFileName)
{
int nbFiles = 0;
size_t pos = 0;
char* const buf = (char*)dst;
FILE* const inputFile = fopen(inputFileName, "r");
assert(dst != NULL);
if(!inputFile) {
if (g_utilDisplayLevel >= 1) perror("zstd:util:readLinesFromFile");
return -1;
}
while ( !feof(inputFile) ) {
size_t const lineLength = readLineFromFile(buf+pos, dstCapacity-pos, inputFile);
if (lineLength == 0) break;
assert(pos + lineLength < dstCapacity);
pos += lineLength;
++nbFiles;
}
CONTROL( fclose(inputFile) == 0 );
return nbFiles;
}
/*Note: buf is not freed in case function successfully created table because filesTable->fileNames[0] = buf*/
FileNamesTable*
UTIL_createFileNamesTable_fromFileName(const char* inputFileName)
{
size_t nbFiles = 0;
char* buf;
size_t bufSize;
size_t pos = 0;
if (!UTIL_fileExist(inputFileName) || !UTIL_isRegularFile(inputFileName))
return NULL;
{ U64 const inputFileSize = UTIL_getFileSize(inputFileName);
if(inputFileSize > MAX_FILE_OF_FILE_NAMES_SIZE)
return NULL;
bufSize = (size_t)(inputFileSize + 1); /* (+1) to add '\0' at the end of last filename */
}
buf = (char*) malloc(bufSize);
CONTROL( buf != NULL );
{ int const ret_nbFiles = readLinesFromFile(buf, bufSize, inputFileName);
if (ret_nbFiles <= 0) {
free(buf);
return NULL;
}
nbFiles = (size_t)ret_nbFiles;
}
{ const char** filenamesTable = (const char**) malloc(nbFiles * sizeof(*filenamesTable));
CONTROL(filenamesTable != NULL);
{ size_t fnb;
for (fnb = 0, pos = 0; fnb < nbFiles; fnb++) {
filenamesTable[fnb] = buf+pos;
pos += strlen(buf+pos)+1; /* +1 for the finishing `\0` */
} }
assert(pos <= bufSize);
return UTIL_assembleFileNamesTable(filenamesTable, nbFiles, buf);
}
}
FileNamesTable*
UTIL_assembleFileNamesTable(const char** filenames, size_t tableSize, char* buf)
{
FileNamesTable* const table = (FileNamesTable*) malloc(sizeof(*table));
CONTROL(table != NULL);
table->fileNames = filenames;
table->buf = buf;
table->tableSize = tableSize;
table->tableCapacity = tableSize;
return table;
}
void UTIL_freeFileNamesTable(FileNamesTable* table)
{
if (table==NULL) return;
free((void*)table->fileNames);
free(table->buf);
free(table);
}
FileNamesTable* UTIL_allocateFileNamesTable(size_t tableSize)
{
const char** const fnTable = (const char**)malloc(tableSize * sizeof(*fnTable));
FileNamesTable* fnt;
if (fnTable==NULL) return NULL;
fnt = UTIL_assembleFileNamesTable(fnTable, tableSize, NULL);
fnt->tableSize = 0; /* the table is empty */
return fnt;
}
void UTIL_refFilename(FileNamesTable* fnt, const char* filename)
{
assert(fnt->tableSize < fnt->tableCapacity);
fnt->fileNames[fnt->tableSize] = filename;
fnt->tableSize++;
}
static size_t getTotalTableSize(FileNamesTable* table)
{
size_t fnb = 0, totalSize = 0;
for(fnb = 0 ; fnb < table->tableSize && table->fileNames[fnb] ; ++fnb) {
totalSize += strlen(table->fileNames[fnb]) + 1; /* +1 to add '\0' at the end of each fileName */
}
return totalSize;
}
FileNamesTable*
UTIL_mergeFileNamesTable(FileNamesTable* table1, FileNamesTable* table2)
{
unsigned newTableIdx = 0;
size_t pos = 0;
size_t newTotalTableSize;
char* buf;
FileNamesTable* const newTable = UTIL_assembleFileNamesTable(NULL, 0, NULL);
CONTROL( newTable != NULL );
newTotalTableSize = getTotalTableSize(table1) + getTotalTableSize(table2);
buf = (char*) calloc(newTotalTableSize, sizeof(*buf));
CONTROL ( buf != NULL );
newTable->buf = buf;
newTable->tableSize = table1->tableSize + table2->tableSize;
newTable->fileNames = (const char **) calloc(newTable->tableSize, sizeof(*(newTable->fileNames)));
CONTROL ( newTable->fileNames != NULL );
{ unsigned idx1;
for( idx1=0 ; (idx1 < table1->tableSize) && table1->fileNames[idx1] && (pos < newTotalTableSize); ++idx1, ++newTableIdx) {
size_t const curLen = strlen(table1->fileNames[idx1]);
memcpy(buf+pos, table1->fileNames[idx1], curLen);
assert(newTableIdx <= newTable->tableSize);
newTable->fileNames[newTableIdx] = buf+pos;
pos += curLen+1;
} }
{ unsigned idx2;
for( idx2=0 ; (idx2 < table2->tableSize) && table2->fileNames[idx2] && (pos < newTotalTableSize) ; ++idx2, ++newTableIdx) {
size_t const curLen = strlen(table2->fileNames[idx2]);
memcpy(buf+pos, table2->fileNames[idx2], curLen);
assert(newTableIdx <= newTable->tableSize);
newTable->fileNames[newTableIdx] = buf+pos;
pos += curLen+1;
} }
assert(pos <= newTotalTableSize);
newTable->tableSize = newTableIdx;
UTIL_freeFileNamesTable(table1);
UTIL_freeFileNamesTable(table2);
return newTable;
}
#ifdef _WIN32
int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
static int UTIL_prepareFileList(const char* dirName,
char** bufStart, size_t* pos,
char** bufEnd, int followLinks)
{
char* path;
int dirLength, fnameLength, pathLength, nbFiles = 0;
size_t dirLength, pathLength;
int nbFiles = 0;
WIN32_FIND_DATAA cFile;
HANDLE hFile;
dirLength = (int)strlen(dirName);
dirLength = strlen(dirName);
path = (char*) malloc(dirLength + 3);
if (!path) return 0;
@ -232,7 +483,7 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char
free(path);
do {
fnameLength = (int)strlen(cFile.cFileName);
size_t const fnameLength = strlen(cFile.cFileName);
path = (char*) malloc(dirLength + fnameLength + 2);
if (!path) { FindClose(hFile); return 0; }
memcpy(path, dirName, dirLength);
@ -260,8 +511,7 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char
memcpy(*bufStart + *pos, path, pathLength+1 /* include final \0 */);
*pos += pathLength + 1;
nbFiles++;
}
}
} }
free(path);
} while (FindNextFileA(hFile, &cFile));
@ -271,12 +521,13 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char
#elif defined(__linux__) || (PLATFORM_POSIX_VERSION >= 200112L) /* opendir, readdir require POSIX.1-2001 */
int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
static int UTIL_prepareFileList(const char *dirName,
char** bufStart, size_t* pos,
char** bufEnd, int followLinks)
{
DIR *dir;
struct dirent *entry;
char* path;
size_t dirLength, fnameLength, pathLength;
DIR* dir;
struct dirent * entry;
size_t dirLength;
int nbFiles = 0;
if (!(dir = opendir(dirName))) {
@ -287,6 +538,8 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char
dirLength = strlen(dirName);
errno = 0;
while ((entry = readdir(dir)) != NULL) {
char* path;
size_t fnameLength, pathLength;
if (strcmp (entry->d_name, "..") == 0 ||
strcmp (entry->d_name, ".") == 0) continue;
fnameLength = strlen(entry->d_name);
@ -320,14 +573,13 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char
memcpy(*bufStart + *pos, path, pathLength + 1); /* with final \0 */
*pos += pathLength + 1;
nbFiles++;
}
}
} }
free(path);
errno = 0; /* clear errno after UTIL_isDirectory, UTIL_prepareFileList */
}
if (errno != 0) {
UTIL_DISPLAYLEVEL(1, "readdir(%s) error: %s\n", dirName, strerror(errno));
UTIL_DISPLAYLEVEL(1, "readdir(%s) error: %s \n", dirName, strerror(errno));
free(*bufStart);
*bufStart = NULL;
}
@ -337,10 +589,12 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char
#else
int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
static int UTIL_prepareFileList(const char *dirName,
char** bufStart, size_t* pos,
char** bufEnd, int followLinks)
{
(void)bufStart; (void)bufEnd; (void)pos; (void)followLinks;
UTIL_DISPLAYLEVEL(1, "Directory %s ignored (compiled without _WIN32 or _POSIX_C_SOURCE)\n", dirName);
UTIL_DISPLAYLEVEL(1, "Directory %s ignored (compiled without _WIN32 or _POSIX_C_SOURCE) \n", dirName);
return 0;
}
@ -367,68 +621,68 @@ const char* UTIL_getFileExtension(const char* infilename)
return extension;
}
/*
* UTIL_createFileList - takes a list of files and directories (params: inputNames, inputNamesNb), scans directories,
* and returns a new list of files (params: return value, allocatedBuffer, allocatedNamesNb).
* After finishing usage of the list the structures should be freed with UTIL_freeFileList(params: return value, allocatedBuffer)
* In case of error UTIL_createFileList returns NULL and UTIL_freeFileList should not be called.
*/
const char**
UTIL_createFileList(const char **inputNames, unsigned inputNamesNb,
char** allocatedBuffer, unsigned* allocatedNamesNb,
int followLinks)
FileNamesTable*
UTIL_createExpandedFNT(const char** inputNames, size_t nbIfns, int followLinks)
{
size_t pos;
unsigned i, nbFiles;
unsigned nbFiles;
char* buf = (char*)malloc(LIST_SIZE_INCREASE);
char* bufend = buf + LIST_SIZE_INCREASE;
if (!buf) return NULL;
for (i=0, pos=0, nbFiles=0; i<inputNamesNb; i++) {
if (!UTIL_isDirectory(inputNames[i])) {
size_t const len = strlen(inputNames[i]);
if (buf + pos + len >= bufend) {
ptrdiff_t newListSize = (bufend - buf) + LIST_SIZE_INCREASE;
assert(newListSize >= 0);
buf = (char*)UTIL_realloc(buf, (size_t)newListSize);
bufend = buf + newListSize;
if (!buf) return NULL;
}
if (buf + pos + len < bufend) {
memcpy(buf+pos, inputNames[i], len+1); /* including final \0 */
pos += len + 1;
nbFiles++;
}
} else {
nbFiles += (unsigned)UTIL_prepareFileList(inputNames[i], &buf, &pos, &bufend, followLinks);
if (buf == NULL) return NULL;
} }
{ size_t ifnNb, pos;
for (ifnNb=0, pos=0, nbFiles=0; ifnNb<nbIfns; ifnNb++) {
if (!UTIL_isDirectory(inputNames[ifnNb])) {
size_t const len = strlen(inputNames[ifnNb]);
if (buf + pos + len >= bufend) {
ptrdiff_t newListSize = (bufend - buf) + LIST_SIZE_INCREASE;
assert(newListSize >= 0);
buf = (char*)UTIL_realloc(buf, (size_t)newListSize);
if (!buf) return NULL;
bufend = buf + newListSize;
}
if (buf + pos + len < bufend) {
memcpy(buf+pos, inputNames[ifnNb], len+1); /* including final \0 */
pos += len + 1;
nbFiles++;
}
} else {
nbFiles += (unsigned)UTIL_prepareFileList(inputNames[ifnNb], &buf, &pos, &bufend, followLinks);
if (buf == NULL) return NULL;
} } }
if (nbFiles == 0) { free(buf); return NULL; }
{ const char** const fileTable = (const char**)malloc((nbFiles + 1) * sizeof(*fileTable));
if (!fileTable) { free(buf); return NULL; }
{ size_t ifnNb, pos;
const char** const fileNamesTable = (const char**)malloc((nbFiles + 1) * sizeof(*fileNamesTable));
if (!fileNamesTable) { free(buf); return NULL; }
for (i = 0, pos = 0; i < nbFiles; i++) {
fileTable[i] = buf + pos;
if (buf + pos > bufend) { free(buf); free((void*)fileTable); return NULL; }
pos += strlen(fileTable[i]) + 1;
for (ifnNb = 0, pos = 0; ifnNb < nbFiles; ifnNb++) {
fileNamesTable[ifnNb] = buf + pos;
if (buf + pos > bufend) { free(buf); free((void*)fileNamesTable); return NULL; }
pos += strlen(fileNamesTable[ifnNb]) + 1;
}
*allocatedBuffer = buf;
*allocatedNamesNb = nbFiles;
return fileTable;
return UTIL_assembleFileNamesTable(fileNamesTable, nbFiles, buf);
}
}
/*-****************************************
* Console log
******************************************/
int g_utilDisplayLevel;
void UTIL_expandFNT(FileNamesTable** fnt, int followLinks)
{
FileNamesTable* const newFNT = UTIL_createExpandedFNT((*fnt)->fileNames, (*fnt)->tableSize, followLinks);
UTIL_freeFileNamesTable(*fnt);
*fnt = newFNT;
}
FileNamesTable* UTIL_createFNT_fromROTable(const char** filenames, size_t nbFilenames)
{
size_t const sizeof_FNTable = nbFilenames * sizeof(*filenames);
const char** const newFNTable = (const char**)malloc(sizeof_FNTable);
if (newFNTable==NULL) return NULL;
memcpy((void*)newFNTable, filenames, sizeof_FNTable); /* void* : mitigate a Visual compiler bug or limitation */
return UTIL_assembleFileNamesTable(newFNTable, nbFilenames, NULL);
}
/*-****************************************
@ -483,8 +737,7 @@ int UTIL_countPhysicalCores(void)
}
} else {
done = TRUE;
}
}
} }
ptr = buffer;
@ -596,8 +849,7 @@ int UTIL_countPhysicalCores(void)
} else if (ferror(cpuinfo)) {
/* fall back on the sysconf value */
goto failed;
}
}
} }
if (siblings && cpu_cores) {
ratio = siblings / cpu_cores;
}

View File

@ -20,37 +20,23 @@ extern "C" {
* Dependencies
******************************************/
#include "platform.h" /* PLATFORM_POSIX_VERSION, ZSTD_NANOSLEEP_SUPPORT, ZSTD_SETPRIORITY_SUPPORT */
#include <stdlib.h> /* malloc, realloc, free */
#include <stddef.h> /* size_t, ptrdiff_t */
#include <stdio.h> /* fprintf */
#include <sys/types.h> /* stat, utime */
#include <sys/stat.h> /* stat, chmod */
#if defined(_WIN32)
# include <sys/utime.h> /* utime */
# include <io.h> /* _chmod */
#else
# include <unistd.h> /* chown, stat */
# if PLATFORM_POSIX_VERSION < 200809L
# include <utime.h> /* utime */
# else
# include <fcntl.h> /* AT_FDCWD */
# include <sys/stat.h> /* utimensat */
# endif
#endif
#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC, nanosleep */
#include "mem.h" /* U32, U64 */
#include "mem.h" /* U64 */
/*-************************************************************
* Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
***************************************************************/
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
# define UTIL_fseek _fseeki64
# define UTIL_fseek _fseeki64
#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
# define UTIL_fseek fseeko
#elif defined(__MINGW32__) && defined(__MSVCRT__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS)
# define UTIL_fseek fseeko64
# define UTIL_fseek fseeko64
#else
# define UTIL_fseek fseek
# define UTIL_fseek fseek
#endif
@ -106,8 +92,6 @@ extern "C" {
* Console log
******************************************/
extern int g_utilDisplayLevel;
#define UTIL_DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define UTIL_DISPLAYLEVEL(l, ...) { if (g_utilDisplayLevel>=l) { UTIL_DISPLAY(__VA_ARGS__); } }
/*-****************************************
@ -131,7 +115,7 @@ int UTIL_isFIFO(const char* infilename);
#define UTIL_FILESIZE_UNKNOWN ((U64)(-1))
U64 UTIL_getFileSize(const char* infilename);
U64 UTIL_getTotalFileSize(const char* const * const fileNamesTable, unsigned nbFiles);
U64 UTIL_getTotalFileSize(const char* const * fileNamesTable, unsigned nbFiles);
int UTIL_getFileStat(const char* infilename, stat_t* statbuf);
int UTIL_setFileStat(const char* filename, stat_t* statbuf);
int UTIL_chmod(char const* filename, mode_t permissions); /*< like chmod, but avoid changing permission of /dev/null */
@ -139,47 +123,111 @@ int UTIL_compareStr(const void *p1, const void *p2);
const char* UTIL_getFileExtension(const char* infilename);
/*
* A modified version of realloc().
* If UTIL_realloc() fails the original block is freed.
*/
UTIL_STATIC void* UTIL_realloc(void* ptr, size_t size)
{
void* const newptr = realloc(ptr, size);
if (newptr) return newptr;
free(ptr);
return NULL;
}
/*-****************************************
* Lists of Filenames
******************************************/
int UTIL_prepareFileList(const char* dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks);
typedef struct
{ const char** fileNames;
char* buf; /* fileNames are stored in this buffer (or are read-only) */
size_t tableSize; /* nb of fileNames */
size_t tableCapacity;
} FileNamesTable;
/*! UTIL_createFileNamesTable_fromFileName() :
* read filenames from @inputFileName, and store them into returned object.
* @return : a FileNamesTable*, or NULL in case of error (ex: @inputFileName doesn't exist).
* Note: inputFileSize must be less than 50MB
*/
FileNamesTable*
UTIL_createFileNamesTable_fromFileName(const char* inputFileName);
/*! UTIL_assembleFileNamesTable() :
* This function takes ownership of its arguments, @filenames and @buf,
* and store them inside the created object.
* note : this function never fails,
* it will rather exit() the program if internal allocation fails.
* @return : resulting FileNamesTable* object.
*/
FileNamesTable*
UTIL_assembleFileNamesTable(const char** filenames, size_t tableSize, char* buf);
/*! UTIL_freeFileNamesTable() :
* This function is compatible with NULL argument and never fails.
*/
void UTIL_freeFileNamesTable(FileNamesTable* table);
/*! UTIL_mergeFileNamesTable():
* @return : FileNamesTable*, concatenation of @table1 and @table2
* note: @table1 and @table2 are consumed (freed) by this operation
*/
FileNamesTable*
UTIL_mergeFileNamesTable(FileNamesTable* table1, FileNamesTable* table2);
/*! UTIL_expandFNT() :
* read names from @fnt, and expand those corresponding to directories
* update @fnt, now containing only file names,
* @return : 0 in case of success, 1 if error
* note : in case of error, @fnt[0] is NULL
*/
void UTIL_expandFNT(FileNamesTable** fnt, int followLinks);
/*! UTIL_createFNT_fromROTable() :
* copy the @filenames pointer table inside the returned object.
* The names themselves are still stored in their original buffer, which must outlive the object.
* @return : a FileNamesTable* object,
* or NULL in case of error
*/
FileNamesTable*
UTIL_createFNT_fromROTable(const char** filenames, size_t nbFilenames);
/*! UTIL_allocateFileNamesTable() :
* Allocates a table of const char*, to insert read-only names later on.
* The created FileNamesTable* doesn't hold a buffer.
* @return : FileNamesTable*, or NULL, if allocation fails.
*/
FileNamesTable* UTIL_allocateFileNamesTable(size_t tableSize);
/*! UTIL_refFilename() :
* Add a reference to read-only name into @fnt table.
* As @filename is only referenced, its lifetime must outlive @fnt.
* Internal table must be large enough to reference a new member,
* otherwise its UB (protected by an `assert()`).
*/
void UTIL_refFilename(FileNamesTable* fnt, const char* filename);
/* UTIL_createExpandedFNT() is only active if UTIL_HAS_CREATEFILELIST is defined.
* Otherwise, UTIL_createExpandedFNT() is a shell function which does nothing
* apart from displaying a warning message.
*/
#ifdef _WIN32
# define UTIL_HAS_CREATEFILELIST
#elif defined(__linux__) || (PLATFORM_POSIX_VERSION >= 200112L) /* opendir, readdir require POSIX.1-2001 */
# define UTIL_HAS_CREATEFILELIST
# include <dirent.h> /* opendir, readdir */
# include <string.h> /* strerror, memcpy */
#else
#endif /* #ifdef _WIN32 */
/* do not define UTIL_HAS_CREATEFILELIST */
#endif
/*
* UTIL_createFileList - takes a list of files and directories (params: inputNames, inputNamesNb), scans directories,
* and returns a new list of files (params: return value, allocatedBuffer, allocatedNamesNb).
* After finishing usage of the list the structures should be freed with UTIL_freeFileList(params: return value, allocatedBuffer)
* In case of error UTIL_createFileList returns NULL and UTIL_freeFileList should not be called.
/*! UTIL_createExpandedFNT() :
* read names from @filenames, and expand those corresponding to directories.
* links are followed or not depending on @followLinks directive.
* @return : an expanded FileNamesTable*, where each name is a file
* or NULL in case of error
*/
const char**
UTIL_createFileList(const char **inputNames, unsigned inputNamesNb,
char** allocatedBuffer, unsigned* allocatedNamesNb,
int followLinks);
FileNamesTable*
UTIL_createExpandedFNT(const char** filenames, size_t nbFilenames, int followLinks);
UTIL_STATIC void UTIL_freeFileList(const char** filenameTable, char* allocatedBuffer)
{
if (allocatedBuffer) free(allocatedBuffer);
if (filenameTable) free((void*)filenameTable);
}
/*-****************************************
* System
******************************************/
int UTIL_countPhysicalCores(void);
#if defined (__cplusplus)
}
#endif

View File

@ -1,5 +1,5 @@
.
.TH "ZSTD" "1" "October 2019" "zstd 1.4.4" "User Commands"
.TH "ZSTD" "1" "November 2019" "zstd 1.4.4" "User Commands"
.
.SH "NAME"
\fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files
@ -95,120 +95,97 @@ Display information related to a zstd compressed file, such as size, ratio, and
.
.SS "Operation modifiers"
.
.TP
\fB\-#\fR
\fB#\fR compression level [1\-19] (default: 3)
.IP "\(bu" 4
\fB\-#\fR: \fB#\fR compression level [1\-19] (default: 3)
.
.TP
\fB\-\-fast[=#]\fR
switch to ultra\-fast compression levels\. If \fB=#\fR is not present, it defaults to \fB1\fR\. The higher the value, the faster the compression speed, at the cost of some compression ratio\. This setting overwrites compression level if one was set previously\. Similarly, if a compression level is set after \fB\-\-fast\fR, it overrides it\.
.IP "\(bu" 4
\fB\-\-fast[=#]\fR: switch to ultra\-fast compression levels\. If \fB=#\fR is not present, it defaults to \fB1\fR\. The higher the value, the faster the compression speed, at the cost of some compression ratio\. This setting overwrites compression level if one was set previously\. Similarly, if a compression level is set after \fB\-\-fast\fR, it overrides it\.
.
.TP
\fB\-\-ultra\fR
unlocks high compression levels 20+ (maximum 22), using a lot more memory\. Note that decompression will also require more memory when using these levels\.
.IP "\(bu" 4
\fB\-\-ultra\fR: unlocks high compression levels 20+ (maximum 22), using a lot more memory\. Note that decompression will also require more memory when using these levels\.
.
.TP
\fB\-\-long[=#]\fR
enables long distance matching with \fB#\fR \fBwindowLog\fR, if not \fB#\fR is not present it defaults to \fB27\fR\. This increases the window size (\fBwindowLog\fR) and memory usage for both the compressor and decompressor\. This setting is designed to improve the compression ratio for files with long matches at a large distance\.
.IP "\(bu" 4
\fB\-\-long[=#]\fR: enables long distance matching with \fB#\fR \fBwindowLog\fR, if not \fB#\fR is not present it defaults to \fB27\fR\. This increases the window size (\fBwindowLog\fR) and memory usage for both the compressor and decompressor\. This setting is designed to improve the compression ratio for files with long matches at a large distance\.
.
.IP
Note: If \fBwindowLog\fR is set to larger than 27, \fB\-\-long=windowLog\fR or \fB\-\-memory=windowSize\fR needs to be passed to the decompressor\.
.
.TP
\fB\-T#\fR, \fB\-\-threads=#\fR
Compress using \fB#\fR working threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to ZSTDMT_NBTHREADS_MAX==200\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\.
.IP "\(bu" 4
\fB\-T#\fR, \fB\-\-threads=#\fR: Compress using \fB#\fR working threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to ZSTDMT_NBTHREADS_MAX==200\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\.
.
.TP
\fB\-\-single\-thread\fR
Does not spawn a thread for compression, use a single thread for both I/O and compression\. In this mode, compression is serialized with I/O, which is slightly slower\. (This is different from \fB\-T1\fR, which spawns 1 compression thread in parallel of I/O)\. This mode is the only one available when multithread support is disabled\. Single\-thread mode features lower memory usage\. Final compressed result is slightly different from \fB\-T1\fR\.
.IP "\(bu" 4
\fB\-\-single\-thread\fR: Does not spawn a thread for compression, use a single thread for both I/O and compression\. In this mode, compression is serialized with I/O, which is slightly slower\. (This is different from \fB\-T1\fR, which spawns 1 compression thread in parallel of I/O)\. This mode is the only one available when multithread support is disabled\. Single\-thread mode features lower memory usage\. Final compressed result is slightly different from \fB\-T1\fR\.
.
.TP
\fB\-\-adapt[=min=#,max=#]\fR
\fBzstd\fR will dynamically adapt compression level to perceived I/O conditions\. Compression level adaptation can be observed live by using command \fB\-v\fR\. Adaptation can be constrained between supplied \fBmin\fR and \fBmax\fR levels\. The feature works when combined with multi\-threading and \fB\-\-long\fR mode\. It does not work with \fB\-\-single\-thread\fR\. It sets window size to 8 MB by default (can be changed manually, see \fBwlog\fR)\. Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible\. \fInote\fR : at the time of this writing, \fB\-\-adapt\fR can remain stuck at low speed when combined with multiple worker threads (>=2)\.
.IP "\(bu" 4
\fB\-\-adapt[=min=#,max=#]\fR : \fBzstd\fR will dynamically adapt compression level to perceived I/O conditions\. Compression level adaptation can be observed live by using command \fB\-v\fR\. Adaptation can be constrained between supplied \fBmin\fR and \fBmax\fR levels\. The feature works when combined with multi\-threading and \fB\-\-long\fR mode\. It does not work with \fB\-\-single\-thread\fR\. It sets window size to 8 MB by default (can be changed manually, see \fBwlog\fR)\. Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible\. \fInote\fR : at the time of this writing, \fB\-\-adapt\fR can remain stuck at low speed when combined with multiple worker threads (>=2)\.
.
.TP
\fB\-\-stream\-size=#\fR
Sets the pledged source size of input coming from a stream\. This value must be exact, as it will be included in the produced frame header\. Incorrect stream sizes will cause an error\. This information will be used to better optimize compression parameters, resulting in better and potentially faster compression, especially for smaller source sizes\.
.IP "\(bu" 4
\fB\-\-stream\-size=#\fR : Sets the pledged source size of input coming from a stream\. This value must be exact, as it will be included in the produced frame header\. Incorrect stream sizes will cause an error\. This information will be used to better optimize compression parameters, resulting in better and potentially faster compression, especially for smaller source sizes\.
.
.TP
\fB\-\-size\-hint=#\fR
When handling input from a stream, \fBzstd\fR must guess how large the source size will be when optimizing compression parameters\. If the stream size is relatively small, this guess may be a poor one, resulting in a higher compression ratio than expected\. This feature allows for controlling the guess when needed\. Exact guesses result in better compression ratios\. Overestimates result in slightly degraded compression ratios, while underestimates may result in significant degradation\.
.IP "\(bu" 4
\fB\-\-size\-hint=#\fR: When handling input from a stream, \fBzstd\fR must guess how large the source size will be when optimizing compression parameters\. If the stream size is relatively small, this guess may be a poor one, resulting in a higher compression ratio than expected\. This feature allows for controlling the guess when needed\. Exact guesses result in better compression ratios\. Overestimates result in slightly degraded compression ratios, while underestimates may result in significant degradation\.
.
.TP
\fB\-\-rsyncable\fR
\fBzstd\fR will periodically synchronize the compression state to make the compressed file more rsync\-friendly\. There is a negligible impact to compression ratio, and the faster compression levels will see a small compression speed hit\. This feature does not work with \fB\-\-single\-thread\fR\. You probably don\'t want to use it with long range mode, since it will decrease the effectiveness of the synchronization points, but your milage may vary\.
.IP "\(bu" 4
\fB\-\-rsyncable\fR : \fBzstd\fR will periodically synchronize the compression state to make the compressed file more rsync\-friendly\. There is a negligible impact to compression ratio, and the faster compression levels will see a small compression speed hit\. This feature does not work with \fB\-\-single\-thread\fR\. You probably don\'t want to use it with long range mode, since it will decrease the effectiveness of the synchronization points, but your milage may vary\.
.
.TP
\fB\-D file\fR
use \fBfile\fR as Dictionary to compress or decompress FILE(s)
.IP "\(bu" 4
\fB\-D file\fR: use \fBfile\fR as Dictionary to compress or decompress FILE(s)
.
.TP
\fB\-\-no\-dictID\fR
do not store dictionary ID within frame header (dictionary compression)\. The decoder will have to rely on implicit knowledge about which dictionary to use, it won\'t be able to check if it\'s correct\.
.IP "\(bu" 4
\fB\-\-no\-dictID\fR: do not store dictionary ID within frame header (dictionary compression)\. The decoder will have to rely on implicit knowledge about which dictionary to use, it won\'t be able to check if it\'s correct\.
.
.TP
\fB\-o file\fR
save result into \fBfile\fR (only possible with a single \fIINPUT\-FILE\fR)
.IP "\(bu" 4
\fB\-o file\fR: save result into \fBfile\fR (only possible with a single \fIINPUT\-FILE\fR)
.
.TP
\fB\-f\fR, \fB\-\-force\fR
overwrite output without prompting, and (de)compress symbolic links
.IP "\(bu" 4
\fB\-f\fR, \fB\-\-force\fR: overwrite output without prompting, and (de)compress symbolic links
.
.TP
\fB\-c\fR, \fB\-\-stdout\fR
force write to standard output, even if it is the console
.IP "\(bu" 4
\fB\-c\fR, \fB\-\-stdout\fR: force write to standard output, even if it is the console
.
.TP
\fB\-\-[no\-]sparse\fR
enable / disable sparse FS support, to make files with many zeroes smaller on disk\. Creating sparse files may save disk space and speed up decompression by reducing the amount of disk I/O\. default: enabled when output is into a file, and disabled when output is stdout\. This setting overrides default and can force sparse mode over stdout\.
.IP "\(bu" 4
\fB\-\-[no\-]sparse\fR: enable / disable sparse FS support, to make files with many zeroes smaller on disk\. Creating sparse files may save disk space and speed up decompression by reducing the amount of disk I/O\. default: enabled when output is into a file, and disabled when output is stdout\. This setting overrides default and can force sparse mode over stdout\.
.
.TP
\fB\-\-rm\fR
remove source file(s) after successful compression or decompression
.IP "\(bu" 4
\fB\-\-rm\fR: remove source file(s) after successful compression or decompression
.
.TP
\fB\-k\fR, \fB\-\-keep\fR
keep source file(s) after successful compression or decompression\. This is the default behavior\.
.IP "\(bu" 4
\fB\-k\fR, \fB\-\-keep\fR: keep source file(s) after successful compression or decompression\. This is the default behavior\.
.
.TP
\fB\-r\fR
operate recursively on directories
.IP "\(bu" 4
\fB\-r\fR: operate recursively on directories
.
.TP
\fB\-\-output\-dir\-flat[=dir]\fR
resulting files are stored into target \fBdir\fR directory, instead of same directory as origin file\. Be aware that this command can introduce name collision issues, if multiple files, from different directories, end up having the same name\. Collision resolution ensures first file with a given name will be present in \fBdir\fR, while in combination with \fB\-f\fR, the last file will be present instead\.
.IP "\(bu" 4
\fB\-\-filelist=FILE\fR read a list of files to process as content from \fBFILE\fR\. Format is compatible with \fBls\fR output, with one file per line\.
.
.TP
\fB\-\-format=FORMAT\fR
compress and decompress in other formats\. If compiled with support, zstd can compress to or decompress from other compression algorithm formats\. Possibly available options are \fBzstd\fR, \fBgzip\fR, \fBxz\fR, \fBlzma\fR, and \fBlz4\fR\. If no such format is provided, \fBzstd\fR is the default\.
.IP "\(bu" 4
\fB\-\-output\-dir\-flat[=dir]\fR: resulting files are stored into target \fBdir\fR directory, instead of same directory as origin file\. Be aware that this command can introduce name collision issues, if multiple files, from different directories, end up having the same name\. Collision resolution ensures first file with a given name will be present in \fBdir\fR, while in combination with \fB\-f\fR, the last file will be present instead\.
.
.TP
\fB\-h\fR/\fB\-H\fR, \fB\-\-help\fR
display help/long help and exit
.IP "\(bu" 4
\fB\-\-format=FORMAT\fR: compress and decompress in other formats\. If compiled with support, zstd can compress to or decompress from other compression algorithm formats\. Possibly available options are \fBzstd\fR, \fBgzip\fR, \fBxz\fR, \fBlzma\fR, and \fBlz4\fR\. If no such format is provided, \fBzstd\fR is the default\.
.
.TP
\fB\-V\fR, \fB\-\-version\fR
display version number and exit\. Advanced : \fB\-vV\fR also displays supported formats\. \fB\-vvV\fR also displays POSIX support\.
.IP "\(bu" 4
\fB\-h\fR/\fB\-H\fR, \fB\-\-help\fR: display help/long help and exit
.
.TP
\fB\-v\fR
verbose mode
.IP "\(bu" 4
\fB\-V\fR, \fB\-\-version\fR: display version number and exit\. Advanced : \fB\-vV\fR also displays supported formats\. \fB\-vvV\fR also displays POSIX support\.
.
.TP
\fB\-q\fR, \fB\-\-quiet\fR
suppress warnings, interactivity, and notifications\. specify twice to suppress errors too\.
.IP "\(bu" 4
\fB\-v\fR: verbose mode
.
.TP
\fB\-\-no\-progress\fR
do not display the progress bar, but keep all other messages\.
.IP "\(bu" 4
\fB\-q\fR, \fB\-\-quiet\fR: suppress warnings, interactivity, and notifications\. specify twice to suppress errors too\.
.
.TP
\fB\-C\fR, \fB\-\-[no\-]check\fR
add integrity check computed from uncompressed data (default: enabled)
.IP "\(bu" 4
\fB\-\-no\-progress\fR: do not display the progress bar, but keep all other messages\.
.
.TP
\fB\-\-\fR
All arguments after \fB\-\-\fR are treated as files
.IP "\(bu" 4
\fB\-C\fR, \fB\-\-[no\-]check\fR: add integrity check computed from uncompressed data (default: enabled)
.
.IP "\(bu" 4
\fB\-\-\fR: All arguments after \fB\-\-\fR are treated as files
.
.IP "" 0
.
.SS "Restricted usage of Environment Variables"
Using environment variables to set parameters has security implications\. Therefore, this avenue is intentionally restricted\. Only \fBZSTD_CLEVEL\fR is supported currently, for setting compression level\. \fBZSTD_CLEVEL\fR can be used to set the level between 1 and 19 (the "normal" range)\. If the value of \fBZSTD_CLEVEL\fR is not a valid integer, it will be ignored with a warning message\. \fBZSTD_CLEVEL\fR just replaces the default compression level (\fB3\fR)\. It can be overridden by corresponding command line arguments\.

View File

@ -191,6 +191,9 @@ the last one takes effect.
This is the default behavior.
* `-r`:
operate recursively on directories
* `--filelist=FILE`
read a list of files to process as content from `FILE`.
Format is compatible with `ls` output, with one file per line.
* `--output-dir-flat[=dir]`:
resulting files are stored into target `dir` directory,
instead of same directory as origin file.

View File

@ -38,8 +38,7 @@
#ifndef ZSTD_NODICT
# include "dibio.h" /* ZDICT_cover_params_t, DiB_trainFromFiles() */
#endif
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_minCLevel */
#include "zstd.h" /* ZSTD_VERSION_STRING, ZSTD_maxCLevel */
#include "zstd.h" /* ZSTD_VERSION_STRING, ZSTD_minCLevel, ZSTD_maxCLevel */
/*-************************************
@ -156,7 +155,8 @@ static int usage_advanced(const char* programName)
#endif
#ifdef UTIL_HAS_CREATEFILELIST
DISPLAY( " -r : operate recursively on directories \n");
DISPLAY( "--output-dir-flat[=directory]: all resulting files stored into `directory`. \n");
DISPLAY( "--filelist=FILE : read a list of files from FILE. \n");
DISPLAY( "--output-dir-flat=DIR : all resulting files are stored into DIR. \n");
#endif
DISPLAY( "--format=zstd : compress files to the .zst format (default) \n");
#ifdef ZSTD_GZCOMPRESS
@ -585,8 +585,8 @@ int main(int argCount, const char* argv[])
int cLevelLast = -1000000000;
unsigned recursive = 0;
unsigned memLimit = 0;
const char** filenameTable = (const char**)malloc((size_t)argCount * sizeof(const char*)); /* argCount >= 1 */
unsigned filenameIdx = 0;
FileNamesTable* filenames = UTIL_allocateFileNamesTable((size_t)argCount); /* argCount >= 1 */
FileNamesTable* file_of_names = UTIL_allocateFileNamesTable((size_t)argCount); /* argCount >= 1 */
const char* programName = argv[0];
const char* outFileName = NULL;
const char* outDirName = NULL;
@ -599,11 +599,6 @@ int main(int argCount, const char* argv[])
size_t srcSizeHint = 0;
int dictCLevel = g_defaultDictCLevel;
unsigned dictSelect = g_defaultSelectivityLevel;
#ifdef UTIL_HAS_CREATEFILELIST
const char** extendedFileList = NULL;
char* fileNamesBuf = NULL;
unsigned fileNamesNb;
#endif
#ifndef ZSTD_NODICT
ZDICT_cover_params_t coverParams = defaultCoverParams();
ZDICT_fastCover_params_t fastCoverParams = defaultFastCoverParams();
@ -618,8 +613,7 @@ int main(int argCount, const char* argv[])
/* init */
(void)recursive; (void)cLevelLast; /* not used when ZSTD_NOBENCH set */
(void)memLimit; /* not used when ZSTD_NODECOMPRESS set */
if (filenameTable==NULL) { DISPLAY("zstd: %s \n", strerror(errno)); exit(1); }
filenameTable[0] = stdinmark;
if ((filenames==NULL) || (file_of_names==NULL)) { DISPLAY("zstd: allocation error \n"); exit(1); }
g_displayOut = stderr;
cLevel = init_cLevel();
programName = lastNameFromPath(programName);
@ -649,312 +643,317 @@ int main(int argCount, const char* argv[])
/* command switches */
for (argNb=1; argNb<argCount; argNb++) {
const char* argument = argv[argNb];
if(!argument) continue; /* Protection if argument empty */
if (!argument) continue; /* Protection if argument empty */
if (nextArgumentsAreFiles==0) {
/* "-" means stdin/stdout */
if (!strcmp(argument, "-")){
if (!filenameIdx) {
filenameIdx=1, filenameTable[0]=stdinmark;
outFileName=stdoutmark;
g_displayLevel-=(g_displayLevel==2);
continue;
} }
if (nextArgumentsAreFiles) {
UTIL_refFilename(filenames, argument);
continue;
}
/* Decode commands (note : aggregated commands are allowed) */
if (argument[0]=='-') {
/* "-" means stdin/stdout */
if (!strcmp(argument, "-")){
UTIL_refFilename(filenames, stdinmark);
continue;
}
if (argument[1]=='-') {
/* long commands (--long-word) */
if (!strcmp(argument, "--")) { nextArgumentsAreFiles=1; continue; } /* only file names allowed from now on */
if (!strcmp(argument, "--list")) { operation=zom_list; continue; }
if (!strcmp(argument, "--compress")) { operation=zom_compress; continue; }
if (!strcmp(argument, "--decompress")) { operation=zom_decompress; continue; }
if (!strcmp(argument, "--uncompress")) { operation=zom_decompress; continue; }
if (!strcmp(argument, "--force")) { FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; continue; }
if (!strcmp(argument, "--version")) { g_displayOut=stdout; DISPLAY(WELCOME_MESSAGE); CLEAN_RETURN(0); }
if (!strcmp(argument, "--help")) { g_displayOut=stdout; CLEAN_RETURN(usage_advanced(programName)); }
if (!strcmp(argument, "--verbose")) { g_displayLevel++; continue; }
if (!strcmp(argument, "--quiet")) { g_displayLevel--; continue; }
if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; g_displayLevel-=(g_displayLevel==2); continue; }
if (!strcmp(argument, "--ultra")) { ultra=1; continue; }
if (!strcmp(argument, "--check")) { FIO_setChecksumFlag(prefs, 2); continue; }
if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(prefs, 0); continue; }
if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(prefs, 2); continue; }
if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(prefs, 0); continue; }
if (!strcmp(argument, "--test")) { operation=zom_test; continue; }
if (!strcmp(argument, "--train")) { operation=zom_train; if (outFileName==NULL) outFileName=g_defaultDictName; continue; }
if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */
if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */
if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(prefs, 0); continue; }
if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(prefs, 0); continue; }
if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(prefs, 1); continue; }
if (!strcmp(argument, "--priority=rt")) { setRealTimePrio = 1; continue; }
if (!strcmp(argument, "--output-dir-flat")) {nextArgumentIsOutDirName=1; lastCommand=1; continue; }
if (!strcmp(argument, "--adapt")) { adapt = 1; continue; }
if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) CLEAN_RETURN(badusage(programName)); continue; }
if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; }
if (!strcmp(argument, "--format=zstd")) { suffix = ZSTD_EXTENSION; FIO_setCompressionType(prefs, FIO_zstdCompression); continue; }
/* Decode commands (note : aggregated commands are allowed) */
if (argument[0]=='-') {
if (argument[1]=='-') {
/* long commands (--long-word) */
if (!strcmp(argument, "--")) { nextArgumentsAreFiles=1; continue; } /* only file names allowed from now on */
if (!strcmp(argument, "--list")) { operation=zom_list; continue; }
if (!strcmp(argument, "--compress")) { operation=zom_compress; continue; }
if (!strcmp(argument, "--decompress")) { operation=zom_decompress; continue; }
if (!strcmp(argument, "--uncompress")) { operation=zom_decompress; continue; }
if (!strcmp(argument, "--force")) { FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; continue; }
if (!strcmp(argument, "--version")) { g_displayOut=stdout; DISPLAY(WELCOME_MESSAGE); CLEAN_RETURN(0); }
if (!strcmp(argument, "--help")) { g_displayOut=stdout; CLEAN_RETURN(usage_advanced(programName)); }
if (!strcmp(argument, "--verbose")) { g_displayLevel++; continue; }
if (!strcmp(argument, "--quiet")) { g_displayLevel--; continue; }
if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; g_displayLevel-=(g_displayLevel==2); continue; }
if (!strcmp(argument, "--ultra")) { ultra=1; continue; }
if (!strcmp(argument, "--check")) { FIO_setChecksumFlag(prefs, 2); continue; }
if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(prefs, 0); continue; }
if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(prefs, 2); continue; }
if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(prefs, 0); continue; }
if (!strcmp(argument, "--test")) { operation=zom_test; continue; }
if (!strcmp(argument, "--train")) { operation=zom_train; if (outFileName==NULL) outFileName=g_defaultDictName; continue; }
if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */
if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */
if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(prefs, 0); continue; }
if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(prefs, 0); continue; }
if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(prefs, 1); continue; }
if (!strcmp(argument, "--priority=rt")) { setRealTimePrio = 1; continue; }
if (!strcmp(argument, "--output-dir-flat")) {nextArgumentIsOutDirName=1; lastCommand=1; continue; }
if (!strcmp(argument, "--adapt")) { adapt = 1; continue; }
if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) CLEAN_RETURN(badusage(programName)); continue; }
if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; }
if (!strcmp(argument, "--format=zstd")) { suffix = ZSTD_EXTENSION; FIO_setCompressionType(prefs, FIO_zstdCompression); continue; }
#ifdef ZSTD_GZCOMPRESS
if (!strcmp(argument, "--format=gzip")) { suffix = GZ_EXTENSION; FIO_setCompressionType(prefs, FIO_gzipCompression); continue; }
if (!strcmp(argument, "--format=gzip")) { suffix = GZ_EXTENSION; FIO_setCompressionType(prefs, FIO_gzipCompression); continue; }
#endif
#ifdef ZSTD_LZMACOMPRESS
if (!strcmp(argument, "--format=lzma")) { suffix = LZMA_EXTENSION; FIO_setCompressionType(prefs, FIO_lzmaCompression); continue; }
if (!strcmp(argument, "--format=xz")) { suffix = XZ_EXTENSION; FIO_setCompressionType(prefs, FIO_xzCompression); continue; }
if (!strcmp(argument, "--format=lzma")) { suffix = LZMA_EXTENSION; FIO_setCompressionType(prefs, FIO_lzmaCompression); continue; }
if (!strcmp(argument, "--format=xz")) { suffix = XZ_EXTENSION; FIO_setCompressionType(prefs, FIO_xzCompression); continue; }
#endif
#ifdef ZSTD_LZ4COMPRESS
if (!strcmp(argument, "--format=lz4")) { suffix = LZ4_EXTENSION; FIO_setCompressionType(prefs, FIO_lz4Compression); continue; }
if (!strcmp(argument, "--format=lz4")) { suffix = LZ4_EXTENSION; FIO_setCompressionType(prefs, FIO_lz4Compression); continue; }
#endif
if (!strcmp(argument, "--rsyncable")) { rsyncable = 1; continue; }
if (!strcmp(argument, "--compress-literals")) { literalCompressionMode = ZSTD_lcm_huffman; continue; }
if (!strcmp(argument, "--no-compress-literals")) { literalCompressionMode = ZSTD_lcm_uncompressed; continue; }
if (!strcmp(argument, "--no-progress")) { FIO_setNoProgress(1); continue; }
if (!strcmp(argument, "--exclude-compressed")) { FIO_setExcludeCompressedFile(prefs, 1); continue; }
/* long commands with arguments */
if (!strcmp(argument, "--rsyncable")) { rsyncable = 1; continue; }
if (!strcmp(argument, "--compress-literals")) { literalCompressionMode = ZSTD_lcm_huffman; continue; }
if (!strcmp(argument, "--no-compress-literals")) { literalCompressionMode = ZSTD_lcm_uncompressed; continue; }
if (!strcmp(argument, "--no-progress")) { FIO_setNoProgress(1); continue; }
if (!strcmp(argument, "--exclude-compressed")) { FIO_setExcludeCompressedFile(prefs, 1); continue; }
/* long commands with arguments */
#ifndef ZSTD_NODICT
if (longCommandWArg(&argument, "--train-cover")) {
operation = zom_train;
if (outFileName == NULL)
outFileName = g_defaultDictName;
dict = cover;
/* Allow optional arguments following an = */
if (*argument == 0) { memset(&coverParams, 0, sizeof(coverParams)); }
else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
else if (!parseCoverParameters(argument, &coverParams)) { CLEAN_RETURN(badusage(programName)); }
continue;
}
if (longCommandWArg(&argument, "--train-fastcover")) {
operation = zom_train;
if (outFileName == NULL)
outFileName = g_defaultDictName;
dict = fastCover;
/* Allow optional arguments following an = */
if (*argument == 0) { memset(&fastCoverParams, 0, sizeof(fastCoverParams)); }
else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
else if (!parseFastCoverParameters(argument, &fastCoverParams)) { CLEAN_RETURN(badusage(programName)); }
continue;
}
if (longCommandWArg(&argument, "--train-legacy")) {
operation = zom_train;
if (outFileName == NULL)
outFileName = g_defaultDictName;
dict = legacy;
/* Allow optional arguments following an = */
if (*argument == 0) { continue; }
else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
else if (!parseLegacyParameters(argument, &dictSelect)) { CLEAN_RETURN(badusage(programName)); }
continue;
}
if (longCommandWArg(&argument, "--train-cover")) {
operation = zom_train;
if (outFileName == NULL)
outFileName = g_defaultDictName;
dict = cover;
/* Allow optional arguments following an = */
if (*argument == 0) { memset(&coverParams, 0, sizeof(coverParams)); }
else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
else if (!parseCoverParameters(argument, &coverParams)) { CLEAN_RETURN(badusage(programName)); }
continue;
}
if (longCommandWArg(&argument, "--train-fastcover")) {
operation = zom_train;
if (outFileName == NULL)
outFileName = g_defaultDictName;
dict = fastCover;
/* Allow optional arguments following an = */
if (*argument == 0) { memset(&fastCoverParams, 0, sizeof(fastCoverParams)); }
else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
else if (!parseFastCoverParameters(argument, &fastCoverParams)) { CLEAN_RETURN(badusage(programName)); }
continue;
}
if (longCommandWArg(&argument, "--train-legacy")) {
operation = zom_train;
if (outFileName == NULL)
outFileName = g_defaultDictName;
dict = legacy;
/* Allow optional arguments following an = */
if (*argument == 0) { continue; }
else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
else if (!parseLegacyParameters(argument, &dictSelect)) { CLEAN_RETURN(badusage(programName)); }
continue;
}
#endif
if (longCommandWArg(&argument, "--threads=")) { nbWorkers = (int)readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--memlimit=")) { memLimit = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--memory=")) { memLimit = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--memlimit-decompress=")) { memLimit = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--block-size=")) { blockSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; }
if (longCommandWArg(&argument, "--stream-size=")) { streamSrcSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--output-dir-flat=")) { outDirName = argument; continue; }
if (longCommandWArg(&argument, "--long")) {
unsigned ldmWindowLog = 0;
ldmFlag = 1;
/* Parse optional window log */
if (*argument == '=') {
++argument;
ldmWindowLog = readU32FromChar(&argument);
} else if (*argument != 0) {
/* Invalid character following --long */
CLEAN_RETURN(badusage(programName));
}
/* Only set windowLog if not already set by --zstd */
if (compressionParams.windowLog == 0)
compressionParams.windowLog = ldmWindowLog;
continue;
if (longCommandWArg(&argument, "--threads=")) { nbWorkers = (int)readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--memlimit=")) { memLimit = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--memory=")) { memLimit = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--memlimit-decompress=")) { memLimit = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--block-size=")) { blockSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; }
if (longCommandWArg(&argument, "--stream-size=")) { streamSrcSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--output-dir-flat=")) { outDirName = argument; continue; }
if (longCommandWArg(&argument, "--long")) {
unsigned ldmWindowLog = 0;
ldmFlag = 1;
/* Parse optional window log */
if (*argument == '=') {
++argument;
ldmWindowLog = readU32FromChar(&argument);
} else if (*argument != 0) {
/* Invalid character following --long */
CLEAN_RETURN(badusage(programName));
}
/* Only set windowLog if not already set by --zstd */
if (compressionParams.windowLog == 0)
compressionParams.windowLog = ldmWindowLog;
continue;
}
#ifndef ZSTD_NOCOMPRESS /* linking ZSTD_minCLevel() requires compression support */
if (longCommandWArg(&argument, "--fast")) {
/* Parse optional acceleration factor */
if (*argument == '=') {
U32 const maxFast = (U32)-ZSTD_minCLevel();
U32 fastLevel;
++argument;
fastLevel = readU32FromChar(&argument);
if (fastLevel > maxFast) fastLevel = maxFast;
if (fastLevel) {
dictCLevel = cLevel = -(int)fastLevel;
} else {
CLEAN_RETURN(badusage(programName));
}
} else if (*argument != 0) {
/* Invalid character following --fast */
CLEAN_RETURN(badusage(programName));
if (longCommandWArg(&argument, "--fast")) {
/* Parse optional acceleration factor */
if (*argument == '=') {
U32 const maxFast = (U32)-ZSTD_minCLevel();
U32 fastLevel;
++argument;
fastLevel = readU32FromChar(&argument);
if (fastLevel > maxFast) fastLevel = maxFast;
if (fastLevel) {
dictCLevel = cLevel = -(int)fastLevel;
} else {
cLevel = -1; /* default for --fast */
CLEAN_RETURN(badusage(programName));
}
continue;
} else if (*argument != 0) {
/* Invalid character following --fast */
CLEAN_RETURN(badusage(programName));
} else {
cLevel = -1; /* default for --fast */
}
continue;
}
#endif
/* fall-through, will trigger bad_usage() later on */
if (longCommandWArg(&argument, "--filelist=")) {
UTIL_refFilename(file_of_names, argument);
continue;
}
argument++;
while (argument[0]!=0) {
if (lastCommand) {
DISPLAY("error : command must be followed by argument \n");
CLEAN_RETURN(1);
}
/* fall-through, will trigger bad_usage() later on */
}
argument++;
while (argument[0]!=0) {
if (lastCommand) {
DISPLAY("error : command must be followed by argument \n");
CLEAN_RETURN(1);
}
#ifndef ZSTD_NOCOMPRESS
/* compression Level */
if ((*argument>='0') && (*argument<='9')) {
dictCLevel = cLevel = (int)readU32FromChar(&argument);
continue;
}
/* compression Level */
if ((*argument>='0') && (*argument<='9')) {
dictCLevel = cLevel = (int)readU32FromChar(&argument);
continue;
}
#endif
switch(argument[0])
{
/* Display help */
case 'V': g_displayOut=stdout; printVersion(); CLEAN_RETURN(0); /* Version Only */
case 'H':
case 'h': g_displayOut=stdout; CLEAN_RETURN(usage_advanced(programName));
switch(argument[0])
{
/* Display help */
case 'V': g_displayOut=stdout; printVersion(); CLEAN_RETURN(0); /* Version Only */
case 'H':
case 'h': g_displayOut=stdout; CLEAN_RETURN(usage_advanced(programName));
/* Compress */
case 'z': operation=zom_compress; argument++; break;
/* Compress */
case 'z': operation=zom_compress; argument++; break;
/* Decoding */
case 'd':
/* Decoding */
case 'd':
#ifndef ZSTD_NOBENCH
benchParams.mode = BMK_decodeOnly;
if (operation==zom_bench) { argument++; break; } /* benchmark decode (hidden option) */
benchParams.mode = BMK_decodeOnly;
if (operation==zom_bench) { argument++; break; } /* benchmark decode (hidden option) */
#endif
operation=zom_decompress; argument++; break;
operation=zom_decompress; argument++; break;
/* Force stdout, even if stdout==console */
case 'c': forceStdout=1; outFileName=stdoutmark; argument++; break;
/* Force stdout, even if stdout==console */
case 'c': forceStdout=1; outFileName=stdoutmark; argument++; break;
/* Use file content as dictionary */
case 'D': nextEntryIsDictionary = 1; lastCommand = 1; argument++; break;
/* Use file content as dictionary */
case 'D': nextEntryIsDictionary = 1; lastCommand = 1; argument++; break;
/* Overwrite */
case 'f': FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; argument++; break;
/* Overwrite */
case 'f': FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; argument++; break;
/* Verbose mode */
case 'v': g_displayLevel++; argument++; break;
/* Verbose mode */
case 'v': g_displayLevel++; argument++; break;
/* Quiet mode */
case 'q': g_displayLevel--; argument++; break;
/* Quiet mode */
case 'q': g_displayLevel--; argument++; break;
/* keep source file (default) */
case 'k': FIO_setRemoveSrcFile(prefs, 0); argument++; break;
/* keep source file (default) */
case 'k': FIO_setRemoveSrcFile(prefs, 0); argument++; break;
/* Checksum */
case 'C': FIO_setChecksumFlag(prefs, 2); argument++; break;
/* Checksum */
case 'C': FIO_setChecksumFlag(prefs, 2); argument++; break;
/* test compressed file */
case 't': operation=zom_test; argument++; break;
/* test compressed file */
case 't': operation=zom_test; argument++; break;
/* destination file name */
case 'o': nextArgumentIsOutFileName=1; lastCommand=1; argument++; break;
/* destination file name */
case 'o': nextArgumentIsOutFileName=1; lastCommand=1; argument++; break;
/* limit decompression memory */
case 'M':
argument++;
memLimit = readU32FromChar(&argument);
break;
case 'l': operation=zom_list; argument++; break;
/* limit decompression memory */
case 'M':
argument++;
memLimit = readU32FromChar(&argument);
break;
case 'l': operation=zom_list; argument++; break;
#ifdef UTIL_HAS_CREATEFILELIST
/* recursive */
case 'r': recursive=1; argument++; break;
/* recursive */
case 'r': recursive=1; argument++; break;
#endif
#ifndef ZSTD_NOBENCH
/* Benchmark */
case 'b':
operation=zom_bench;
argument++;
break;
/* Benchmark */
case 'b':
operation=zom_bench;
argument++;
break;
/* range bench (benchmark only) */
case 'e':
/* compression Level */
argument++;
cLevelLast = (int)readU32FromChar(&argument);
break;
/* range bench (benchmark only) */
case 'e':
/* compression Level */
argument++;
cLevelLast = (int)readU32FromChar(&argument);
break;
/* Modify Nb Iterations (benchmark only) */
case 'i':
argument++;
bench_nbSeconds = readU32FromChar(&argument);
break;
/* Modify Nb Iterations (benchmark only) */
case 'i':
argument++;
bench_nbSeconds = readU32FromChar(&argument);
break;
/* cut input into blocks (benchmark only) */
case 'B':
argument++;
blockSize = readU32FromChar(&argument);
break;
/* cut input into blocks (benchmark only) */
case 'B':
argument++;
blockSize = readU32FromChar(&argument);
break;
/* benchmark files separately (hidden option) */
case 'S':
argument++;
separateFiles = 1;
break;
/* benchmark files separately (hidden option) */
case 'S':
argument++;
separateFiles = 1;
break;
#endif /* ZSTD_NOBENCH */
/* nb of threads (hidden option) */
case 'T':
argument++;
nbWorkers = (int)readU32FromChar(&argument);
break;
/* Dictionary Selection level */
case 's':
argument++;
dictSelect = readU32FromChar(&argument);
break;
/* Pause at the end (-p) or set an additional param (-p#) (hidden option) */
case 'p': argument++;
#ifndef ZSTD_NOBENCH
if ((*argument>='0') && (*argument<='9')) {
benchParams.additionalParam = (int)readU32FromChar(&argument);
} else
#endif
main_pause=1;
break;
/* Select compressibility of synthetic sample */
case 'P':
{ argument++;
compressibility = (double)readU32FromChar(&argument) / 100;
}
/* nb of threads (hidden option) */
case 'T':
argument++;
nbWorkers = (int)readU32FromChar(&argument);
break;
/* unknown command */
default : CLEAN_RETURN(badusage(programName));
}
/* Dictionary Selection level */
case 's':
argument++;
dictSelect = readU32FromChar(&argument);
break;
/* Pause at the end (-p) or set an additional param (-p#) (hidden option) */
case 'p': argument++;
#ifndef ZSTD_NOBENCH
if ((*argument>='0') && (*argument<='9')) {
benchParams.additionalParam = (int)readU32FromChar(&argument);
} else
#endif
main_pause=1;
break;
/* Select compressibility of synthetic sample */
case 'P':
{ argument++;
compressibility = (double)readU32FromChar(&argument) / 100;
}
continue;
} /* if (argument[0]=='-') */
break;
if (nextArgumentIsMaxDict) { /* kept available for compatibility with old syntax ; will be removed one day */
nextArgumentIsMaxDict = 0;
lastCommand = 0;
maxDictSize = readU32FromChar(&argument);
continue;
/* unknown command */
default : CLEAN_RETURN(badusage(programName));
}
}
continue;
} /* if (argument[0]=='-') */
if (nextArgumentIsDictID) { /* kept available for compatibility with old syntax ; will be removed one day */
nextArgumentIsDictID = 0;
lastCommand = 0;
dictID = readU32FromChar(&argument);
continue;
}
if (nextArgumentIsMaxDict) { /* kept available for compatibility with old syntax ; will be removed one day */
nextArgumentIsMaxDict = 0;
lastCommand = 0;
maxDictSize = readU32FromChar(&argument);
continue;
}
} /* if (nextArgumentIsAFile==0) */
if (nextArgumentIsDictID) { /* kept available for compatibility with old syntax ; will be removed one day */
nextArgumentIsDictID = 0;
lastCommand = 0;
dictID = readU32FromChar(&argument);
continue;
}
if (nextEntryIsDictionary) {
nextEntryIsDictionary = 0;
@ -978,8 +977,8 @@ int main(int argCount, const char* argv[])
continue;
}
/* add filename to list */
filenameTable[filenameIdx++] = argument;
/* none of the above : add filename to list */
UTIL_refFilename(filenames, argument);
}
if (lastCommand) { /* forgotten argument */
@ -1003,35 +1002,45 @@ int main(int argCount, const char* argv[])
#ifdef UTIL_HAS_CREATEFILELIST
g_utilDisplayLevel = g_displayLevel;
if (!followLinks) {
unsigned u;
for (u=0, fileNamesNb=0; u<filenameIdx; u++) {
if ( UTIL_isLink(filenameTable[u])
&& !UTIL_isFIFO(filenameTable[u])
unsigned u, fileNamesNb;
unsigned const nbFilenames = (unsigned)filenames->tableSize;
for (u=0, fileNamesNb=0; u<nbFilenames; u++) {
if ( UTIL_isLink(filenames->fileNames[u])
&& !UTIL_isFIFO(filenames->fileNames[u])
) {
DISPLAYLEVEL(2, "Warning : %s is a symbolic link, ignoring\n", filenameTable[u]);
DISPLAYLEVEL(2, "Warning : %s is a symbolic link, ignoring \n", filenames->fileNames[u]);
} else {
filenameTable[fileNamesNb++] = filenameTable[u];
filenames->fileNames[fileNamesNb++] = filenames->fileNames[u];
} }
if (fileNamesNb == 0 && filenameIdx > 0)
if (fileNamesNb == 0 && nbFilenames > 0) /* all names are eliminated */
CLEAN_RETURN(1);
filenameIdx = fileNamesNb;
filenames->tableSize = fileNamesNb;
} /* if (!followLinks) */
/* read names from a file */
if (file_of_names->tableSize) {
size_t const nbFileLists = file_of_names->tableSize;
size_t flNb;
for (flNb=0; flNb < nbFileLists; flNb++) {
FileNamesTable* const fnt = UTIL_createFileNamesTable_fromFileName(file_of_names->fileNames[flNb]);
if (fnt==NULL) {
DISPLAYLEVEL(1, "zstd: error reading %s \n", file_of_names->fileNames[flNb]);
CLEAN_RETURN(1);
}
filenames = UTIL_mergeFileNamesTable(filenames, fnt);
}
}
if (recursive) { /* at this stage, filenameTable is a list of paths, which can contain both files and directories */
extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf, &fileNamesNb, followLinks);
if (extendedFileList) {
unsigned u;
for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
free((void*)filenameTable);
filenameTable = extendedFileList;
filenameIdx = fileNamesNb;
} }
UTIL_expandFNT(&filenames, followLinks);
}
#else
(void)followLinks;
#endif
if (operation == zom_list) {
#ifndef ZSTD_NODECOMPRESS
int const ret = FIO_listMultipleFiles(filenameIdx, filenameTable, g_displayLevel);
int const ret = FIO_listMultipleFiles((unsigned)filenames->tableSize, filenames->fileNames, g_displayLevel);
CLEAN_RETURN(ret);
#else
DISPLAY("file information is not supported \n");
@ -1062,18 +1071,18 @@ int main(int argCount, const char* argv[])
if (cLevelLast < cLevel) cLevelLast = cLevel;
if (cLevelLast > cLevel)
DISPLAYLEVEL(3, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast);
if(filenameIdx) {
if (filenames->tableSize > 0) {
if(separateFiles) {
unsigned i;
for(i = 0; i < filenameIdx; i++) {
for(i = 0; i < filenames->tableSize; i++) {
int c;
DISPLAYLEVEL(3, "Benchmarking %s \n", filenameTable[i]);
DISPLAYLEVEL(3, "Benchmarking %s \n", filenames->fileNames[i]);
for(c = cLevel; c <= cLevelLast; c++) {
BMK_benchFilesAdvanced(&filenameTable[i], 1, dictFileName, c, &compressionParams, g_displayLevel, &benchParams);
BMK_benchFilesAdvanced(&filenames->fileNames[i], 1, dictFileName, c, &compressionParams, g_displayLevel, &benchParams);
} }
} else {
for(; cLevel <= cLevelLast; cLevel++) {
BMK_benchFilesAdvanced(filenameTable, filenameIdx, dictFileName, cLevel, &compressionParams, g_displayLevel, &benchParams);
BMK_benchFilesAdvanced(filenames->fileNames, (unsigned)filenames->tableSize, dictFileName, cLevel, &compressionParams, g_displayLevel, &benchParams);
} }
} else {
for(; cLevel <= cLevelLast; cLevel++) {
@ -1097,18 +1106,18 @@ int main(int argCount, const char* argv[])
int const optimize = !coverParams.k || !coverParams.d;
coverParams.nbThreads = (unsigned)nbWorkers;
coverParams.zParams = zParams;
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, NULL, &coverParams, NULL, optimize);
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (unsigned)filenames->tableSize, blockSize, NULL, &coverParams, NULL, optimize);
} else if (dict == fastCover) {
int const optimize = !fastCoverParams.k || !fastCoverParams.d;
fastCoverParams.nbThreads = (unsigned)nbWorkers;
fastCoverParams.zParams = zParams;
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, NULL, NULL, &fastCoverParams, optimize);
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (unsigned)filenames->tableSize, blockSize, NULL, NULL, &fastCoverParams, optimize);
} else {
ZDICT_legacy_params_t dictParams;
memset(&dictParams, 0, sizeof(dictParams));
dictParams.selectivityLevel = dictSelect;
dictParams.zParams = zParams;
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, &dictParams, NULL, NULL, 0);
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (unsigned)filenames->tableSize, blockSize, &dictParams, NULL, NULL, 0);
}
#else
(void)dictCLevel; (void)dictSelect; (void)dictID; (void)maxDictSize; /* not used when ZSTD_NODICT set */
@ -1123,16 +1132,16 @@ int main(int argCount, const char* argv[])
#endif
/* No input filename ==> use stdin and stdout */
filenameIdx += !filenameIdx; /* filenameTable[0] is stdin by default */
if (!strcmp(filenameTable[0], stdinmark) && !outFileName)
if (filenames->tableSize == 0) UTIL_refFilename(filenames, stdinmark);
if (!strcmp(filenames->fileNames[0], stdinmark) && !outFileName)
outFileName = stdoutmark; /* when input is stdin, default output is stdout */
/* Check if input/output defined as console; trigger an error in this case */
if (!strcmp(filenameTable[0], stdinmark) && IS_CONSOLE(stdin) )
if (!strcmp(filenames->fileNames[0], stdinmark) && IS_CONSOLE(stdin) )
CLEAN_RETURN(badusage(programName));
if ( outFileName && !strcmp(outFileName, stdoutmark)
&& IS_CONSOLE(stdout)
&& !strcmp(filenameTable[0], stdinmark)
&& !strcmp(filenames->fileNames[0], stdinmark)
&& !forceStdout
&& operation!=zom_decompress )
CLEAN_RETURN(badusage(programName));
@ -1147,8 +1156,8 @@ int main(int argCount, const char* argv[])
#endif
/* No status message in pipe mode (stdin - stdout) or multi-files mode */
if (!strcmp(filenameTable[0], stdinmark) && outFileName && !strcmp(outFileName,stdoutmark) && (g_displayLevel==2)) g_displayLevel=1;
if ((filenameIdx>1) & (g_displayLevel==2)) g_displayLevel=1;
if (!strcmp(filenames->fileNames[0], stdinmark) && outFileName && !strcmp(outFileName,stdoutmark) && (g_displayLevel==2)) g_displayLevel=1;
if ((filenames->tableSize > 1) & (g_displayLevel==2)) g_displayLevel=1;
/* IO Stream/File */
FIO_setNotificationLevel(g_displayLevel);
@ -1173,10 +1182,10 @@ int main(int argCount, const char* argv[])
if (adaptMin > cLevel) cLevel = adaptMin;
if (adaptMax < cLevel) cLevel = adaptMax;
if ((filenameIdx==1) && outFileName)
operationResult = FIO_compressFilename(prefs, outFileName, filenameTable[0], dictFileName, cLevel, compressionParams);
if ((filenames->tableSize==1) && outFileName)
operationResult = FIO_compressFilename(prefs, outFileName, filenames->fileNames[0], dictFileName, cLevel, compressionParams);
else
operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outDirName, outFileName, suffix, dictFileName, cLevel, compressionParams);
operationResult = FIO_compressMultipleFilenames(prefs, filenames->fileNames, (unsigned)filenames->tableSize, outDirName, outFileName, suffix, dictFileName, cLevel, compressionParams);
#else
(void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; /* not used when ZSTD_NOCOMPRESS set */
DISPLAY("Compression not supported \n");
@ -1191,10 +1200,11 @@ int main(int argCount, const char* argv[])
}
}
FIO_setMemLimit(prefs, memLimit);
if (filenameIdx==1 && outFileName)
operationResult = FIO_decompressFilename(prefs, outFileName, filenameTable[0], dictFileName);
else
operationResult = FIO_decompressMultipleFilenames(prefs, filenameTable, filenameIdx, outDirName, outFileName, dictFileName);
if (filenames->tableSize == 1 && outFileName) {
operationResult = FIO_decompressFilename(prefs, outFileName, filenames->fileNames[0], dictFileName);
} else {
operationResult = FIO_decompressMultipleFilenames(prefs, filenames->fileNames, (unsigned)filenames->tableSize, outDirName, outFileName, dictFileName);
}
#else
DISPLAY("Decompression not supported \n");
#endif
@ -1202,13 +1212,9 @@ int main(int argCount, const char* argv[])
_end:
FIO_freePreferences(prefs);
if (main_pause) waitEnter();
#ifdef UTIL_HAS_CREATEFILELIST
if (extendedFileList)
UTIL_freeFileList(extendedFileList, fileNamesBuf);
else
#endif
free((void*)filenameTable);
UTIL_freeFileNamesTable(filenames);
UTIL_freeFileNamesTable(file_of_names);
return operationResult;
}

View File

@ -18,24 +18,26 @@
int main(int argc, char const **argv) {
size_t const kMaxFileSize = (size_t)1 << 27;
int const kFollowLinks = 1;
char *fileNamesBuf = NULL;
char const **files = argv + 1;
unsigned numFiles = argc - 1;
FileNamesTable* files;
const char** const fnTable = argv + 1;
unsigned numFiles = (unsigned)(argc - 1);
uint8_t *buffer = NULL;
size_t bufferSize = 0;
unsigned i;
int ret;
#ifdef UTIL_HAS_CREATEFILELIST
files = UTIL_createFileList(files, numFiles, &fileNamesBuf, &numFiles,
kFollowLinks);
if (!files)
numFiles = 0;
files = UTIL_createExpandedFNT(fnTable, numFiles, kFollowLinks);
if (!files) numFiles = 0;
#else
files = UTIL_createFNT_fromROTable(fnTable, numFiles);
if (!files) numFiles = 0;
assert(numFiles == files->tableSize);
#endif
if (numFiles == 0)
fprintf(stderr, "WARNING: No files passed to %s\n", argv[0]);
for (i = 0; i < numFiles; ++i) {
char const *fileName = files[i];
char const *fileName = files->fileNames[i];
DEBUGLOG(3, "Running %s", fileName);
size_t const fileSize = UTIL_getFileSize(fileName);
size_t readSize;
@ -70,8 +72,6 @@ int main(int argc, char const **argv) {
ret = 0;
free(buffer);
#ifdef UTIL_HAS_CREATEFILELIST
UTIL_freeFileList(files, fileNamesBuf);
#endif
UTIL_freeFileNamesTable(files);
return ret;
}

View File

@ -114,6 +114,7 @@ else
fi
println "\n===> simple tests "
./datagen > tmp
@ -277,6 +278,7 @@ $ZSTD -f tmp && die "attempt to compress a non existing file"
test -f tmp.zst # destination file should still be present
rm -rf tmp* # may also erase tmp* directory from previous failed run
println "\n===> decompression only tests "
# the following test verifies that the decoder is compatible with RLE as first block
# older versions of zstd cli are not able to decode such corner case.
@ -286,7 +288,8 @@ $ZSTD -d -o tmp1 "$TESTDIR/golden-decompression/rle-first-block.zst"
$DIFF -s tmp1 tmp
rm tmp*
println "\m===> compress multiple files"
println "\n===> compress multiple files"
println hello > tmp1
println world > tmp2
$ZSTD tmp1 tmp2 -o "$INTOVOID" -f
@ -306,10 +309,47 @@ $ZSTD tmp1 tmp2 -o tmpexists && die "should have refused to overwrite"
if [ "$?" -eq 139 ]; then
die "should not have segfaulted"
fi
println "\n===> multiple files and shell completion "
./datagen -s1 > tmp1 2> $INTOVOID
./datagen -s2 -g100K > tmp2 2> $INTOVOID
./datagen -s3 -g1M > tmp3 2> $INTOVOID
println "compress tmp* : "
$ZSTD -f tmp*
test -f tmp1.zst
test -f tmp2.zst
test -f tmp3.zst
rm tmp1 tmp2 tmp3
println "decompress tmp* : "
$ZSTD -df ./*.zst
test -f tmp1
test -f tmp2
test -f tmp3
println "compress tmp* into stdout > tmpall : "
$ZSTD -c tmp1 tmp2 tmp3 > tmpall
test -f tmpall # should check size of tmpall (should be tmp1.zst + tmp2.zst + tmp3.zst)
println "decompress tmpall* into stdout > tmpdec : "
cp tmpall tmpall2
$ZSTD -dc tmpall* > tmpdec
test -f tmpdec # should check size of tmpdec (should be 2*(tmp1 + tmp2 + tmp3))
println "compress multiple files including a missing one (notHere) : "
$ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
rm tmp*
if [ -n "$DEVNULLRIGHTS" ]
then
if [ "$isWindows" = false ] ; then
println "\n===> zstd fifo named pipe test "
echo "Hello World!" > tmp_original
mkfifo tmp_named_pipe
# note : fifo test doesn't work in combination with `dd` or `cat`
echo "Hello World!" > tmp_named_pipe &
$ZSTD tmp_named_pipe -o tmp_compressed
$ZSTD -d -o tmp_decompressed tmp_compressed
$DIFF -s tmp_original tmp_decompressed
rm -rf tmp*
fi
if [ -n "$DEVNULLRIGHTS" ] ; then
# these tests requires sudo rights, which is uncommon.
# they are only triggered if DEVNULLRIGHTS macro is defined.
println "\n===> checking /dev/null permissions are unaltered "
@ -322,6 +362,7 @@ then
ls -las $INTOVOID | grep "rw-rw-rw-"
fi
println "\n===> compress multiple files into an output directory, --output-dir-flat"
println henlo > tmp1
mkdir tmpInputTestDir
@ -345,6 +386,68 @@ test -f tmpOutDirDecomp/tmp2
test -f tmpOutDirDecomp/tmp1
rm -rf tmp*
println "test : compress multiple files reading them from a file, --filelist=FILE"
println "Hello world!, file1" > tmp1
println "Hello world!, file2" > tmp2
println tmp1 > tmp_fileList
println tmp2 >> tmp_fileList
$ZSTD -f --filelist=tmp_fileList
test -f tmp2.zst
test -f tmp1.zst
println "test : reading file list from a symlink, --filelist=FILE"
rm -f *.zst
ln -s tmp_fileList tmp_symLink
$ZSTD -f --filelist=tmp_symLink
test -f tmp2.zst
test -f tmp1.zst
println "test : compress multiple files reading them from multiple files, --filelist=FILE"
rm -f *.zst
println "Hello world!, file3" > tmp3
println "Hello world!, file4" > tmp4
println tmp3 > tmp_fileList2
println tmp4 >> tmp_fileList2
$ZSTD -f --filelist=tmp_fileList --filelist=tmp_fileList2
test -f tmp1.zst
test -f tmp2.zst
test -f tmp3.zst
test -f tmp4.zst
println "test : decompress multiple files reading them from a file, --filelist=FILE"
rm -f tmp1 tmp2
println tmp1.zst > tmpZst
println tmp2.zst >> tmpZst
$ZSTD -d -f --filelist=tmpZst
test -f tmp1
test -f tmp2
println "test : decompress multiple files reading them from multiple files, --filelist=FILE"
rm -f tmp1 tmp2 tmp3 tmp4
println tmp3.zst > tmpZst2
println tmp4.zst >> tmpZst2
$ZSTD -d -f --filelist=tmpZst --filelist=tmpZst2
test -f tmp1
test -f tmp2
test -f tmp3
test -f tmp4
println "test : survive a list of files which is text garbage (--filelist=FILE)"
./datagen > tmp_badList
$ZSTD -f --filelist=tmp_badList && die "should have failed : list is text garbage"
println "test : survive a list of files which is binary garbage (--filelist=FILE)"
./datagen -P0 -g1M > tmp_badList
$ZSTD -qq -f --filelist=tmp_badList && die "should have failed : list is binary garbage" # let's avoid printing binary garbage on console
println "test : try to overflow internal list of files (--filelist=FILE)"
touch tmp1 tmp2 tmp3 tmp4 tmp5 tmp6
ls tmp* > tmpList
$ZSTD -f tmp1 --filelist=tmpList --filelist=tmpList tmp2 tmp3 # can trigger an overflow of internal file list
rm -rf tmp*
println "\n===> Advanced compression parameters "
println "Hello world!" | $ZSTD --zstd=windowLog=21, - -o tmp.zst && die "wrong parameters not detected!"
println "Hello world!" | $ZSTD --zstd=windowLo=21 - -o tmp.zst && die "wrong parameters not detected!"
@ -464,28 +567,6 @@ $DIFF tmpSparse2M tmpSparseRegenerated
rm tmpSparse*
println "\n===> multiple files tests "
./datagen -s1 > tmp1 2> $INTOVOID
./datagen -s2 -g100K > tmp2 2> $INTOVOID
./datagen -s3 -g1M > tmp3 2> $INTOVOID
println "compress tmp* : "
$ZSTD -f tmp*
ls -ls tmp*
rm tmp1 tmp2 tmp3
println "decompress tmp* : "
$ZSTD -df ./*.zst
ls -ls tmp*
println "compress tmp* into stdout > tmpall : "
$ZSTD -c tmp1 tmp2 tmp3 > tmpall
ls -ls tmp* # check size of tmpall (should be tmp1.zst + tmp2.zst + tmp3.zst)
println "decompress tmpall* into stdout > tmpdec : "
cp tmpall tmpall2
$ZSTD -dc tmpall* > tmpdec
ls -ls tmp* # check size of tmpdec (should be 2*(tmp1 + tmp2 + tmp3))
println "compress multiple files including a missing one (notHere) : "
$ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
println "\n===> stream-size mode"
./datagen -g11000 > tmp
@ -726,7 +807,6 @@ $ZSTD -t tmpSplit.* && die "bad file not detected !"
./datagen | $ZSTD -c | $ZSTD -t
println "\n===> golden files tests "
$ZSTD -t -r "$TESTDIR/golden-compression"
@ -748,6 +828,7 @@ println "benchmark decompression only"
$ZSTD -f tmp1
$ZSTD -b -d -i0 tmp1.zst
println "\n===> zstd compatibility tests "
./datagen > tmp
@ -755,6 +836,7 @@ rm -f tmp.zst
$ZSTD --format=zstd -f tmp
test -f tmp.zst
println "\n===> gzip compatibility tests "
GZIPMODE=1
@ -882,9 +964,8 @@ else
fi
println "\n===> lz4 frame tests "
if [ $LZ4MODE -eq 1 ]; then
println "\n===> lz4 frame tests "
./datagen > tmp
$ZSTD -f --format=lz4 tmp
$ZSTD -f tmp
@ -892,9 +973,10 @@ if [ $LZ4MODE -eq 1 ]; then
truncateLastByte tmp.lz4 | $ZSTD -t > $INTOVOID && die "incomplete frame not detected !"
rm tmp*
else
println "lz4 mode not supported"
println "\nlz4 mode not supported"
fi
println "\n===> suffix list test"
! $ZSTD -d tmp.abc 2> tmplg
@ -912,6 +994,7 @@ if [ $LZ4MODE -ne 1 ]; then
grep ".lz4" tmplg > $INTOVOID && die "Unsupported suffix listed"
fi
println "\n===> tar extension tests "
rm -f tmp tmp.tar tmp.tzst tmp.tgz tmp.txz tmp.tlz4
@ -950,7 +1033,6 @@ touch tmp.t tmp.tz tmp.tzs
! $ZSTD -d tmp.tz
! $ZSTD -d tmp.tzs
exit
println "\n===> zstd round-trip tests "
@ -1205,18 +1287,4 @@ $ZSTD --train-cover "$TESTDIR"/*.c "$PRGDIR"/*.c
test -f dictionary
rm -f tmp* dictionary
if [ "$isWindows" = false ] ; then
println "\n===> zstd fifo named pipe test "
dd bs=1 count=10 if=/dev/zero of=tmp_original
mkfifo named_pipe
dd bs=1 count=10 if=/dev/zero of=named_pipe &
$ZSTD named_pipe -o tmp_compressed
$ZSTD -d -o tmp_decompressed tmp_compressed
$DIFF -s tmp_original tmp_decompressed
rm -rf tmp*
rm -rf named_pipe
fi
rm -f tmp*

View File

@ -173,21 +173,10 @@ void data_buffer_free(data_buffer_t buffer) {
* data filenames helpers.
*/
data_filenames_t data_filenames_get(data_t const* data) {
data_filenames_t filenames = {.buffer = NULL, .size = 0};
char const* path = data->data.path;
filenames.filenames = UTIL_createFileList(
&path,
1,
&filenames.buffer,
&filenames.size,
/* followLinks */ 0);
return filenames;
}
void data_filenames_free(data_filenames_t filenames) {
UTIL_freeFileList(filenames.filenames, filenames.buffer);
FileNamesTable* data_filenames_get(data_t const* data)
{
char const* const path = data->data.path;
return UTIL_createExpandedFNT(&path, 1, 0 /* followLinks */ );
}
/**
@ -196,26 +185,33 @@ void data_filenames_free(data_filenames_t filenames) {
data_buffers_t data_buffers_get(data_t const* data) {
data_buffers_t buffers = {.size = 0};
data_filenames_t filenames = data_filenames_get(data);
if (filenames.size == 0)
FileNamesTable* const filenames = data_filenames_get(data);
if (filenames == NULL) return buffers;
if (filenames->tableSize == 0) {
UTIL_freeFileNamesTable(filenames);
return buffers;
}
data_buffer_t* buffersPtr =
(data_buffer_t*)malloc(filenames.size * sizeof(data_buffer_t));
if (buffersPtr == NULL)
(data_buffer_t*)malloc(filenames->tableSize * sizeof(*buffersPtr));
if (buffersPtr == NULL) {
UTIL_freeFileNamesTable(filenames);
return buffers;
}
buffers.buffers = (data_buffer_t const*)buffersPtr;
buffers.size = filenames.size;
buffers.size = filenames->tableSize;
for (size_t i = 0; i < filenames.size; ++i) {
buffersPtr[i] = data_buffer_read(filenames.filenames[i]);
for (size_t i = 0; i < filenames->tableSize; ++i) {
buffersPtr[i] = data_buffer_read(filenames->fileNames[i]);
if (buffersPtr[i].data == NULL) {
data_buffers_t const kEmptyBuffer = {};
data_buffers_free(buffers);
UTIL_freeFileNamesTable(filenames);
return kEmptyBuffer;
}
}
UTIL_freeFileNamesTable(filenames);
return buffers;
}

View File

@ -102,25 +102,6 @@ int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2);
*/
void data_buffer_free(data_buffer_t buffer);
typedef struct {
char* buffer;
char const** filenames;
unsigned size;
} data_filenames_t;
/**
* Get a recursive list of filenames in the data object. If it is a file, it
* will only contain one entry. If it is a directory, it will recursively walk
* the directory.
*
* @returns The list of filenames, which has size 0 and NULL pointers on error.
*/
data_filenames_t data_filenames_get(data_t const* data);
/**
* Frees the filenames table.
*/
void data_filenames_free(data_filenames_t filenames);
typedef struct {
data_buffer_t const* buffers;

View File

@ -74,7 +74,7 @@ static U32 g_compressibilityDefault = 50;
#define DEFAULT_DISPLAY_LEVEL 2
#define DISPLAY(...) fprintf(displayOut, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
static int g_displayLevel = DEFAULT_DISPLAY_LEVEL; /* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + progression; 4 : + information */
static unsigned g_displayLevel = DEFAULT_DISPLAY_LEVEL; /* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + progression; 4 : + information */
static FILE* displayOut;
#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
@ -848,7 +848,7 @@ static unsigned readU32FromChar(const char** stringPtr)
{
unsigned result = 0;
while ((**stringPtr >='0') && (**stringPtr <='9'))
result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
result *= 10, result += (unsigned)(**stringPtr - '0'), (*stringPtr)++ ;
return result;
}
@ -865,24 +865,18 @@ int main(int argCount, char** argv)
int cLevel = ZSTDCLI_CLEVEL_DEFAULT;
int cLevelLast = 1;
unsigned recursive = 0;
const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*)); /* argCount >= 1 */
unsigned filenameIdx = 0;
FileNamesTable* filenames = UTIL_allocateFileNamesTable((size_t)argCount);
const char* programName = argv[0];
const char* dictFileName = NULL;
char* dynNameSpace = NULL;
#ifdef UTIL_HAS_CREATEFILELIST
const char** fileNamesTable = NULL;
char* fileNamesBuf = NULL;
unsigned fileNamesNb;
#endif
/* init */
if (filenameTable==NULL) { DISPLAY("zstd: %s \n", strerror(errno)); exit(1); }
if (filenames==NULL) { DISPLAY("zstd: %s \n", strerror(errno)); exit(1); }
displayOut = stderr;
/* Pick out program name from path. Don't rely on stdlib because of conflicting behavior */
{ size_t pos;
for (pos = (int)strlen(programName); pos > 0; pos--) { if (programName[pos] == '/') { pos++; break; } }
for (pos = strlen(programName); pos > 0; pos--) { if (programName[pos] == '/') { pos++; break; } }
programName += pos;
}
@ -930,14 +924,14 @@ int main(int argCount, char** argv)
case 'b':
/* first compression Level */
argument++;
cLevel = readU32FromChar(&argument);
cLevel = (int)readU32FromChar(&argument);
break;
/* range bench (benchmark only) */
case 'e':
/* last compression Level */
argument++;
cLevelLast = readU32FromChar(&argument);
cLevelLast = (int)readU32FromChar(&argument);
break;
/* Modify Nb Iterations (benchmark only) */
@ -964,7 +958,7 @@ int main(int argCount, char** argv)
/* Pause at the end (-p) or set an additional param (-p#) (hidden option) */
case 'p': argument++;
if ((*argument>='0') && (*argument<='9')) {
BMK_setAdditionalParam(readU32FromChar(&argument));
BMK_setAdditionalParam((int)readU32FromChar(&argument));
} else
main_pause=1;
break;
@ -984,7 +978,7 @@ int main(int argCount, char** argv)
}
/* add filename to list */
filenameTable[filenameIdx++] = argument;
UTIL_refFilename(filenames, argument);
}
/* Welcome message (if verbose) */
@ -992,28 +986,16 @@ int main(int argCount, char** argv)
#ifdef UTIL_HAS_CREATEFILELIST
if (recursive) {
fileNamesTable = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf, &fileNamesNb, 1);
if (fileNamesTable) {
unsigned u;
for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, fileNamesTable[u]);
free((void*)filenameTable);
filenameTable = fileNamesTable;
filenameIdx = fileNamesNb;
}
UTIL_expandFNT(&filenames, 1);
}
#endif
BMK_setNotificationLevel(g_displayLevel);
BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast);
BMK_benchFiles(filenames->fileNames, (unsigned)filenames->tableSize, dictFileName, cLevel, cLevelLast);
_end:
if (main_pause) waitEnter();
free(dynNameSpace);
#ifdef UTIL_HAS_CREATEFILELIST
if (fileNamesTable)
UTIL_freeFileList(fileNamesTable, fileNamesBuf);
else
#endif
free((void*)filenameTable);
UTIL_freeFileNamesTable(filenames);
return operationResult;
}