commit
1d6070463f
|
@ -1,44 +0,0 @@
|
|||
# Arguments forwarded verbatim to ./benchmark by the `run` target.
# Empty by default; set it on the command line, e.g. make ARG="in=..."
ARG :=
|
||||
|
||||
# Compiler and optimization flags; `?=` only assigns when the variable is
# not already set, so both can be overridden from the environment.
CC ?= gcc
|
||||
CFLAGS ?= -O3
|
||||
# Header search paths passed to every compile (-c) step below.
INCLUDES := -I ../randomDictBuilder -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder
|
||||
|
||||
# Sources taken from the sibling randomDictBuilder directory.
RANDOM_FILE := ../randomDictBuilder/random.c
|
||||
IO_FILE := ../randomDictBuilder/io.c
|
||||
|
||||
# all: first rule in the file, so it is what a bare `make` builds.
# Depends on `run` and then `clean` (in listed order for a serial make).
all: run clean
|
||||
|
||||
# run: build ./benchmark if needed, print the arguments in use, then execute
# the benchmark with $(ARG) passed on its command line.
.PHONY: run
|
||||
run: benchmark
|
||||
echo "Benchmarking with $(ARG)"
|
||||
./benchmark $(ARG)
|
||||
|
||||
# test: run the benchmarkTest target, then clean.
.PHONY: test
|
||||
test: benchmarkTest clean
|
||||
|
||||
# benchmarkTest: make sure ./benchmark is built, then run test.sh with sh.
# test.sh is listed as a prerequisite and no rule for it is visible here,
# so it is expected to already exist next to this Makefile.
.PHONY: benchmarkTest
|
||||
benchmarkTest: benchmark test.sh
|
||||
sh test.sh
|
||||
|
||||
# benchmark: link the three object files and the local copy of libzstd.a
# into the ./benchmark executable.
benchmark: benchmark.o io.o random.o libzstd.a
|
||||
$(CC) $(CFLAGS) benchmark.o io.o random.o libzstd.a -o benchmark
|
||||
|
||||
# benchmark.o: compile (without linking) benchmark.c using the shared
# include paths.
benchmark.o: benchmark.c
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c benchmark.c
|
||||
|
||||
# random.o: compile $(RANDOM_FILE). No -o is given, so the rule relies on
# the compiler writing random.o into the current directory (gcc-style
# drivers do this for -c).
random.o: $(RANDOM_FILE)
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c $(RANDOM_FILE)
|
||||
|
||||
# io.o: compile $(IO_FILE). No -o is given, so the rule relies on the
# compiler writing io.o into the current directory (gcc-style drivers do
# this for -c).
io.o: $(IO_FILE)
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE)
|
||||
|
||||
# libzstd.a: build the static library in ../../../lib with a recursive make,
# then move the result into this directory.
# No prerequisites are listed, so this rule only runs when ./libzstd.a does
# not exist; an existing local copy is never refreshed.
libzstd.a:
|
||||
$(MAKE) -C ../../../lib libzstd.a
|
||||
mv ../../../lib/libzstd.a .
|
||||
|
||||
# clean: delete local object files, the benchmark binary and the local
# libzstd.a, then invoke the `clean` target of ../../../lib as well.
.PHONY: clean
|
||||
clean:
|
||||
rm -f *.o benchmark libzstd.a
|
||||
$(MAKE) -C ../../../lib clean
|
||||
echo "Cleaning is completed"
|
|
@ -1,849 +0,0 @@
|
|||
Benchmarking Dictionary Builder
|
||||
|
||||
### Permitted Argument:
|
||||
Input File/Directory (in=fileName): required; file or directory used to build the dictionary. If a directory is given, the files inside it are processed recursively. Multiple files/directories may be given, each prefixed with "in=".
|
||||
|
||||
### Running Test:
|
||||
make test
|
||||
|
||||
### Usage:
|
||||
Benchmark the given input files: run make ARG="...", where the quoted value holds the permitted arguments
|
||||
|
||||
### Examples:
|
||||
make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
|
||||
|
||||
### Benchmarking Result:
|
||||
- The first COVER row is the optimize cover run; the second COVER row uses the optimized d and k from the first one.
|
||||
- For every f value of fastCover, the first row is the optimize fastCover run and the second row uses the optimized d and k from the first one. This is repeated for accel (a) values from 1 to 10.
|
||||
- The fourth column is the chosen d and the fifth column is the chosen k (for FAST rows, the "FAST f=... a=..." label counts as the first column).
|
||||
|
||||
github:
|
||||
NODICT 0.000004 2.999642
|
||||
RANDOM 0.024560 8.791189
|
||||
LEGACY 0.727109 8.173529
|
||||
COVER 40.565676 10.652243 8 1298
|
||||
COVER 3.608284 10.652243 8 1298
|
||||
FAST f=15 a=1 4.181024 10.570882 8 1154
|
||||
FAST f=15 a=1 0.040788 10.570882 8 1154
|
||||
FAST f=15 a=2 3.548352 10.574287 6 1970
|
||||
FAST f=15 a=2 0.035535 10.574287 6 1970
|
||||
FAST f=15 a=3 3.287364 10.613950 6 1010
|
||||
FAST f=15 a=3 0.032182 10.613950 6 1010
|
||||
FAST f=15 a=4 3.184976 10.573883 6 1058
|
||||
FAST f=15 a=4 0.029878 10.573883 6 1058
|
||||
FAST f=15 a=5 3.045513 10.580640 8 1154
|
||||
FAST f=15 a=5 0.022162 10.580640 8 1154
|
||||
FAST f=15 a=6 3.003296 10.583677 6 1010
|
||||
FAST f=15 a=6 0.028091 10.583677 6 1010
|
||||
FAST f=15 a=7 2.952655 10.622551 6 1106
|
||||
FAST f=15 a=7 0.02724 10.622551 6 1106
|
||||
FAST f=15 a=8 2.945674 10.614657 6 1010
|
||||
FAST f=15 a=8 0.027264 10.614657 6 1010
|
||||
FAST f=15 a=9 3.153439 10.564018 8 1154
|
||||
FAST f=15 a=9 0.020635 10.564018 8 1154
|
||||
FAST f=15 a=10 2.950416 10.511454 6 1010
|
||||
FAST f=15 a=10 0.026606 10.511454 6 1010
|
||||
FAST f=16 a=1 3.970029 10.681035 8 1154
|
||||
FAST f=16 a=1 0.038188 10.681035 8 1154
|
||||
FAST f=16 a=2 3.422892 10.484978 6 1874
|
||||
FAST f=16 a=2 0.034702 10.484978 6 1874
|
||||
FAST f=16 a=3 3.215836 10.632631 8 1154
|
||||
FAST f=16 a=3 0.026084 10.632631 8 1154
|
||||
FAST f=16 a=4 3.081353 10.626533 6 1106
|
||||
FAST f=16 a=4 0.030032 10.626533 6 1106
|
||||
FAST f=16 a=5 3.041241 10.545027 8 1922
|
||||
FAST f=16 a=5 0.022882 10.545027 8 1922
|
||||
FAST f=16 a=6 2.989390 10.638284 6 1874
|
||||
FAST f=16 a=6 0.028308 10.638284 6 1874
|
||||
FAST f=16 a=7 3.001581 10.797136 6 1106
|
||||
FAST f=16 a=7 0.027479 10.797136 6 1106
|
||||
FAST f=16 a=8 2.984107 10.658356 8 1058
|
||||
FAST f=16 a=8 0.021099 10.658356 8 1058
|
||||
FAST f=16 a=9 2.925788 10.523869 6 1010
|
||||
FAST f=16 a=9 0.026905 10.523869 6 1010
|
||||
FAST f=16 a=10 2.889605 10.745841 6 1874
|
||||
FAST f=16 a=10 0.026846 10.745841 6 1874
|
||||
FAST f=17 a=1 4.031953 10.672080 8 1202
|
||||
FAST f=17 a=1 0.040658 10.672080 8 1202
|
||||
FAST f=17 a=2 3.458107 10.589352 8 1106
|
||||
FAST f=17 a=2 0.02926 10.589352 8 1106
|
||||
FAST f=17 a=3 3.291189 10.662714 8 1154
|
||||
FAST f=17 a=3 0.026531 10.662714 8 1154
|
||||
FAST f=17 a=4 3.154950 10.549456 8 1346
|
||||
FAST f=17 a=4 0.024991 10.549456 8 1346
|
||||
FAST f=17 a=5 3.092271 10.541670 6 1202
|
||||
FAST f=17 a=5 0.038285 10.541670 6 1202
|
||||
FAST f=17 a=6 3.166146 10.729112 6 1874
|
||||
FAST f=17 a=6 0.038217 10.729112 6 1874
|
||||
FAST f=17 a=7 3.035467 10.810485 6 1106
|
||||
FAST f=17 a=7 0.036655 10.810485 6 1106
|
||||
FAST f=17 a=8 3.035668 10.530532 6 1058
|
||||
FAST f=17 a=8 0.037715 10.530532 6 1058
|
||||
FAST f=17 a=9 2.987917 10.589802 8 1922
|
||||
FAST f=17 a=9 0.02217 10.589802 8 1922
|
||||
FAST f=17 a=10 2.981647 10.722579 8 1106
|
||||
FAST f=17 a=10 0.021948 10.722579 8 1106
|
||||
FAST f=18 a=1 4.067144 10.634943 8 1154
|
||||
FAST f=18 a=1 0.041386 10.634943 8 1154
|
||||
FAST f=18 a=2 3.507377 10.546230 6 1970
|
||||
FAST f=18 a=2 0.037572 10.546230 6 1970
|
||||
FAST f=18 a=3 3.323015 10.648061 8 1154
|
||||
FAST f=18 a=3 0.028306 10.648061 8 1154
|
||||
FAST f=18 a=4 3.216735 10.705402 6 1010
|
||||
FAST f=18 a=4 0.030755 10.705402 6 1010
|
||||
FAST f=18 a=5 3.175794 10.588154 8 1874
|
||||
FAST f=18 a=5 0.025315 10.588154 8 1874
|
||||
FAST f=18 a=6 3.127459 10.751104 8 1106
|
||||
FAST f=18 a=6 0.023897 10.751104 8 1106
|
||||
FAST f=18 a=7 3.083017 10.780402 6 1106
|
||||
FAST f=18 a=7 0.029158 10.780402 6 1106
|
||||
FAST f=18 a=8 3.069700 10.547226 8 1346
|
||||
FAST f=18 a=8 0.024046 10.547226 8 1346
|
||||
FAST f=18 a=9 3.056591 10.674759 6 1010
|
||||
FAST f=18 a=9 0.028496 10.674759 6 1010
|
||||
FAST f=18 a=10 3.063588 10.737578 8 1106
|
||||
FAST f=18 a=10 0.023033 10.737578 8 1106
|
||||
FAST f=19 a=1 4.164041 10.650333 8 1154
|
||||
FAST f=19 a=1 0.042906 10.650333 8 1154
|
||||
FAST f=19 a=2 3.585409 10.577066 6 1058
|
||||
FAST f=19 a=2 0.038994 10.577066 6 1058
|
||||
FAST f=19 a=3 3.439643 10.639403 8 1154
|
||||
FAST f=19 a=3 0.028427 10.639403 8 1154
|
||||
FAST f=19 a=4 3.268869 10.554410 8 1298
|
||||
FAST f=19 a=4 0.026866 10.554410 8 1298
|
||||
FAST f=19 a=5 3.238225 10.615109 6 1010
|
||||
FAST f=19 a=5 0.03078 10.615109 6 1010
|
||||
FAST f=19 a=6 3.199558 10.609782 6 1874
|
||||
FAST f=19 a=6 0.030099 10.609782 6 1874
|
||||
FAST f=19 a=7 3.132395 10.794753 6 1106
|
||||
FAST f=19 a=7 0.028964 10.794753 6 1106
|
||||
FAST f=19 a=8 3.148446 10.554842 8 1298
|
||||
FAST f=19 a=8 0.024277 10.554842 8 1298
|
||||
FAST f=19 a=9 3.108324 10.668763 6 1010
|
||||
FAST f=19 a=9 0.02896 10.668763 6 1010
|
||||
FAST f=19 a=10 3.159863 10.757347 8 1106
|
||||
FAST f=19 a=10 0.023351 10.757347 8 1106
|
||||
FAST f=20 a=1 4.462698 10.661788 8 1154
|
||||
FAST f=20 a=1 0.047174 10.661788 8 1154
|
||||
FAST f=20 a=2 3.820269 10.678612 6 1106
|
||||
FAST f=20 a=2 0.040807 10.678612 6 1106
|
||||
FAST f=20 a=3 3.644955 10.648424 8 1154
|
||||
FAST f=20 a=3 0.031398 10.648424 8 1154
|
||||
FAST f=20 a=4 3.546257 10.559756 8 1298
|
||||
FAST f=20 a=4 0.029856 10.559756 8 1298
|
||||
FAST f=20 a=5 3.485248 10.646637 6 1010
|
||||
FAST f=20 a=5 0.033756 10.646637 6 1010
|
||||
FAST f=20 a=6 3.490438 10.775824 8 1106
|
||||
FAST f=20 a=6 0.028338 10.775824 8 1106
|
||||
FAST f=20 a=7 3.631289 10.801795 6 1106
|
||||
FAST f=20 a=7 0.035228 10.801795 6 1106
|
||||
FAST f=20 a=8 3.758936 10.545116 8 1346
|
||||
FAST f=20 a=8 0.027495 10.545116 8 1346
|
||||
FAST f=20 a=9 3.707024 10.677454 6 1010
|
||||
FAST f=20 a=9 0.031326 10.677454 6 1010
|
||||
FAST f=20 a=10 3.586593 10.756017 8 1106
|
||||
FAST f=20 a=10 0.027122 10.756017 8 1106
|
||||
FAST f=21 a=1 5.701396 10.655398 8 1154
|
||||
FAST f=21 a=1 0.067744 10.655398 8 1154
|
||||
FAST f=21 a=2 5.270542 10.650743 6 1106
|
||||
FAST f=21 a=2 0.052999 10.650743 6 1106
|
||||
FAST f=21 a=3 4.945294 10.652380 8 1154
|
||||
FAST f=21 a=3 0.052678 10.652380 8 1154
|
||||
FAST f=21 a=4 4.894079 10.543185 8 1298
|
||||
FAST f=21 a=4 0.04997 10.543185 8 1298
|
||||
FAST f=21 a=5 4.785417 10.630321 6 1010
|
||||
FAST f=21 a=5 0.045294 10.630321 6 1010
|
||||
FAST f=21 a=6 4.789381 10.664477 6 1874
|
||||
FAST f=21 a=6 0.046578 10.664477 6 1874
|
||||
FAST f=21 a=7 4.302955 10.805179 6 1106
|
||||
FAST f=21 a=7 0.041205 10.805179 6 1106
|
||||
FAST f=21 a=8 4.034630 10.551211 8 1298
|
||||
FAST f=21 a=8 0.040121 10.551211 8 1298
|
||||
FAST f=21 a=9 4.523868 10.799114 6 1010
|
||||
FAST f=21 a=9 0.043592 10.799114 6 1010
|
||||
FAST f=21 a=10 4.760736 10.750255 8 1106
|
||||
FAST f=21 a=10 0.043483 10.750255 8 1106
|
||||
FAST f=22 a=1 6.743064 10.640537 8 1154
|
||||
FAST f=22 a=1 0.086967 10.640537 8 1154
|
||||
FAST f=22 a=2 6.121739 10.626638 6 1970
|
||||
FAST f=22 a=2 0.066337 10.626638 6 1970
|
||||
FAST f=22 a=3 5.248851 10.640688 8 1154
|
||||
FAST f=22 a=3 0.054935 10.640688 8 1154
|
||||
FAST f=22 a=4 5.436579 10.588333 8 1298
|
||||
FAST f=22 a=4 0.064113 10.588333 8 1298
|
||||
FAST f=22 a=5 5.812815 10.652653 6 1010
|
||||
FAST f=22 a=5 0.058189 10.652653 6 1010
|
||||
FAST f=22 a=6 5.745472 10.666437 6 1874
|
||||
FAST f=22 a=6 0.057188 10.666437 6 1874
|
||||
FAST f=22 a=7 5.716393 10.806911 6 1106
|
||||
FAST f=22 a=7 0.056 10.806911 6 1106
|
||||
FAST f=22 a=8 5.698799 10.530784 8 1298
|
||||
FAST f=22 a=8 0.0583 10.530784 8 1298
|
||||
FAST f=22 a=9 5.710533 10.777391 6 1010
|
||||
FAST f=22 a=9 0.054945 10.777391 6 1010
|
||||
FAST f=22 a=10 5.685395 10.745023 8 1106
|
||||
FAST f=22 a=10 0.056526 10.745023 8 1106
|
||||
FAST f=23 a=1 7.836923 10.638828 8 1154
|
||||
FAST f=23 a=1 0.099522 10.638828 8 1154
|
||||
FAST f=23 a=2 6.627834 10.631061 6 1970
|
||||
FAST f=23 a=2 0.066769 10.631061 6 1970
|
||||
FAST f=23 a=3 5.602533 10.647288 8 1154
|
||||
FAST f=23 a=3 0.064513 10.647288 8 1154
|
||||
FAST f=23 a=4 6.005580 10.568747 8 1298
|
||||
FAST f=23 a=4 0.062022 10.568747 8 1298
|
||||
FAST f=23 a=5 5.481816 10.676921 6 1010
|
||||
FAST f=23 a=5 0.058959 10.676921 6 1010
|
||||
FAST f=23 a=6 5.460444 10.666194 6 1874
|
||||
FAST f=23 a=6 0.057687 10.666194 6 1874
|
||||
FAST f=23 a=7 5.659822 10.800377 6 1106
|
||||
FAST f=23 a=7 0.06783 10.800377 6 1106
|
||||
FAST f=23 a=8 6.826940 10.522167 8 1298
|
||||
FAST f=23 a=8 0.070533 10.522167 8 1298
|
||||
FAST f=23 a=9 6.804757 10.577799 8 1682
|
||||
FAST f=23 a=9 0.069949 10.577799 8 1682
|
||||
FAST f=23 a=10 6.774933 10.742093 8 1106
|
||||
FAST f=23 a=10 0.068395 10.742093 8 1106
|
||||
FAST f=24 a=1 8.444110 10.632783 8 1154
|
||||
FAST f=24 a=1 0.094357 10.632783 8 1154
|
||||
FAST f=24 a=2 7.289578 10.631061 6 1970
|
||||
FAST f=24 a=2 0.098515 10.631061 6 1970
|
||||
FAST f=24 a=3 8.619780 10.646289 8 1154
|
||||
FAST f=24 a=3 0.098041 10.646289 8 1154
|
||||
FAST f=24 a=4 8.508455 10.555199 8 1298
|
||||
FAST f=24 a=4 0.093885 10.555199 8 1298
|
||||
FAST f=24 a=5 8.471145 10.674363 6 1010
|
||||
FAST f=24 a=5 0.088676 10.674363 6 1010
|
||||
FAST f=24 a=6 8.426727 10.667228 6 1874
|
||||
FAST f=24 a=6 0.087247 10.667228 6 1874
|
||||
FAST f=24 a=7 8.356826 10.803027 6 1106
|
||||
FAST f=24 a=7 0.085835 10.803027 6 1106
|
||||
FAST f=24 a=8 6.756811 10.522049 8 1298
|
||||
FAST f=24 a=8 0.07107 10.522049 8 1298
|
||||
FAST f=24 a=9 6.548169 10.571882 8 1682
|
||||
FAST f=24 a=9 0.0713 10.571882 8 1682
|
||||
FAST f=24 a=10 8.238079 10.736453 8 1106
|
||||
FAST f=24 a=10 0.07004 10.736453 8 1106
|
||||
|
||||
|
||||
hg-commands:
|
||||
NODICT 0.000005 2.425276
|
||||
RANDOM 0.046332 3.490331
|
||||
LEGACY 0.720351 3.911682
|
||||
COVER 45.507731 4.132653 8 386
|
||||
COVER 1.868810 4.132653 8 386
|
||||
FAST f=15 a=1 4.561427 3.866894 8 1202
|
||||
FAST f=15 a=1 0.048946 3.866894 8 1202
|
||||
FAST f=15 a=2 3.574462 3.892119 8 1538
|
||||
FAST f=15 a=2 0.033677 3.892119 8 1538
|
||||
FAST f=15 a=3 3.230227 3.888791 6 1346
|
||||
FAST f=15 a=3 0.034312 3.888791 6 1346
|
||||
FAST f=15 a=4 3.042388 3.899739 8 1010
|
||||
FAST f=15 a=4 0.024307 3.899739 8 1010
|
||||
FAST f=15 a=5 2.800148 3.896220 8 818
|
||||
FAST f=15 a=5 0.022331 3.896220 8 818
|
||||
FAST f=15 a=6 2.706518 3.882039 8 578
|
||||
FAST f=15 a=6 0.020955 3.882039 8 578
|
||||
FAST f=15 a=7 2.701820 3.885430 6 866
|
||||
FAST f=15 a=7 0.026074 3.885430 6 866
|
||||
FAST f=15 a=8 2.604445 3.906932 8 1826
|
||||
FAST f=15 a=8 0.021789 3.906932 8 1826
|
||||
FAST f=15 a=9 2.598568 3.870324 6 1682
|
||||
FAST f=15 a=9 0.026004 3.870324 6 1682
|
||||
FAST f=15 a=10 2.575920 3.920783 8 1442
|
||||
FAST f=15 a=10 0.020228 3.920783 8 1442
|
||||
FAST f=16 a=1 4.630623 4.001430 8 770
|
||||
FAST f=16 a=1 0.047497 4.001430 8 770
|
||||
FAST f=16 a=2 3.674721 3.974431 8 1874
|
||||
FAST f=16 a=2 0.035761 3.974431 8 1874
|
||||
FAST f=16 a=3 3.338384 3.978703 8 1010
|
||||
FAST f=16 a=3 0.029436 3.978703 8 1010
|
||||
FAST f=16 a=4 3.004412 3.983035 8 1010
|
||||
FAST f=16 a=4 0.025744 3.983035 8 1010
|
||||
FAST f=16 a=5 2.881892 3.987710 8 770
|
||||
FAST f=16 a=5 0.023211 3.987710 8 770
|
||||
FAST f=16 a=6 2.807410 3.952717 8 1298
|
||||
FAST f=16 a=6 0.023199 3.952717 8 1298
|
||||
FAST f=16 a=7 2.819623 3.994627 8 770
|
||||
FAST f=16 a=7 0.021806 3.994627 8 770
|
||||
FAST f=16 a=8 2.740092 3.954032 8 1826
|
||||
FAST f=16 a=8 0.0226 3.954032 8 1826
|
||||
FAST f=16 a=9 2.682564 3.969879 6 1442
|
||||
FAST f=16 a=9 0.026324 3.969879 6 1442
|
||||
FAST f=16 a=10 2.657959 3.969755 8 674
|
||||
FAST f=16 a=10 0.020413 3.969755 8 674
|
||||
FAST f=17 a=1 4.729228 4.046000 8 530
|
||||
FAST f=17 a=1 0.049703 4.046000 8 530
|
||||
FAST f=17 a=2 3.764510 3.991519 8 1970
|
||||
FAST f=17 a=2 0.038195 3.991519 8 1970
|
||||
FAST f=17 a=3 3.416992 4.006296 6 914
|
||||
FAST f=17 a=3 0.036244 4.006296 6 914
|
||||
FAST f=17 a=4 3.145626 3.979182 8 1970
|
||||
FAST f=17 a=4 0.028676 3.979182 8 1970
|
||||
FAST f=17 a=5 2.995070 4.050070 8 770
|
||||
FAST f=17 a=5 0.025707 4.050070 8 770
|
||||
FAST f=17 a=6 2.911833 4.040024 8 770
|
||||
FAST f=17 a=6 0.02453 4.040024 8 770
|
||||
FAST f=17 a=7 2.894796 4.015884 8 818
|
||||
FAST f=17 a=7 0.023956 4.015884 8 818
|
||||
FAST f=17 a=8 2.789962 4.039303 8 530
|
||||
FAST f=17 a=8 0.023219 4.039303 8 530
|
||||
FAST f=17 a=9 2.787625 3.996762 8 1634
|
||||
FAST f=17 a=9 0.023651 3.996762 8 1634
|
||||
FAST f=17 a=10 2.754796 4.005059 8 1058
|
||||
FAST f=17 a=10 0.022537 4.005059 8 1058
|
||||
FAST f=18 a=1 4.779117 4.038214 8 242
|
||||
FAST f=18 a=1 0.048814 4.038214 8 242
|
||||
FAST f=18 a=2 3.829753 4.045768 8 722
|
||||
FAST f=18 a=2 0.036541 4.045768 8 722
|
||||
FAST f=18 a=3 3.495053 4.021497 8 770
|
||||
FAST f=18 a=3 0.032648 4.021497 8 770
|
||||
FAST f=18 a=4 3.221395 4.039623 8 770
|
||||
FAST f=18 a=4 0.027818 4.039623 8 770
|
||||
FAST f=18 a=5 3.059369 4.050414 8 530
|
||||
FAST f=18 a=5 0.026296 4.050414 8 530
|
||||
FAST f=18 a=6 3.019292 4.010714 6 962
|
||||
FAST f=18 a=6 0.031104 4.010714 6 962
|
||||
FAST f=18 a=7 2.949322 4.031439 6 770
|
||||
FAST f=18 a=7 0.030745 4.031439 6 770
|
||||
FAST f=18 a=8 2.876425 4.032088 6 386
|
||||
FAST f=18 a=8 0.027407 4.032088 6 386
|
||||
FAST f=18 a=9 2.850958 4.053372 8 674
|
||||
FAST f=18 a=9 0.023799 4.053372 8 674
|
||||
FAST f=18 a=10 2.884352 4.020148 8 1730
|
||||
FAST f=18 a=10 0.024401 4.020148 8 1730
|
||||
FAST f=19 a=1 4.815669 4.061203 8 674
|
||||
FAST f=19 a=1 0.051425 4.061203 8 674
|
||||
FAST f=19 a=2 3.951356 4.013822 8 1442
|
||||
FAST f=19 a=2 0.039968 4.013822 8 1442
|
||||
FAST f=19 a=3 3.554682 4.050425 8 722
|
||||
FAST f=19 a=3 0.032725 4.050425 8 722
|
||||
FAST f=19 a=4 3.242585 4.054677 8 722
|
||||
FAST f=19 a=4 0.028194 4.054677 8 722
|
||||
FAST f=19 a=5 3.105909 4.064524 8 818
|
||||
FAST f=19 a=5 0.02675 4.064524 8 818
|
||||
FAST f=19 a=6 3.059901 4.036857 8 1250
|
||||
FAST f=19 a=6 0.026396 4.036857 8 1250
|
||||
FAST f=19 a=7 3.016151 4.068234 6 770
|
||||
FAST f=19 a=7 0.031501 4.068234 6 770
|
||||
FAST f=19 a=8 2.962902 4.077509 8 530
|
||||
FAST f=19 a=8 0.023333 4.077509 8 530
|
||||
FAST f=19 a=9 2.899607 4.067328 8 530
|
||||
FAST f=19 a=9 0.024553 4.067328 8 530
|
||||
FAST f=19 a=10 2.950978 4.059901 8 434
|
||||
FAST f=19 a=10 0.023852 4.059901 8 434
|
||||
FAST f=20 a=1 5.259834 4.027579 8 1634
|
||||
FAST f=20 a=1 0.061123 4.027579 8 1634
|
||||
FAST f=20 a=2 4.382150 4.025093 8 1634
|
||||
FAST f=20 a=2 0.048009 4.025093 8 1634
|
||||
FAST f=20 a=3 4.104323 4.060842 8 530
|
||||
FAST f=20 a=3 0.040965 4.060842 8 530
|
||||
FAST f=20 a=4 3.853340 4.023504 6 914
|
||||
FAST f=20 a=4 0.041072 4.023504 6 914
|
||||
FAST f=20 a=5 3.728841 4.018089 6 1634
|
||||
FAST f=20 a=5 0.037469 4.018089 6 1634
|
||||
FAST f=20 a=6 3.683045 4.069138 8 578
|
||||
FAST f=20 a=6 0.028011 4.069138 8 578
|
||||
FAST f=20 a=7 3.726973 4.063160 8 722
|
||||
FAST f=20 a=7 0.028437 4.063160 8 722
|
||||
FAST f=20 a=8 3.555073 4.057690 8 386
|
||||
FAST f=20 a=8 0.027588 4.057690 8 386
|
||||
FAST f=20 a=9 3.551095 4.067253 8 482
|
||||
FAST f=20 a=9 0.025976 4.067253 8 482
|
||||
FAST f=20 a=10 3.490127 4.068518 8 530
|
||||
FAST f=20 a=10 0.025971 4.068518 8 530
|
||||
FAST f=21 a=1 7.343816 4.064945 8 770
|
||||
FAST f=21 a=1 0.085035 4.064945 8 770
|
||||
FAST f=21 a=2 5.930894 4.048206 8 386
|
||||
FAST f=21 a=2 0.067349 4.048206 8 386
|
||||
FAST f=21 a=3 6.770775 4.063417 8 578
|
||||
FAST f=21 a=3 0.077104 4.063417 8 578
|
||||
FAST f=21 a=4 6.889409 4.066761 8 626
|
||||
FAST f=21 a=4 0.0717 4.066761 8 626
|
||||
FAST f=21 a=5 6.714896 4.051813 8 914
|
||||
FAST f=21 a=5 0.071026 4.051813 8 914
|
||||
FAST f=21 a=6 6.539890 4.047263 8 1922
|
||||
FAST f=21 a=6 0.07127 4.047263 8 1922
|
||||
FAST f=21 a=7 6.511052 4.068373 8 482
|
||||
FAST f=21 a=7 0.065467 4.068373 8 482
|
||||
FAST f=21 a=8 6.458788 4.071597 8 482
|
||||
FAST f=21 a=8 0.063817 4.071597 8 482
|
||||
FAST f=21 a=9 6.377591 4.052905 8 434
|
||||
FAST f=21 a=9 0.063112 4.052905 8 434
|
||||
FAST f=21 a=10 6.360752 4.047773 8 530
|
||||
FAST f=21 a=10 0.063606 4.047773 8 530
|
||||
FAST f=22 a=1 10.523471 4.040812 8 962
|
||||
FAST f=22 a=1 0.14214 4.040812 8 962
|
||||
FAST f=22 a=2 9.454758 4.059396 8 914
|
||||
FAST f=22 a=2 0.118343 4.059396 8 914
|
||||
FAST f=22 a=3 9.043197 4.043019 8 1922
|
||||
FAST f=22 a=3 0.109798 4.043019 8 1922
|
||||
FAST f=22 a=4 8.716261 4.044819 8 770
|
||||
FAST f=22 a=4 0.099687 4.044819 8 770
|
||||
FAST f=22 a=5 8.529472 4.070576 8 530
|
||||
FAST f=22 a=5 0.093127 4.070576 8 530
|
||||
FAST f=22 a=6 8.424241 4.070565 8 722
|
||||
FAST f=22 a=6 0.093703 4.070565 8 722
|
||||
FAST f=22 a=7 8.403391 4.070591 8 578
|
||||
FAST f=22 a=7 0.089763 4.070591 8 578
|
||||
FAST f=22 a=8 8.285221 4.089171 8 530
|
||||
FAST f=22 a=8 0.087716 4.089171 8 530
|
||||
FAST f=22 a=9 8.282506 4.047470 8 722
|
||||
FAST f=22 a=9 0.089773 4.047470 8 722
|
||||
FAST f=22 a=10 8.241809 4.064151 8 818
|
||||
FAST f=22 a=10 0.090413 4.064151 8 818
|
||||
FAST f=23 a=1 12.389208 4.051635 6 530
|
||||
FAST f=23 a=1 0.147796 4.051635 6 530
|
||||
FAST f=23 a=2 11.300910 4.042835 6 914
|
||||
FAST f=23 a=2 0.133178 4.042835 6 914
|
||||
FAST f=23 a=3 10.879455 4.047415 8 626
|
||||
FAST f=23 a=3 0.129571 4.047415 8 626
|
||||
FAST f=23 a=4 10.522718 4.038269 6 914
|
||||
FAST f=23 a=4 0.118121 4.038269 6 914
|
||||
FAST f=23 a=5 10.348043 4.066884 8 434
|
||||
FAST f=23 a=5 0.112098 4.066884 8 434
|
||||
FAST f=23 a=6 10.238630 4.048635 8 1010
|
||||
FAST f=23 a=6 0.120281 4.048635 8 1010
|
||||
FAST f=23 a=7 10.213255 4.061809 8 530
|
||||
FAST f=23 a=7 0.1121 4.061809 8 530
|
||||
FAST f=23 a=8 10.107879 4.074104 8 818
|
||||
FAST f=23 a=8 0.116544 4.074104 8 818
|
||||
FAST f=23 a=9 10.063424 4.064811 8 674
|
||||
FAST f=23 a=9 0.109045 4.064811 8 674
|
||||
FAST f=23 a=10 10.035801 4.054918 8 530
|
||||
FAST f=23 a=10 0.108735 4.054918 8 530
|
||||
FAST f=24 a=1 14.963878 4.073490 8 722
|
||||
FAST f=24 a=1 0.206344 4.073490 8 722
|
||||
FAST f=24 a=2 13.833472 4.036100 8 962
|
||||
FAST f=24 a=2 0.17486 4.036100 8 962
|
||||
FAST f=24 a=3 13.404631 4.026281 6 1106
|
||||
FAST f=24 a=3 0.153961 4.026281 6 1106
|
||||
FAST f=24 a=4 13.041164 4.065448 8 674
|
||||
FAST f=24 a=4 0.155509 4.065448 8 674
|
||||
FAST f=24 a=5 12.879412 4.054636 8 674
|
||||
FAST f=24 a=5 0.148282 4.054636 8 674
|
||||
FAST f=24 a=6 12.773736 4.081376 8 530
|
||||
FAST f=24 a=6 0.142563 4.081376 8 530
|
||||
FAST f=24 a=7 12.711310 4.059834 8 770
|
||||
FAST f=24 a=7 0.149321 4.059834 8 770
|
||||
FAST f=24 a=8 12.635459 4.052050 8 1298
|
||||
FAST f=24 a=8 0.15095 4.052050 8 1298
|
||||
FAST f=24 a=9 12.558104 4.076516 8 722
|
||||
FAST f=24 a=9 0.144361 4.076516 8 722
|
||||
FAST f=24 a=10 10.661348 4.062137 8 818
|
||||
FAST f=24 a=10 0.108232 4.062137 8 818
|
||||
|
||||
|
||||
hg-changelog:
|
||||
NODICT 0.000017 1.377590
|
||||
RANDOM 0.186171 2.097487
|
||||
LEGACY 1.670867 2.058907
|
||||
COVER 173.561948 2.189685 8 98
|
||||
COVER 4.811180 2.189685 8 98
|
||||
FAST f=15 a=1 18.685906 2.129682 8 434
|
||||
FAST f=15 a=1 0.173376 2.129682 8 434
|
||||
FAST f=15 a=2 12.928259 2.131890 8 482
|
||||
FAST f=15 a=2 0.102582 2.131890 8 482
|
||||
FAST f=15 a=3 11.132343 2.128027 8 386
|
||||
FAST f=15 a=3 0.077122 2.128027 8 386
|
||||
FAST f=15 a=4 10.120683 2.125797 8 434
|
||||
FAST f=15 a=4 0.065175 2.125797 8 434
|
||||
FAST f=15 a=5 9.479092 2.127697 8 386
|
||||
FAST f=15 a=5 0.057905 2.127697 8 386
|
||||
FAST f=15 a=6 9.159523 2.127132 8 1682
|
||||
FAST f=15 a=6 0.058604 2.127132 8 1682
|
||||
FAST f=15 a=7 8.724003 2.129914 8 434
|
||||
FAST f=15 a=7 0.0493 2.129914 8 434
|
||||
FAST f=15 a=8 8.595001 2.127137 8 338
|
||||
FAST f=15 a=8 0.0474 2.127137 8 338
|
||||
FAST f=15 a=9 8.356405 2.125512 8 482
|
||||
FAST f=15 a=9 0.046126 2.125512 8 482
|
||||
FAST f=15 a=10 8.207111 2.126066 8 338
|
||||
FAST f=15 a=10 0.043292 2.126066 8 338
|
||||
FAST f=16 a=1 18.464436 2.144040 8 242
|
||||
FAST f=16 a=1 0.172156 2.144040 8 242
|
||||
FAST f=16 a=2 12.844825 2.148171 8 194
|
||||
FAST f=16 a=2 0.099619 2.148171 8 194
|
||||
FAST f=16 a=3 11.082568 2.140837 8 290
|
||||
FAST f=16 a=3 0.079165 2.140837 8 290
|
||||
FAST f=16 a=4 10.066749 2.144405 8 386
|
||||
FAST f=16 a=4 0.068411 2.144405 8 386
|
||||
FAST f=16 a=5 9.501121 2.140720 8 386
|
||||
FAST f=16 a=5 0.061316 2.140720 8 386
|
||||
FAST f=16 a=6 9.179332 2.139478 8 386
|
||||
FAST f=16 a=6 0.056322 2.139478 8 386
|
||||
FAST f=16 a=7 8.849438 2.142412 8 194
|
||||
FAST f=16 a=7 0.050493 2.142412 8 194
|
||||
FAST f=16 a=8 8.810919 2.143454 8 434
|
||||
FAST f=16 a=8 0.051304 2.143454 8 434
|
||||
FAST f=16 a=9 8.553900 2.140339 8 194
|
||||
FAST f=16 a=9 0.047285 2.140339 8 194
|
||||
FAST f=16 a=10 8.398027 2.143130 8 386
|
||||
FAST f=16 a=10 0.046386 2.143130 8 386
|
||||
FAST f=17 a=1 18.644657 2.157192 8 98
|
||||
FAST f=17 a=1 0.173884 2.157192 8 98
|
||||
FAST f=17 a=2 13.071242 2.159830 8 146
|
||||
FAST f=17 a=2 0.10388 2.159830 8 146
|
||||
FAST f=17 a=3 11.332366 2.153654 6 194
|
||||
FAST f=17 a=3 0.08983 2.153654 6 194
|
||||
FAST f=17 a=4 10.362413 2.156813 8 242
|
||||
FAST f=17 a=4 0.070389 2.156813 8 242
|
||||
FAST f=17 a=5 9.808159 2.155098 6 338
|
||||
FAST f=17 a=5 0.072661 2.155098 6 338
|
||||
FAST f=17 a=6 9.451165 2.153845 6 146
|
||||
FAST f=17 a=6 0.064959 2.153845 6 146
|
||||
FAST f=17 a=7 9.163097 2.155424 6 242
|
||||
FAST f=17 a=7 0.064323 2.155424 6 242
|
||||
FAST f=17 a=8 9.047276 2.156640 8 242
|
||||
FAST f=17 a=8 0.053382 2.156640 8 242
|
||||
FAST f=17 a=9 8.807671 2.152396 8 146
|
||||
FAST f=17 a=9 0.049617 2.152396 8 146
|
||||
FAST f=17 a=10 8.649827 2.152370 8 146
|
||||
FAST f=17 a=10 0.047849 2.152370 8 146
|
||||
FAST f=18 a=1 18.809502 2.168116 8 98
|
||||
FAST f=18 a=1 0.175226 2.168116 8 98
|
||||
FAST f=18 a=2 13.756502 2.170870 6 242
|
||||
FAST f=18 a=2 0.119507 2.170870 6 242
|
||||
FAST f=18 a=3 12.059748 2.163094 6 98
|
||||
FAST f=18 a=3 0.093912 2.163094 6 98
|
||||
FAST f=18 a=4 11.410294 2.172372 8 98
|
||||
FAST f=18 a=4 0.073048 2.172372 8 98
|
||||
FAST f=18 a=5 10.560297 2.166388 8 98
|
||||
FAST f=18 a=5 0.065136 2.166388 8 98
|
||||
FAST f=18 a=6 10.071390 2.162672 8 98
|
||||
FAST f=18 a=6 0.059402 2.162672 8 98
|
||||
FAST f=18 a=7 10.084214 2.166624 6 194
|
||||
FAST f=18 a=7 0.073276 2.166624 6 194
|
||||
FAST f=18 a=8 9.953226 2.167454 8 98
|
||||
FAST f=18 a=8 0.053659 2.167454 8 98
|
||||
FAST f=18 a=9 8.982461 2.161593 6 146
|
||||
FAST f=18 a=9 0.05955 2.161593 6 146
|
||||
FAST f=18 a=10 8.986092 2.164373 6 242
|
||||
FAST f=18 a=10 0.059135 2.164373 6 242
|
||||
FAST f=19 a=1 18.908277 2.176021 8 98
|
||||
FAST f=19 a=1 0.177316 2.176021 8 98
|
||||
FAST f=19 a=2 13.471313 2.176103 8 98
|
||||
FAST f=19 a=2 0.106344 2.176103 8 98
|
||||
FAST f=19 a=3 11.571406 2.172812 8 98
|
||||
FAST f=19 a=3 0.083293 2.172812 8 98
|
||||
FAST f=19 a=4 10.632775 2.177770 6 146
|
||||
FAST f=19 a=4 0.079864 2.177770 6 146
|
||||
FAST f=19 a=5 10.030190 2.175574 6 146
|
||||
FAST f=19 a=5 0.07223 2.175574 6 146
|
||||
FAST f=19 a=6 9.717818 2.169997 8 98
|
||||
FAST f=19 a=6 0.060049 2.169997 8 98
|
||||
FAST f=19 a=7 9.397531 2.172770 8 146
|
||||
FAST f=19 a=7 0.057188 2.172770 8 146
|
||||
FAST f=19 a=8 9.281061 2.175822 8 98
|
||||
FAST f=19 a=8 0.053711 2.175822 8 98
|
||||
FAST f=19 a=9 9.165242 2.169849 6 146
|
||||
FAST f=19 a=9 0.059898 2.169849 6 146
|
||||
FAST f=19 a=10 9.048763 2.173394 8 98
|
||||
FAST f=19 a=10 0.049757 2.173394 8 98
|
||||
FAST f=20 a=1 21.166917 2.183923 6 98
|
||||
FAST f=20 a=1 0.205425 2.183923 6 98
|
||||
FAST f=20 a=2 15.642753 2.182349 6 98
|
||||
FAST f=20 a=2 0.135957 2.182349 6 98
|
||||
FAST f=20 a=3 14.053730 2.173544 6 98
|
||||
FAST f=20 a=3 0.11266 2.173544 6 98
|
||||
FAST f=20 a=4 15.270019 2.183656 8 98
|
||||
FAST f=20 a=4 0.107892 2.183656 8 98
|
||||
FAST f=20 a=5 15.497927 2.174661 6 98
|
||||
FAST f=20 a=5 0.100305 2.174661 6 98
|
||||
FAST f=20 a=6 13.973505 2.172391 8 98
|
||||
FAST f=20 a=6 0.087565 2.172391 8 98
|
||||
FAST f=20 a=7 14.083296 2.172443 8 98
|
||||
FAST f=20 a=7 0.078062 2.172443 8 98
|
||||
FAST f=20 a=8 12.560048 2.175581 8 98
|
||||
FAST f=20 a=8 0.070282 2.175581 8 98
|
||||
FAST f=20 a=9 13.078645 2.173975 6 146
|
||||
FAST f=20 a=9 0.081041 2.173975 6 146
|
||||
FAST f=20 a=10 12.823328 2.177778 8 98
|
||||
FAST f=20 a=10 0.074522 2.177778 8 98
|
||||
FAST f=21 a=1 29.825370 2.183057 6 98
|
||||
FAST f=21 a=1 0.334453 2.183057 6 98
|
||||
FAST f=21 a=2 29.476474 2.182752 8 98
|
||||
FAST f=21 a=2 0.286602 2.182752 8 98
|
||||
FAST f=21 a=3 25.937186 2.175867 8 98
|
||||
FAST f=21 a=3 0.17626 2.175867 8 98
|
||||
FAST f=21 a=4 20.413865 2.179780 8 98
|
||||
FAST f=21 a=4 0.206085 2.179780 8 98
|
||||
FAST f=21 a=5 20.541889 2.178328 6 146
|
||||
FAST f=21 a=5 0.199157 2.178328 6 146
|
||||
FAST f=21 a=6 21.090670 2.174443 6 146
|
||||
FAST f=21 a=6 0.190645 2.174443 6 146
|
||||
FAST f=21 a=7 20.221569 2.177384 6 146
|
||||
FAST f=21 a=7 0.184278 2.177384 6 146
|
||||
FAST f=21 a=8 20.322357 2.179456 6 98
|
||||
FAST f=21 a=8 0.178458 2.179456 6 98
|
||||
FAST f=21 a=9 20.683912 2.174396 6 146
|
||||
FAST f=21 a=9 0.190829 2.174396 6 146
|
||||
FAST f=21 a=10 20.840865 2.174905 8 98
|
||||
FAST f=21 a=10 0.172515 2.174905 8 98
|
||||
FAST f=22 a=1 36.822827 2.181612 6 98
|
||||
FAST f=22 a=1 0.437389 2.181612 6 98
|
||||
FAST f=22 a=2 30.616902 2.183142 8 98
|
||||
FAST f=22 a=2 0.324284 2.183142 8 98
|
||||
FAST f=22 a=3 28.472482 2.178130 8 98
|
||||
FAST f=22 a=3 0.236538 2.178130 8 98
|
||||
FAST f=22 a=4 25.847028 2.181878 8 98
|
||||
FAST f=22 a=4 0.263744 2.181878 8 98
|
||||
FAST f=22 a=5 27.095881 2.180775 8 98
|
||||
FAST f=22 a=5 0.24988 2.180775 8 98
|
||||
FAST f=22 a=6 25.939172 2.170916 8 98
|
||||
FAST f=22 a=6 0.240033 2.170916 8 98
|
||||
FAST f=22 a=7 27.064194 2.177849 8 98
|
||||
FAST f=22 a=7 0.242383 2.177849 8 98
|
||||
FAST f=22 a=8 25.140221 2.178216 8 98
|
||||
FAST f=22 a=8 0.237601 2.178216 8 98
|
||||
FAST f=22 a=9 25.505283 2.177455 6 146
|
||||
FAST f=22 a=9 0.223217 2.177455 6 146
|
||||
FAST f=22 a=10 24.529362 2.176705 6 98
|
||||
FAST f=22 a=10 0.222876 2.176705 6 98
|
||||
FAST f=23 a=1 39.127310 2.183006 6 98
|
||||
FAST f=23 a=1 0.417338 2.183006 6 98
|
||||
FAST f=23 a=2 32.468161 2.183524 6 98
|
||||
FAST f=23 a=2 0.351645 2.183524 6 98
|
||||
FAST f=23 a=3 31.577620 2.172604 6 98
|
||||
FAST f=23 a=3 0.319659 2.172604 6 98
|
||||
FAST f=23 a=4 30.129247 2.183932 6 98
|
||||
FAST f=23 a=4 0.307239 2.183932 6 98
|
||||
FAST f=23 a=5 29.103376 2.183529 6 146
|
||||
FAST f=23 a=5 0.285533 2.183529 6 146
|
||||
FAST f=23 a=6 29.776045 2.174367 8 98
|
||||
FAST f=23 a=6 0.276846 2.174367 8 98
|
||||
FAST f=23 a=7 28.940407 2.178022 6 146
|
||||
FAST f=23 a=7 0.274082 2.178022 6 146
|
||||
FAST f=23 a=8 29.256009 2.179462 6 98
|
||||
FAST f=23 a=8 0.26949 2.179462 6 98
|
||||
FAST f=23 a=9 29.347312 2.170407 8 98
|
||||
FAST f=23 a=9 0.265034 2.170407 8 98
|
||||
FAST f=23 a=10 29.140081 2.171762 8 98
|
||||
FAST f=23 a=10 0.259183 2.171762 8 98
|
||||
FAST f=24 a=1 44.871179 2.182115 6 98
|
||||
FAST f=24 a=1 0.509433 2.182115 6 98
|
||||
FAST f=24 a=2 38.694867 2.180549 8 98
|
||||
FAST f=24 a=2 0.406695 2.180549 8 98
|
||||
FAST f=24 a=3 38.363769 2.172821 8 98
|
||||
FAST f=24 a=3 0.359581 2.172821 8 98
|
||||
FAST f=24 a=4 36.580797 2.184142 8 98
|
||||
FAST f=24 a=4 0.340614 2.184142 8 98
|
||||
FAST f=24 a=5 33.125701 2.183301 8 98
|
||||
FAST f=24 a=5 0.324874 2.183301 8 98
|
||||
FAST f=24 a=6 34.776068 2.173019 6 146
|
||||
FAST f=24 a=6 0.340397 2.173019 6 146
|
||||
FAST f=24 a=7 34.417625 2.176561 6 146
|
||||
FAST f=24 a=7 0.308223 2.176561 6 146
|
||||
FAST f=24 a=8 35.470291 2.182161 6 98
|
||||
FAST f=24 a=8 0.307724 2.182161 6 98
|
||||
FAST f=24 a=9 34.927252 2.172682 6 146
|
||||
FAST f=24 a=9 0.300598 2.172682 6 146
|
||||
FAST f=24 a=10 33.238355 2.173395 6 98
|
||||
FAST f=24 a=10 0.249916 2.173395 6 98
|
||||
|
||||
|
||||
hg-manifest:
|
||||
NODICT 0.000004 1.866377
|
||||
RANDOM 0.696346 2.309436
|
||||
LEGACY 7.064527 2.506977
|
||||
COVER 876.312865 2.582528 8 434
|
||||
COVER 35.684533 2.582528 8 434
|
||||
FAST f=15 a=1 76.618201 2.404013 8 1202
|
||||
FAST f=15 a=1 0.700722 2.404013 8 1202
|
||||
FAST f=15 a=2 49.213058 2.409248 6 1826
|
||||
FAST f=15 a=2 0.473393 2.409248 6 1826
|
||||
FAST f=15 a=3 41.753197 2.409677 8 1490
|
||||
FAST f=15 a=3 0.336848 2.409677 8 1490
|
||||
FAST f=15 a=4 38.648295 2.407996 8 1538
|
||||
FAST f=15 a=4 0.283952 2.407996 8 1538
|
||||
FAST f=15 a=5 36.144936 2.402895 8 1874
|
||||
FAST f=15 a=5 0.270128 2.402895 8 1874
|
||||
FAST f=15 a=6 35.484675 2.394873 8 1586
|
||||
FAST f=15 a=6 0.251637 2.394873 8 1586
|
||||
FAST f=15 a=7 34.280599 2.397311 8 1778
|
||||
FAST f=15 a=7 0.23984 2.397311 8 1778
|
||||
FAST f=15 a=8 32.122572 2.396089 6 1490
|
||||
FAST f=15 a=8 0.251508 2.396089 6 1490
|
||||
FAST f=15 a=9 29.909842 2.390092 6 1970
|
||||
FAST f=15 a=9 0.251233 2.390092 6 1970
|
||||
FAST f=15 a=10 30.102938 2.400086 6 1682
|
||||
FAST f=15 a=10 0.23688 2.400086 6 1682
|
||||
FAST f=16 a=1 67.750401 2.475460 6 1346
|
||||
FAST f=16 a=1 0.796035 2.475460 6 1346
|
||||
FAST f=16 a=2 52.812027 2.480860 6 1730
|
||||
FAST f=16 a=2 0.480384 2.480860 6 1730
|
||||
FAST f=16 a=3 44.179259 2.469304 8 1970
|
||||
FAST f=16 a=3 0.332657 2.469304 8 1970
|
||||
FAST f=16 a=4 37.612728 2.478208 6 1970
|
||||
FAST f=16 a=4 0.32498 2.478208 6 1970
|
||||
FAST f=16 a=5 35.056222 2.475568 6 1298
|
||||
FAST f=16 a=5 0.302824 2.475568 6 1298
|
||||
FAST f=16 a=6 34.713012 2.486079 8 1730
|
||||
FAST f=16 a=6 0.24755 2.486079 8 1730
|
||||
FAST f=16 a=7 33.713687 2.477180 6 1682
|
||||
FAST f=16 a=7 0.280358 2.477180 6 1682
|
||||
FAST f=16 a=8 31.571412 2.475418 8 1538
|
||||
FAST f=16 a=8 0.241241 2.475418 8 1538
|
||||
FAST f=16 a=9 31.608069 2.478263 8 1922
|
||||
FAST f=16 a=9 0.241764 2.478263 8 1922
|
||||
FAST f=16 a=10 31.358002 2.472263 8 1442
|
||||
FAST f=16 a=10 0.221661 2.472263 8 1442
|
||||
FAST f=17 a=1 66.185775 2.536085 6 1346
|
||||
FAST f=17 a=1 0.713549 2.536085 6 1346
|
||||
FAST f=17 a=2 50.365000 2.546105 8 1298
|
||||
FAST f=17 a=2 0.467846 2.546105 8 1298
|
||||
FAST f=17 a=3 42.712843 2.536250 8 1298
|
||||
FAST f=17 a=3 0.34047 2.536250 8 1298
|
||||
FAST f=17 a=4 39.514227 2.535555 8 1442
|
||||
FAST f=17 a=4 0.302989 2.535555 8 1442
|
||||
FAST f=17 a=5 35.189292 2.524925 8 1202
|
||||
FAST f=17 a=5 0.273451 2.524925 8 1202
|
||||
FAST f=17 a=6 35.791683 2.523466 8 1202
|
||||
FAST f=17 a=6 0.268261 2.523466 8 1202
|
||||
FAST f=17 a=7 37.416136 2.526625 6 1010
|
||||
FAST f=17 a=7 0.277558 2.526625 6 1010
|
||||
FAST f=17 a=8 37.084707 2.533274 6 1250
|
||||
FAST f=17 a=8 0.285104 2.533274 6 1250
|
||||
FAST f=17 a=9 34.183814 2.532765 8 1298
|
||||
FAST f=17 a=9 0.235133 2.532765 8 1298
|
||||
FAST f=17 a=10 31.149235 2.528722 8 1346
|
||||
FAST f=17 a=10 0.232679 2.528722 8 1346
|
||||
FAST f=18 a=1 72.942176 2.559857 6 386
|
||||
FAST f=18 a=1 0.718618 2.559857 6 386
|
||||
FAST f=18 a=2 51.690440 2.559572 8 290
|
||||
FAST f=18 a=2 0.403978 2.559572 8 290
|
||||
FAST f=18 a=3 45.344908 2.561040 8 962
|
||||
FAST f=18 a=3 0.357205 2.561040 8 962
|
||||
FAST f=18 a=4 39.804522 2.558446 8 1010
|
||||
FAST f=18 a=4 0.310526 2.558446 8 1010
|
||||
FAST f=18 a=5 38.134888 2.561811 8 626
|
||||
FAST f=18 a=5 0.273743 2.561811 8 626
|
||||
FAST f=18 a=6 35.091890 2.555518 8 722
|
||||
FAST f=18 a=6 0.260135 2.555518 8 722
|
||||
FAST f=18 a=7 34.639523 2.562938 8 290
|
||||
FAST f=18 a=7 0.234294 2.562938 8 290
|
||||
FAST f=18 a=8 36.076431 2.563567 8 1586
|
||||
FAST f=18 a=8 0.274075 2.563567 8 1586
|
||||
FAST f=18 a=9 36.376433 2.560950 8 722
|
||||
FAST f=18 a=9 0.240106 2.560950 8 722
|
||||
FAST f=18 a=10 32.624790 2.559340 8 578
|
||||
FAST f=18 a=10 0.234704 2.559340 8 578
|
||||
FAST f=19 a=1 70.513761 2.572441 8 194
|
||||
FAST f=19 a=1 0.726112 2.572441 8 194
|
||||
FAST f=19 a=2 59.263032 2.574560 8 482
|
||||
FAST f=19 a=2 0.451554 2.574560 8 482
|
||||
FAST f=19 a=3 51.509594 2.571546 6 194
|
||||
FAST f=19 a=3 0.393014 2.571546 6 194
|
||||
FAST f=19 a=4 55.393906 2.573386 8 482
|
||||
FAST f=19 a=4 0.38819 2.573386 8 482
|
||||
FAST f=19 a=5 43.201736 2.567589 8 674
|
||||
FAST f=19 a=5 0.292155 2.567589 8 674
|
||||
FAST f=19 a=6 42.911687 2.572666 6 434
|
||||
FAST f=19 a=6 0.303988 2.572666 6 434
|
||||
FAST f=19 a=7 44.687591 2.573613 6 290
|
||||
FAST f=19 a=7 0.308721 2.573613 6 290
|
||||
FAST f=19 a=8 37.372868 2.571039 6 194
|
||||
FAST f=19 a=8 0.287137 2.571039 6 194
|
||||
FAST f=19 a=9 36.074230 2.566473 6 482
|
||||
FAST f=19 a=9 0.280721 2.566473 6 482
|
||||
FAST f=19 a=10 33.731720 2.570306 8 194
|
||||
FAST f=19 a=10 0.224073 2.570306 8 194
|
||||
FAST f=20 a=1 79.670634 2.581146 6 290
|
||||
FAST f=20 a=1 0.899986 2.581146 6 290
|
||||
FAST f=20 a=2 58.827141 2.579782 8 386
|
||||
FAST f=20 a=2 0.602288 2.579782 8 386
|
||||
FAST f=20 a=3 51.289004 2.579627 8 722
|
||||
FAST f=20 a=3 0.446091 2.579627 8 722
|
||||
FAST f=20 a=4 47.711068 2.581508 8 722
|
||||
FAST f=20 a=4 0.473007 2.581508 8 722
|
||||
FAST f=20 a=5 47.402929 2.578062 6 434
|
||||
FAST f=20 a=5 0.497131 2.578062 6 434
|
||||
FAST f=20 a=6 54.797102 2.577365 8 482
|
||||
FAST f=20 a=6 0.515061 2.577365 8 482
|
||||
FAST f=20 a=7 51.370877 2.583050 8 386
|
||||
FAST f=20 a=7 0.402878 2.583050 8 386
|
||||
FAST f=20 a=8 51.437931 2.574875 6 242
|
||||
FAST f=20 a=8 0.453094 2.574875 6 242
|
||||
FAST f=20 a=9 44.105456 2.576700 6 242
|
||||
FAST f=20 a=9 0.456633 2.576700 6 242
|
||||
FAST f=20 a=10 44.447580 2.578305 8 338
|
||||
FAST f=20 a=10 0.409121 2.578305 8 338
|
||||
FAST f=21 a=1 113.031686 2.582449 6 242
|
||||
FAST f=21 a=1 1.456971 2.582449 6 242
|
||||
FAST f=21 a=2 97.700932 2.582124 8 194
|
||||
FAST f=21 a=2 1.072078 2.582124 8 194
|
||||
FAST f=21 a=3 96.563648 2.585479 8 434
|
||||
FAST f=21 a=3 0.949528 2.585479 8 434
|
||||
FAST f=21 a=4 90.597813 2.582366 6 386
|
||||
FAST f=21 a=4 0.76944 2.582366 6 386
|
||||
FAST f=21 a=5 86.815980 2.579043 8 434
|
||||
FAST f=21 a=5 0.858167 2.579043 8 434
|
||||
FAST f=21 a=6 91.235820 2.578378 8 530
|
||||
FAST f=21 a=6 0.684274 2.578378 8 530
|
||||
FAST f=21 a=7 84.392788 2.581243 8 386
|
||||
FAST f=21 a=7 0.814386 2.581243 8 386
|
||||
FAST f=21 a=8 82.052310 2.582547 8 338
|
||||
FAST f=21 a=8 0.822633 2.582547 8 338
|
||||
FAST f=21 a=9 74.696074 2.579319 8 194
|
||||
FAST f=21 a=9 0.811028 2.579319 8 194
|
||||
FAST f=21 a=10 76.211170 2.578766 8 290
|
||||
FAST f=21 a=10 0.809715 2.578766 8 290
|
||||
FAST f=22 a=1 138.976871 2.580478 8 194
|
||||
FAST f=22 a=1 1.748932 2.580478 8 194
|
||||
FAST f=22 a=2 120.164097 2.583633 8 386
|
||||
FAST f=22 a=2 1.333239 2.583633 8 386
|
||||
FAST f=22 a=3 111.986474 2.582566 6 194
|
||||
FAST f=22 a=3 1.305734 2.582566 6 194
|
||||
FAST f=22 a=4 108.548148 2.583068 6 194
|
||||
FAST f=22 a=4 1.314026 2.583068 6 194
|
||||
FAST f=22 a=5 103.173017 2.583495 6 290
|
||||
FAST f=22 a=5 1.228664 2.583495 6 290
|
||||
FAST f=22 a=6 108.421262 2.582349 8 530
|
||||
FAST f=22 a=6 1.076773 2.582349 8 530
|
||||
FAST f=22 a=7 103.284127 2.581022 8 386
|
||||
FAST f=22 a=7 1.112117 2.581022 8 386
|
||||
FAST f=22 a=8 96.330279 2.581073 8 290
|
||||
FAST f=22 a=8 1.109303 2.581073 8 290
|
||||
FAST f=22 a=9 97.651348 2.580075 6 194
|
||||
FAST f=22 a=9 0.933032 2.580075 6 194
|
||||
FAST f=22 a=10 101.660621 2.584886 8 194
|
||||
FAST f=22 a=10 0.796823 2.584886 8 194
|
||||
FAST f=23 a=1 159.322978 2.581474 6 242
|
||||
FAST f=23 a=1 2.015878 2.581474 6 242
|
||||
FAST f=23 a=2 134.331775 2.581619 8 194
|
||||
FAST f=23 a=2 1.545845 2.581619 8 194
|
||||
FAST f=23 a=3 127.724552 2.579888 6 338
|
||||
FAST f=23 a=3 1.444496 2.579888 6 338
|
||||
FAST f=23 a=4 126.077675 2.578137 6 242
|
||||
FAST f=23 a=4 1.364394 2.578137 6 242
|
||||
FAST f=23 a=5 124.914027 2.580843 8 338
|
||||
FAST f=23 a=5 1.116059 2.580843 8 338
|
||||
FAST f=23 a=6 122.874153 2.577637 6 338
|
||||
FAST f=23 a=6 1.164584 2.577637 6 338
|
||||
FAST f=23 a=7 123.099257 2.582715 6 386
|
||||
FAST f=23 a=7 1.354042 2.582715 6 386
|
||||
FAST f=23 a=8 122.026753 2.577681 8 194
|
||||
FAST f=23 a=8 1.210966 2.577681 8 194
|
||||
FAST f=23 a=9 121.164312 2.584599 6 290
|
||||
FAST f=23 a=9 1.174859 2.584599 6 290
|
||||
FAST f=23 a=10 117.462222 2.580358 8 194
|
||||
FAST f=23 a=10 1.075258 2.580358 8 194
|
||||
FAST f=24 a=1 169.539659 2.581642 6 194
|
||||
FAST f=24 a=1 1.916804 2.581642 6 194
|
||||
FAST f=24 a=2 160.539270 2.580421 6 290
|
||||
FAST f=24 a=2 1.71087 2.580421 6 290
|
||||
FAST f=24 a=3 155.455874 2.580449 6 242
|
||||
FAST f=24 a=3 1.60307 2.580449 6 242
|
||||
FAST f=24 a=4 147.630320 2.582953 6 338
|
||||
FAST f=24 a=4 1.396364 2.582953 6 338
|
||||
FAST f=24 a=5 133.767428 2.580589 6 290
|
||||
FAST f=24 a=5 1.19933 2.580589 6 290
|
||||
FAST f=24 a=6 146.437535 2.579453 8 194
|
||||
FAST f=24 a=6 1.385405 2.579453 8 194
|
||||
FAST f=24 a=7 147.227507 2.584155 8 386
|
||||
FAST f=24 a=7 1.48942 2.584155 8 386
|
||||
FAST f=24 a=8 138.005773 2.584115 8 194
|
||||
FAST f=24 a=8 1.352 2.584115 8 194
|
||||
FAST f=24 a=9 141.442625 2.582902 8 290
|
||||
FAST f=24 a=9 1.39647 2.582902 8 290
|
||||
FAST f=24 a=10 142.157446 2.582701 8 434
|
||||
FAST f=24 a=10 1.498889 2.582701 8 434
|
|
@ -1,442 +0,0 @@
|
|||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* strcmp, strlen */
|
||||
#include <errno.h> /* errno */
|
||||
#include <ctype.h>
|
||||
#include <time.h>
|
||||
#include "random.h"
|
||||
#include "dictBuilder.h"
|
||||
#include "zstd_internal.h" /* includes zstd.h */
|
||||
#include "io.h"
|
||||
#include "util.h"
|
||||
#include "zdict.h"
|
||||
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Console display
|
||||
***************************************/
|
||||
/* Prints a printf-style message to stderr. */
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
/* Prints only when the `displayLevel` variable visible at the call site is >= l.
 * NOTE(review): expands to a bare `if`; take care when using it right before an `else`. */
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
|
||||
|
||||
/* Minimum time between two DISPLAYUPDATE refreshes: one sixth of SEC_TO_MICRO. */
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
|
||||
/* Time of the last DISPLAYUPDATE refresh. */
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
|
||||
|
||||
/* Rate-limited display: prints when `displayLevel` >= l and either more than
 * g_refreshRate has elapsed since the last refresh or displayLevel >= 4;
 * stderr is flushed explicitly when displayLevel >= 4. */
#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
|
||||
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
|
||||
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
|
||||
if (displayLevel>=4) fflush(stderr); } } }
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Exceptions
|
||||
***************************************/
|
||||
/* DEBUG defaults to 0 unless the build defines it. */
#ifndef DEBUG
|
||||
# define DEBUG 0
|
||||
#endif
|
||||
/* Prints only when DEBUG is non-zero. The expansion already ends with ';'. */
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
|
||||
/* Prints "Error <error> : <message>" to stderr, then terminates the process
 * with exit(error). The source location is printed only when DEBUG is non-zero. */
#define EXM_THROW(error, ...) \
|
||||
{ \
|
||||
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
|
||||
DISPLAY("Error %i : ", error); \
|
||||
DISPLAY(__VA_ARGS__); \
|
||||
DISPLAY("\n"); \
|
||||
exit(error); \
|
||||
}
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
/* Dictionary buffer capacity used by main(). */
static const unsigned g_defaultMaxDictSize = 110 KB;
|
||||
/* Compression level used when no parameter set supplies one. */
#define DEFAULT_CLEVEL 3
|
||||
/* Display level used when no parameter set supplies one. */
#define DEFAULT_DISPLAYLEVEL 2
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Struct
|
||||
***************************************/
|
||||
/* A trained dictionary: createDictFromFiles() fills it in and freeDictInfo() releases it. */
typedef struct {
|
||||
const void* dictBuffer; /* heap buffer holding the dictionary; freed by freeDictInfo() */
|
||||
size_t dictSize; /* dictionary size in bytes; compressWithDict() treats 0 as "no dictionary" */
|
||||
} dictInfo;
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Dictionary related operations
|
||||
***************************************/
|
||||
/** createDictFromFiles() :
|
||||
* Based on type of param given, train dictionary using the corresponding algorithm
|
||||
* @return dictInfo containing dictionary buffer and dictionary size
|
||||
*/
|
||||
dictInfo* createDictFromFiles(sampleInfo *info, unsigned maxDictSize,
|
||||
ZDICT_random_params_t *randomParams, ZDICT_cover_params_t *coverParams,
|
||||
ZDICT_legacy_params_t *legacyParams, ZDICT_fastCover_params_t *fastParams) {
|
||||
unsigned const displayLevel = randomParams ? randomParams->zParams.notificationLevel :
|
||||
coverParams ? coverParams->zParams.notificationLevel :
|
||||
legacyParams ? legacyParams->zParams.notificationLevel :
|
||||
fastParams ? fastParams->zParams.notificationLevel :
|
||||
DEFAULT_DISPLAYLEVEL; /* no dict */
|
||||
void* const dictBuffer = malloc(maxDictSize);
|
||||
|
||||
dictInfo* dInfo = NULL;
|
||||
|
||||
/* Checks */
|
||||
if (!dictBuffer)
|
||||
EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
|
||||
|
||||
{ size_t dictSize;
|
||||
if(randomParams) {
|
||||
dictSize = ZDICT_trainFromBuffer_random(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, *randomParams);
|
||||
}else if(coverParams) {
|
||||
/* Run the optimize version if either k or d is not provided */
|
||||
if (!coverParams->d || !coverParams->k){
|
||||
dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, coverParams);
|
||||
} else {
|
||||
dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, *coverParams);
|
||||
}
|
||||
} else if(legacyParams) {
|
||||
dictSize = ZDICT_trainFromBuffer_legacy(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, *legacyParams);
|
||||
} else if(fastParams) {
|
||||
/* Run the optimize version if either k or d is not provided */
|
||||
if (!fastParams->d || !fastParams->k) {
|
||||
dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, fastParams);
|
||||
} else {
|
||||
dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, *fastParams);
|
||||
}
|
||||
} else {
|
||||
dictSize = 0;
|
||||
}
|
||||
if (ZDICT_isError(dictSize)) {
|
||||
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
|
||||
free(dictBuffer);
|
||||
return dInfo;
|
||||
}
|
||||
dInfo = (dictInfo *)malloc(sizeof(dictInfo));
|
||||
dInfo->dictBuffer = dictBuffer;
|
||||
dInfo->dictSize = dictSize;
|
||||
}
|
||||
return dInfo;
|
||||
}
|
||||
|
||||
|
||||
/** compressWithDict() :
|
||||
* Compress samples from sample buffer given dictionary stored on dictionary buffer and compression level
|
||||
* @return compression ratio
|
||||
*/
|
||||
double compressWithDict(sampleInfo *srcInfo, dictInfo* dInfo, int compressionLevel, int displayLevel) {
|
||||
/* Local variables */
|
||||
size_t totalCompressedSize = 0;
|
||||
size_t totalOriginalSize = 0;
|
||||
const unsigned hasDict = dInfo->dictSize > 0 ? 1 : 0;
|
||||
double cRatio;
|
||||
size_t dstCapacity;
|
||||
int i;
|
||||
|
||||
/* Pointers */
|
||||
ZSTD_CDict *cdict = NULL;
|
||||
ZSTD_CCtx* cctx = NULL;
|
||||
size_t *offsets = NULL;
|
||||
void* dst = NULL;
|
||||
|
||||
/* Allocate dst with enough space to compress the maximum sized sample */
|
||||
{
|
||||
size_t maxSampleSize = 0;
|
||||
for (i = 0; i < srcInfo->nbSamples; i++) {
|
||||
maxSampleSize = MAX(srcInfo->samplesSizes[i], maxSampleSize);
|
||||
}
|
||||
dstCapacity = ZSTD_compressBound(maxSampleSize);
|
||||
dst = malloc(dstCapacity);
|
||||
}
|
||||
|
||||
/* Calculate offset for each sample */
|
||||
offsets = (size_t *)malloc((srcInfo->nbSamples + 1) * sizeof(size_t));
|
||||
offsets[0] = 0;
|
||||
for (i = 1; i <= srcInfo->nbSamples; i++) {
|
||||
offsets[i] = offsets[i - 1] + srcInfo->samplesSizes[i - 1];
|
||||
}
|
||||
|
||||
/* Create the cctx */
|
||||
cctx = ZSTD_createCCtx();
|
||||
if(!cctx || !dst) {
|
||||
cRatio = -1;
|
||||
goto _cleanup;
|
||||
}
|
||||
|
||||
/* Create CDict if there's a dictionary stored on buffer */
|
||||
if (hasDict) {
|
||||
cdict = ZSTD_createCDict(dInfo->dictBuffer, dInfo->dictSize, compressionLevel);
|
||||
if(!cdict) {
|
||||
cRatio = -1;
|
||||
goto _cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
/* Compress each sample and sum their sizes*/
|
||||
const BYTE *const samples = (const BYTE *)srcInfo->srcBuffer;
|
||||
for (i = 0; i < srcInfo->nbSamples; i++) {
|
||||
size_t compressedSize;
|
||||
if(hasDict) {
|
||||
compressedSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity, samples + offsets[i], srcInfo->samplesSizes[i], cdict);
|
||||
} else {
|
||||
compressedSize = ZSTD_compressCCtx(cctx, dst, dstCapacity,samples + offsets[i], srcInfo->samplesSizes[i], compressionLevel);
|
||||
}
|
||||
if (ZSTD_isError(compressedSize)) {
|
||||
cRatio = -1;
|
||||
goto _cleanup;
|
||||
}
|
||||
totalCompressedSize += compressedSize;
|
||||
}
|
||||
|
||||
/* Sum original sizes */
|
||||
for (i = 0; i<srcInfo->nbSamples; i++) {
|
||||
totalOriginalSize += srcInfo->samplesSizes[i];
|
||||
}
|
||||
|
||||
/* Calculate compression ratio */
|
||||
DISPLAYLEVEL(2, "original size is %lu\n", totalOriginalSize);
|
||||
DISPLAYLEVEL(2, "compressed size is %lu\n", totalCompressedSize);
|
||||
cRatio = (double)totalOriginalSize/(double)totalCompressedSize;
|
||||
|
||||
_cleanup:
|
||||
free(dst);
|
||||
free(offsets);
|
||||
ZSTD_freeCCtx(cctx);
|
||||
ZSTD_freeCDict(cdict);
|
||||
return cRatio;
|
||||
}
|
||||
|
||||
|
||||
/** FreeDictInfo() :
|
||||
* Free memory allocated for dictInfo
|
||||
*/
|
||||
void freeDictInfo(dictInfo* info) {
|
||||
if (!info) return;
|
||||
if (info->dictBuffer) free((void*)(info->dictBuffer));
|
||||
free(info);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*-********************************************************
|
||||
* Benchmarking functions
|
||||
**********************************************************/
|
||||
/** benchmarkDictBuilder() :
|
||||
* Measure how long a dictionary builder takes and compression ratio with the dictionary built
|
||||
* @return 0 if benchmark successfully, 1 otherwise
|
||||
*/
|
||||
int benchmarkDictBuilder(sampleInfo *srcInfo, unsigned maxDictSize, ZDICT_random_params_t *randomParam,
|
||||
ZDICT_cover_params_t *coverParam, ZDICT_legacy_params_t *legacyParam,
|
||||
ZDICT_fastCover_params_t *fastParam) {
|
||||
/* Local variables */
|
||||
const unsigned displayLevel = randomParam ? randomParam->zParams.notificationLevel :
|
||||
coverParam ? coverParam->zParams.notificationLevel :
|
||||
legacyParam ? legacyParam->zParams.notificationLevel :
|
||||
fastParam ? fastParam->zParams.notificationLevel:
|
||||
DEFAULT_DISPLAYLEVEL; /* no dict */
|
||||
const char* name = randomParam ? "RANDOM" :
|
||||
coverParam ? "COVER" :
|
||||
legacyParam ? "LEGACY" :
|
||||
fastParam ? "FAST":
|
||||
"NODICT"; /* no dict */
|
||||
const unsigned cLevel = randomParam ? randomParam->zParams.compressionLevel :
|
||||
coverParam ? coverParam->zParams.compressionLevel :
|
||||
legacyParam ? legacyParam->zParams.compressionLevel :
|
||||
fastParam ? fastParam->zParams.compressionLevel:
|
||||
DEFAULT_CLEVEL; /* no dict */
|
||||
int result = 0;
|
||||
|
||||
/* Calculate speed */
|
||||
const UTIL_time_t begin = UTIL_getTime();
|
||||
dictInfo* dInfo = createDictFromFiles(srcInfo, maxDictSize, randomParam, coverParam, legacyParam, fastParam);
|
||||
const U64 timeMicro = UTIL_clockSpanMicro(begin);
|
||||
const double timeSec = timeMicro / (double)SEC_TO_MICRO;
|
||||
if (!dInfo) {
|
||||
DISPLAYLEVEL(1, "%s does not train successfully\n", name);
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
DISPLAYLEVEL(1, "%s took %f seconds to execute \n", name, timeSec);
|
||||
|
||||
/* Calculate compression ratio */
|
||||
const double cRatio = compressWithDict(srcInfo, dInfo, cLevel, displayLevel);
|
||||
if (cRatio < 0) {
|
||||
DISPLAYLEVEL(1, "Compressing with %s dictionary does not work\n", name);
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
|
||||
}
|
||||
DISPLAYLEVEL(1, "Compression ratio with %s dictionary is %f\n", name, cRatio);
|
||||
|
||||
_cleanup:
|
||||
freeDictInfo(dInfo);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int main(int argCount, const char* argv[])
|
||||
{
|
||||
const int displayLevel = DEFAULT_DISPLAYLEVEL;
|
||||
const char* programName = argv[0];
|
||||
int result = 0;
|
||||
|
||||
/* Initialize arguments to default values */
|
||||
unsigned k = 200;
|
||||
unsigned d = 8;
|
||||
unsigned f;
|
||||
unsigned accel;
|
||||
unsigned i;
|
||||
const unsigned cLevel = DEFAULT_CLEVEL;
|
||||
const unsigned dictID = 0;
|
||||
const unsigned maxDictSize = g_defaultMaxDictSize;
|
||||
|
||||
/* Initialize table to store input files */
|
||||
const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
|
||||
unsigned filenameIdx = 0;
|
||||
|
||||
char* fileNamesBuf = NULL;
|
||||
unsigned fileNamesNb = filenameIdx;
|
||||
const int followLinks = 0;
|
||||
const char** extendedFileList = NULL;
|
||||
|
||||
/* Parse arguments */
|
||||
for (i = 1; i < argCount; i++) {
|
||||
const char* argument = argv[i];
|
||||
if (longCommandWArg(&argument, "in=")) {
|
||||
filenameTable[filenameIdx] = argument;
|
||||
filenameIdx++;
|
||||
continue;
|
||||
}
|
||||
DISPLAYLEVEL(1, "benchmark: Incorrect parameters\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Get the list of all files recursively (because followLinks==0)*/
|
||||
extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
|
||||
&fileNamesNb, followLinks);
|
||||
if (extendedFileList) {
|
||||
unsigned u;
|
||||
for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
|
||||
free((void*)filenameTable);
|
||||
filenameTable = extendedFileList;
|
||||
filenameIdx = fileNamesNb;
|
||||
}
|
||||
|
||||
/* get sampleInfo */
|
||||
size_t blockSize = 0;
|
||||
sampleInfo* srcInfo= getSampleInfo(filenameTable,
|
||||
filenameIdx, blockSize, maxDictSize, displayLevel);
|
||||
|
||||
/* set up zParams */
|
||||
ZDICT_params_t zParams;
|
||||
zParams.compressionLevel = cLevel;
|
||||
zParams.notificationLevel = displayLevel;
|
||||
zParams.dictID = dictID;
|
||||
|
||||
/* with no dict */
|
||||
{
|
||||
const int noDictResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, NULL);
|
||||
if(noDictResult) {
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
/* for random */
|
||||
{
|
||||
ZDICT_random_params_t randomParam;
|
||||
randomParam.zParams = zParams;
|
||||
randomParam.k = k;
|
||||
const int randomResult = benchmarkDictBuilder(srcInfo, maxDictSize, &randomParam, NULL, NULL, NULL);
|
||||
DISPLAYLEVEL(2, "k=%u\n", randomParam.k);
|
||||
if(randomResult) {
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
/* for legacy */
|
||||
{
|
||||
ZDICT_legacy_params_t legacyParam;
|
||||
legacyParam.zParams = zParams;
|
||||
legacyParam.selectivityLevel = 9;
|
||||
const int legacyResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, &legacyParam, NULL);
|
||||
DISPLAYLEVEL(2, "selectivityLevel=%u\n", legacyParam.selectivityLevel);
|
||||
if(legacyResult) {
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
/* for cover */
|
||||
{
|
||||
/* for cover (optimizing k and d) */
|
||||
ZDICT_cover_params_t coverParam;
|
||||
memset(&coverParam, 0, sizeof(coverParam));
|
||||
coverParam.zParams = zParams;
|
||||
coverParam.splitPoint = 1.0;
|
||||
coverParam.steps = 40;
|
||||
coverParam.nbThreads = 1;
|
||||
const int coverOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL, NULL);
|
||||
DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParam.k, coverParam.d, coverParam.steps, (unsigned)(coverParam.splitPoint * 100));
|
||||
if(coverOptResult) {
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
|
||||
/* for cover (with k and d provided) */
|
||||
const int coverResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL, NULL);
|
||||
DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParam.k, coverParam.d, coverParam.steps, (unsigned)(coverParam.splitPoint * 100));
|
||||
if(coverResult) {
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* for fastCover */
|
||||
for (f = 15; f < 25; f++){
|
||||
DISPLAYLEVEL(2, "current f is %u\n", f);
|
||||
for (accel = 1; accel < 11; accel++) {
|
||||
DISPLAYLEVEL(2, "current accel is %u\n", accel);
|
||||
/* for fastCover (optimizing k and d) */
|
||||
ZDICT_fastCover_params_t fastParam;
|
||||
memset(&fastParam, 0, sizeof(fastParam));
|
||||
fastParam.zParams = zParams;
|
||||
fastParam.f = f;
|
||||
fastParam.steps = 40;
|
||||
fastParam.nbThreads = 1;
|
||||
fastParam.accel = accel;
|
||||
const int fastOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
|
||||
DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100), fastParam.accel);
|
||||
if(fastOptResult) {
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
|
||||
/* for fastCover (with k and d provided) */
|
||||
for (i = 0; i < 5; i++) {
|
||||
const int fastResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
|
||||
DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100), fastParam.accel);
|
||||
if(fastResult) {
|
||||
result = 1;
|
||||
goto _cleanup;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Free allocated memory */
|
||||
_cleanup:
|
||||
UTIL_freeFileList(extendedFileList, fileNamesBuf);
|
||||
freeSampleInfo(srcInfo);
|
||||
return result;
|
||||
}
|
|
@ -1,6 +0,0 @@
|
|||
/* ZDICT_trainFromBuffer_legacy() :
|
||||
* issue : samplesBuffer needs to be followed by a noisy guard band.
|
||||
* workaround : duplicate the buffer, and add the noise */
|
||||
size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
|
||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
||||
ZDICT_legacy_params_t params);
|
|
@ -1,2 +0,0 @@
|
|||
# Run the benchmark on the sources under lib/common.
# The echoed path mirrors the argument actually passed to ./benchmark.
echo "Benchmark with in=../../../lib/common"
./benchmark in=../../../lib/common
|
|
@ -1,54 +0,0 @@
|
|||
# Arguments forwarded to ./main by the `run` target, e.g. make ARG="in=dir out=dict"
ARG :=

CC ?= gcc
CFLAGS ?= -O3 -g
INCLUDES := -I ../../../programs -I ../randomDictBuilder -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder

IO_FILE := ../randomDictBuilder/io.c

# Input directory and output dictionary used by `testrun`
TEST_INPUT := ../../../lib
TEST_OUTPUT := fastCoverDict

.PHONY: all
all: main run clean

.PHONY: test
test: main testrun testshell clean

# `run` executes ./main, so it depends on the binary explicitly instead of
# relying on the prerequisite order of `all`.
.PHONY: run
run: main
	echo "Building a fastCover dictionary with given arguments"
	./main $(ARG)

main: main.o io.o fastCover.o libzstd.a
	$(CC) $(CFLAGS) main.o io.o fastCover.o libzstd.a -o main

main.o: main.c
	$(CC) $(CFLAGS) $(INCLUDES) -c main.c

fastCover.o: fastCover.c
	$(CC) $(CFLAGS) $(INCLUDES) -c fastCover.c

io.o: $(IO_FILE)
	$(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE)

# Build the static library in the zstd lib directory, then move it here
libzstd.a:
	$(MAKE) MOREFLAGS=-g -C ../../../lib libzstd.a
	mv ../../../lib/libzstd.a .

# Build a dictionary from TEST_INPUT, benchmark zstd with it, then delete it
.PHONY: testrun
testrun: main
	echo "Run with $(TEST_INPUT) and $(TEST_OUTPUT) "
	./main in=$(TEST_INPUT) out=$(TEST_OUTPUT)
	zstd -be3 -D $(TEST_OUTPUT) -r $(TEST_INPUT) -q
	rm -f $(TEST_OUTPUT)

.PHONY: testshell
testshell: test.sh
	sh test.sh
	echo "Finish running test.sh"

.PHONY: clean
clean:
	rm -f *.o main libzstd.a
	$(MAKE) -C ../../../lib clean
	echo "Cleaning is completed"
|
|
@ -1,24 +0,0 @@
|
|||
FastCover Dictionary Builder
|
||||
|
||||
### Permitted Arguments:
|
||||
Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in="
|
||||
Output Dictionary (out=dictName): if not provided, default to fastCoverDict
|
||||
Dictionary ID (dictID=#): nonnegative number; if not provided, default to 0
|
||||
Maximum Dictionary Size (maxdict=#): positive number; in bytes, if not provided, default to 110KB
|
||||
Size of Selected Segment (k=#): positive number; in bytes; if not provided, default to 200
|
||||
Size of Dmer (d=#): either 6 or 8; if not provided, default to 8
|
||||
Number of steps (steps=#): positive number, if not provided, default to 32
|
||||
Percentage of samples used for training (split=#): positive number; if not provided, default to 100
|
||||
|
||||
|
||||
### Running Test:
|
||||
make test
|
||||
|
||||
|
||||
### Usage:
|
||||
To build a FASTCOVER dictionary with the provided arguments: make ARG= followed by arguments
|
||||
If k or d is not provided, the optimize version of FASTCOVER is run.
|
||||
|
||||
### Examples:
|
||||
make ARG="in=../../../lib/dictBuilder out=dict100 dictID=520"
|
||||
make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
|
|
@ -1,809 +0,0 @@
|
|||
/*-*************************************
|
||||
* Dependencies
|
||||
***************************************/
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* memset */
|
||||
#include <time.h> /* clock */
|
||||
#include "mem.h" /* read */
|
||||
#include "pool.h"
|
||||
#include "threading.h"
|
||||
#include "fastCover.h"
|
||||
#include "zstd_internal.h" /* includes zstd.h */
|
||||
#include "zdict.h"
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
/* Evaluates to (U32)-1 when size_t is 8 bytes wide, and to 1 GB otherwise. */
#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
|
||||
/* NOTE(review): presumably the largest accepted value of the `f` parameter - confirm at the use sites. */
#define FASTCOVER_MAX_F 32
|
||||
/* NOTE(review): presumably the split point used when none is given - confirm at the use sites. */
#define DEFAULT_SPLITPOINT 1.0
|
||||
|
||||
/*-*************************************
|
||||
* Console display
|
||||
***************************************/
|
||||
/* Display level read by DISPLAYLEVEL and DISPLAYUPDATE; starts at 2. */
static int g_displayLevel = 2;
|
||||
/* Prints a printf-style message to stderr and flushes the stream immediately. */
#define DISPLAY(...) \
|
||||
{ \
|
||||
fprintf(stderr, __VA_ARGS__); \
|
||||
fflush(stderr); \
|
||||
}
|
||||
/* Prints only when the given displayLevel is >= l. */
#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
|
||||
if (displayLevel >= l) { \
|
||||
DISPLAY(__VA_ARGS__); \
|
||||
} /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
||||
/* LOCALDISPLAYLEVEL bound to the file-wide g_displayLevel. */
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
|
||||
|
||||
/* Rate-limited display: prints when displayLevel >= l and either more than
 * refreshRate clock ticks have passed since g_time or displayLevel >= 4;
 * g_time is updated on every print. */
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
|
||||
if (displayLevel >= l) { \
|
||||
if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
|
||||
g_time = clock(); \
|
||||
DISPLAY(__VA_ARGS__); \
|
||||
} \
|
||||
}
|
||||
/* LOCALDISPLAYUPDATE bound to the file-wide g_displayLevel. */
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
|
||||
/* Minimum number of clock() ticks between two progress updates (15% of CLOCKS_PER_SEC). */
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
||||
/* clock() value recorded at the last progress update. */
static clock_t g_time = 0;
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Hash Functions
|
||||
***************************************/
|
||||
static const U64 prime6bytes = 227718039650203ULL;
|
||||
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
|
||||
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
|
||||
|
||||
static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
|
||||
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
|
||||
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
|
||||
|
||||
|
||||
/**
|
||||
* Hash the d-byte value pointed to by p and mod 2^f
|
||||
*/
|
||||
static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 h, unsigned d) {
|
||||
if (d == 6) {
|
||||
return ZSTD_hash6Ptr(p, h) & ((1 << h) - 1);
|
||||
}
|
||||
return ZSTD_hash8Ptr(p, h) & ((1 << h) - 1);
|
||||
}
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Context
|
||||
***************************************/
|
||||
/* State shared by the FASTCOVER functions.
 * NOTE(review): the code that fills these fields is outside this view; field
 * notes marked "presumably" are inferred from names and need confirmation. */
typedef struct {
|
||||
const BYTE *samples; /* sample bytes; FASTCOVER_selectSegment hashes dmers at positions inside this buffer */
|
||||
size_t *offsets; /* presumably the start offset of each sample inside `samples` - confirm */
|
||||
const size_t *samplesSizes; /* presumably the size in bytes of each sample - confirm */
|
||||
size_t nbSamples;
|
||||
size_t nbTrainSamples; /* presumably the number of samples used for training - confirm */
|
||||
size_t nbTestSamples; /* presumably the number of samples used for testing - confirm */
|
||||
size_t nbDmers;
|
||||
U32 *freqs; /* presumably a frequency per hash index - confirm */
|
||||
U16 *segmentFreqs; /* per hash index: occurrence count inside the segment being scored by FASTCOVER_selectSegment */
|
||||
unsigned d; /* dmer size handed to FASTCOVER_hashPtrToIndex (6 selects the 6-byte hash) */
|
||||
} FASTCOVER_ctx_t;
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Helper functions
|
||||
***************************************/
|
||||
/**
 * Adds up the first nbSamples entries of samplesSizes.
 * The result is 0 when nbSamples is 0.
 */
static size_t FASTCOVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
  size_t total = 0;
  unsigned remaining = nbSamples;
  /* Walk the array from the back; unsigned addition is order-independent. */
  while (remaining > 0) {
    --remaining;
    total += samplesSizes[remaining];
  }
  return total;
}
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* fast functions
|
||||
***************************************/
|
||||
/**
|
||||
* A segment is a range in the source as well as the score of the segment.
|
||||
*/
|
||||
/* Candidate segment handled by FASTCOVER_selectSegment. */
typedef struct {
|
||||
U32 begin; /* position of the first dmer of the segment inside the samples buffer */
|
||||
U32 end; /* position just past the last dmer counted so far (end - begin is the window size) */
|
||||
U32 score; /* sum of freqs[] over the distinct hash indices currently inside the window */
|
||||
} FASTCOVER_segment_t;
|
||||
|
||||
|
||||
/**
|
||||
* Selects the best segment in an epoch.
|
||||
* Segments of are scored according to the function:
|
||||
*
|
||||
* Let F(d) be the frequency of all dmers with hash value d.
|
||||
* Let S_i be hash value of the dmer at position i of segment S which has length k.
|
||||
*
|
||||
* Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
|
||||
*
|
||||
* Once the dmer with hash value d is in the dictionary we set F(d) = F(d)/2.
|
||||
*/
|
||||
static FASTCOVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
|
||||
U32 *freqs, U32 begin,U32 end,
|
||||
ZDICT_fastCover_params_t parameters) {
|
||||
/* Constants */
|
||||
const U32 k = parameters.k;
|
||||
const U32 d = parameters.d;
|
||||
const U32 dmersInK = k - d + 1;
|
||||
/* Try each segment (activeSegment) and save the best (bestSegment) */
|
||||
FASTCOVER_segment_t bestSegment = {0, 0, 0};
|
||||
FASTCOVER_segment_t activeSegment;
|
||||
/* Reset the activeDmers in the segment */
|
||||
/* The activeSegment starts at the beginning of the epoch. */
|
||||
activeSegment.begin = begin;
|
||||
activeSegment.end = begin;
|
||||
activeSegment.score = 0;
|
||||
{
|
||||
/* Slide the activeSegment through the whole epoch.
|
||||
* Save the best segment in bestSegment.
|
||||
*/
|
||||
while (activeSegment.end < end) {
|
||||
/* Get hash value of current dmer */
|
||||
const size_t index = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, parameters.f, ctx->d);
|
||||
/* Add frequency of this index to score if this is the first occurrence of index in active segment */
|
||||
if (ctx->segmentFreqs[index] == 0) {
|
||||
activeSegment.score += freqs[index];
|
||||
}
|
||||
ctx->segmentFreqs[index] += 1;
|
||||
/* Increment end of segment */
|
||||
activeSegment.end += 1;
|
||||
/* If the window is now too large, drop the first position */
|
||||
if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
|
||||
/* Get hash value of the dmer to be eliminated from active segment */
|
||||
const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, parameters.f, ctx->d);
|
||||
ctx->segmentFreqs[delIndex] -= 1;
|
||||
/* Subtract frequency of this index from score if this is the last occurrence of this index in active segment */
|
||||
if (ctx->segmentFreqs[delIndex] == 0) {
|
||||
activeSegment.score -= freqs[delIndex];
|
||||
}
|
||||
/* Increment start of segment */
|
||||
activeSegment.begin += 1;
|
||||
}
|
||||
/* If this segment is the best so far save it */
|
||||
if (activeSegment.score > bestSegment.score) {
|
||||
bestSegment = activeSegment;
|
||||
}
|
||||
}
|
||||
/* Zero out rest of segmentFreqs array */
|
||||
while (activeSegment.begin < end) {
|
||||
const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, parameters.f, ctx->d);
|
||||
ctx->segmentFreqs[delIndex] -= 1;
|
||||
activeSegment.begin += 1;
|
||||
}
|
||||
}
|
||||
{
|
||||
/* Trim off the zero frequency head and tail from the segment. */
|
||||
U32 newBegin = bestSegment.end;
|
||||
U32 newEnd = bestSegment.begin;
|
||||
U32 pos;
|
||||
for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
|
||||
const size_t index = FASTCOVER_hashPtrToIndex(ctx->samples + pos, parameters.f, ctx->d);
|
||||
U32 freq = freqs[index];
|
||||
if (freq != 0) {
|
||||
newBegin = MIN(newBegin, pos);
|
||||
newEnd = pos + 1;
|
||||
}
|
||||
}
|
||||
bestSegment.begin = newBegin;
|
||||
bestSegment.end = newEnd;
|
||||
}
|
||||
{
|
||||
/* Zero the frequency of hash value of each dmer covered by the chosen segment. */
|
||||
U32 pos;
|
||||
for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
|
||||
const size_t i = FASTCOVER_hashPtrToIndex(ctx->samples + pos, parameters.f, ctx->d);
|
||||
freqs[i] = 0;
|
||||
}
|
||||
}
|
||||
return bestSegment;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check the validity of the parameters.
|
||||
* Returns non-zero if the parameters are valid and 0 otherwise.
|
||||
*/
|
||||
static int FASTCOVER_checkParameters(ZDICT_fastCover_params_t parameters,
|
||||
size_t maxDictSize) {
|
||||
/* k, d, and f are required parameters */
|
||||
if (parameters.d == 0 || parameters.k == 0 || parameters.f == 0) {
|
||||
return 0;
|
||||
}
|
||||
/* d has to be 6 or 8 */
|
||||
if (parameters.d != 6 && parameters.d != 8) {
|
||||
return 0;
|
||||
}
|
||||
/* 0 < f <= FASTCOVER_MAX_F */
|
||||
if (parameters.f > FASTCOVER_MAX_F) {
|
||||
return 0;
|
||||
}
|
||||
/* k <= maxDictSize */
|
||||
if (parameters.k > maxDictSize) {
|
||||
return 0;
|
||||
}
|
||||
/* d <= k */
|
||||
if (parameters.d > parameters.k) {
|
||||
return 0;
|
||||
}
|
||||
/* 0 < splitPoint <= 1 */
|
||||
if (parameters.splitPoint <= 0 || parameters.splitPoint > 1) {
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Clean up a context initialized with `FASTCOVER_ctx_init()`.
|
||||
*/
|
||||
static void FASTCOVER_ctx_destroy(FASTCOVER_ctx_t *ctx) {
|
||||
if (!ctx) {
|
||||
return;
|
||||
}
|
||||
if (ctx->segmentFreqs) {
|
||||
free(ctx->segmentFreqs);
|
||||
ctx->segmentFreqs = NULL;
|
||||
}
|
||||
if (ctx->freqs) {
|
||||
free(ctx->freqs);
|
||||
ctx->freqs = NULL;
|
||||
}
|
||||
if (ctx->offsets) {
|
||||
free(ctx->offsets);
|
||||
ctx->offsets = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate for frequency of hash value of each dmer in ctx->samples
|
||||
*/
|
||||
static void FASTCOVER_computeFrequency(U32 *freqs, unsigned f, FASTCOVER_ctx_t *ctx){
|
||||
size_t start; /* start of current dmer */
|
||||
for (unsigned i = 0; i < ctx->nbTrainSamples; i++) {
|
||||
size_t currSampleStart = ctx->offsets[i];
|
||||
size_t currSampleEnd = ctx->offsets[i+1];
|
||||
start = currSampleStart;
|
||||
while (start + ctx->d <= currSampleEnd) {
|
||||
const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, ctx->d);
|
||||
freqs[dmerIndex]++;
|
||||
start++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare a context for dictionary building.
|
||||
* The context is only dependent on the parameter `d` and can used multiple
|
||||
* times.
|
||||
* Returns 1 on success or zero on error.
|
||||
* The context must be destroyed with `FASTCOVER_ctx_destroy()`.
|
||||
*/
|
||||
static int FASTCOVER_ctx_init(FASTCOVER_ctx_t *ctx, const void *samplesBuffer,
|
||||
const size_t *samplesSizes, unsigned nbSamples,
|
||||
unsigned d, double splitPoint, unsigned f) {
|
||||
const BYTE *const samples = (const BYTE *)samplesBuffer;
|
||||
const size_t totalSamplesSize = FASTCOVER_sum(samplesSizes, nbSamples);
|
||||
/* Split samples into testing and training sets */
|
||||
const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
|
||||
const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
|
||||
const size_t trainingSamplesSize = splitPoint < 1.0 ? FASTCOVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
|
||||
const size_t testSamplesSize = splitPoint < 1.0 ? FASTCOVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;
|
||||
/* Checks */
|
||||
if (totalSamplesSize < MAX(d, sizeof(U64)) ||
|
||||
totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
|
||||
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
|
||||
(U32)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
|
||||
return 0;
|
||||
}
|
||||
/* Check if there are at least 5 training samples */
|
||||
if (nbTrainSamples < 5) {
|
||||
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
|
||||
return 0;
|
||||
}
|
||||
/* Check if there's testing sample */
|
||||
if (nbTestSamples < 1) {
|
||||
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
|
||||
return 0;
|
||||
}
|
||||
/* Zero the context */
|
||||
memset(ctx, 0, sizeof(*ctx));
|
||||
DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
|
||||
(U32)trainingSamplesSize);
|
||||
DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
|
||||
(U32)testSamplesSize);
|
||||
|
||||
ctx->samples = samples;
|
||||
ctx->samplesSizes = samplesSizes;
|
||||
ctx->nbSamples = nbSamples;
|
||||
ctx->nbTrainSamples = nbTrainSamples;
|
||||
ctx->nbTestSamples = nbTestSamples;
|
||||
ctx->nbDmers = trainingSamplesSize - d + 1;
|
||||
ctx->d = d;
|
||||
|
||||
/* The offsets of each file */
|
||||
ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t));
|
||||
if (!ctx->offsets) {
|
||||
DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
|
||||
FASTCOVER_ctx_destroy(ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Fill offsets from the samplesSizes */
|
||||
{
|
||||
U32 i;
|
||||
ctx->offsets[0] = 0;
|
||||
for (i = 1; i <= nbSamples; ++i) {
|
||||
ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
|
||||
}
|
||||
}
|
||||
|
||||
/* Initialize frequency array of size 2^f */
|
||||
ctx->freqs = (U32 *)calloc((1 << f), sizeof(U32));
|
||||
ctx->segmentFreqs = (U16 *)calloc((1 << f), sizeof(U16));
|
||||
DISPLAYLEVEL(2, "Computing frequencies\n");
|
||||
FASTCOVER_computeFrequency(ctx->freqs, f, ctx);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Given the prepared context build the dictionary.
|
||||
*/
|
||||
static size_t FASTCOVER_buildDictionary(const FASTCOVER_ctx_t *ctx, U32 *freqs,
|
||||
void *dictBuffer,
|
||||
size_t dictBufferCapacity,
|
||||
ZDICT_fastCover_params_t parameters){
|
||||
BYTE *const dict = (BYTE *)dictBuffer;
|
||||
size_t tail = dictBufferCapacity;
|
||||
/* Divide the data up into epochs of equal size.
|
||||
* We will select at least one segment from each epoch.
|
||||
*/
|
||||
const U32 epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k));
|
||||
const U32 epochSize = (U32)(ctx->nbDmers / epochs);
|
||||
size_t epoch;
|
||||
DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
|
||||
epochSize);
|
||||
/* Loop through the epochs until there are no more segments or the dictionary
|
||||
* is full.
|
||||
*/
|
||||
for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
|
||||
const U32 epochBegin = (U32)(epoch * epochSize);
|
||||
const U32 epochEnd = epochBegin + epochSize;
|
||||
size_t segmentSize;
|
||||
/* Select a segment */
|
||||
FASTCOVER_segment_t segment = FASTCOVER_selectSegment(
|
||||
ctx, freqs, epochBegin, epochEnd, parameters);
|
||||
|
||||
/* If the segment covers no dmers, then we are out of content */
|
||||
if (segment.score == 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* Trim the segment if necessary and if it is too small then we are done */
|
||||
segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
|
||||
if (segmentSize < parameters.d) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* We fill the dictionary from the back to allow the best segments to be
|
||||
* referenced with the smallest offsets.
|
||||
*/
|
||||
tail -= segmentSize;
|
||||
memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
|
||||
DISPLAYUPDATE(
|
||||
2, "\r%u%% ",
|
||||
(U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
|
||||
}
|
||||
DISPLAYLEVEL(2, "\r%79s\r", "");
|
||||
return tail;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* FASTCOVER_best_t is used for two purposes:
|
||||
* 1. Synchronizing threads.
|
||||
* 2. Saving the best parameters and dictionary.
|
||||
*
|
||||
* All of the methods except FASTCOVER_best_init() are thread safe if zstd is
|
||||
* compiled with multithreaded support.
|
||||
*/
|
||||
typedef struct fast_best_s {
|
||||
ZSTD_pthread_mutex_t mutex;
|
||||
ZSTD_pthread_cond_t cond;
|
||||
size_t liveJobs;
|
||||
void *dict;
|
||||
size_t dictSize;
|
||||
ZDICT_fastCover_params_t parameters;
|
||||
size_t compressedSize;
|
||||
} FASTCOVER_best_t;
|
||||
|
||||
/**
|
||||
* Initialize the `FASTCOVER_best_t`.
|
||||
*/
|
||||
static void FASTCOVER_best_init(FASTCOVER_best_t *best) {
|
||||
if (best==NULL) return; /* compatible with init on NULL */
|
||||
(void)ZSTD_pthread_mutex_init(&best->mutex, NULL);
|
||||
(void)ZSTD_pthread_cond_init(&best->cond, NULL);
|
||||
best->liveJobs = 0;
|
||||
best->dict = NULL;
|
||||
best->dictSize = 0;
|
||||
best->compressedSize = (size_t)-1;
|
||||
memset(&best->parameters, 0, sizeof(best->parameters));
|
||||
}
|
||||
|
||||
/**
|
||||
* Wait until liveJobs == 0.
|
||||
*/
|
||||
static void FASTCOVER_best_wait(FASTCOVER_best_t *best) {
|
||||
if (!best) {
|
||||
return;
|
||||
}
|
||||
ZSTD_pthread_mutex_lock(&best->mutex);
|
||||
while (best->liveJobs != 0) {
|
||||
ZSTD_pthread_cond_wait(&best->cond, &best->mutex);
|
||||
}
|
||||
ZSTD_pthread_mutex_unlock(&best->mutex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Call FASTCOVER_best_wait() and then destroy the FASTCOVER_best_t.
|
||||
*/
|
||||
static void FASTCOVER_best_destroy(FASTCOVER_best_t *best) {
|
||||
if (!best) {
|
||||
return;
|
||||
}
|
||||
FASTCOVER_best_wait(best);
|
||||
if (best->dict) {
|
||||
free(best->dict);
|
||||
}
|
||||
ZSTD_pthread_mutex_destroy(&best->mutex);
|
||||
ZSTD_pthread_cond_destroy(&best->cond);
|
||||
}
|
||||
|
||||
/**
|
||||
* Called when a thread is about to be launched.
|
||||
* Increments liveJobs.
|
||||
*/
|
||||
static void FASTCOVER_best_start(FASTCOVER_best_t *best) {
|
||||
if (!best) {
|
||||
return;
|
||||
}
|
||||
ZSTD_pthread_mutex_lock(&best->mutex);
|
||||
++best->liveJobs;
|
||||
ZSTD_pthread_mutex_unlock(&best->mutex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Called when a thread finishes executing, both on error or success.
|
||||
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
|
||||
* If this dictionary is the best so far save it and its parameters.
|
||||
*/
|
||||
static void FASTCOVER_best_finish(FASTCOVER_best_t *best, size_t compressedSize,
|
||||
ZDICT_fastCover_params_t parameters, void *dict,
|
||||
size_t dictSize) {
|
||||
if (!best) {
|
||||
return;
|
||||
}
|
||||
{
|
||||
size_t liveJobs;
|
||||
ZSTD_pthread_mutex_lock(&best->mutex);
|
||||
--best->liveJobs;
|
||||
liveJobs = best->liveJobs;
|
||||
/* If the new dictionary is better */
|
||||
if (compressedSize < best->compressedSize) {
|
||||
/* Allocate space if necessary */
|
||||
if (!best->dict || best->dictSize < dictSize) {
|
||||
if (best->dict) {
|
||||
free(best->dict);
|
||||
}
|
||||
best->dict = malloc(dictSize);
|
||||
if (!best->dict) {
|
||||
best->compressedSize = ERROR(GENERIC);
|
||||
best->dictSize = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
/* Save the dictionary, parameters, and size */
|
||||
memcpy(best->dict, dict, dictSize);
|
||||
best->dictSize = dictSize;
|
||||
best->parameters = parameters;
|
||||
best->compressedSize = compressedSize;
|
||||
}
|
||||
ZSTD_pthread_mutex_unlock(&best->mutex);
|
||||
if (liveJobs == 0) {
|
||||
ZSTD_pthread_cond_broadcast(&best->cond);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parameters for FASTCOVER_tryParameters().
|
||||
*/
|
||||
typedef struct FASTCOVER_tryParameters_data_s {
|
||||
const FASTCOVER_ctx_t *ctx;
|
||||
FASTCOVER_best_t *best;
|
||||
size_t dictBufferCapacity;
|
||||
ZDICT_fastCover_params_t parameters;
|
||||
} FASTCOVER_tryParameters_data_t;
|
||||
|
||||
/**
|
||||
* Tries a set of parameters and updates the FASTCOVER_best_t with the results.
|
||||
* This function is thread safe if zstd is compiled with multithreaded support.
|
||||
* It takes its parameters as an *OWNING* opaque pointer to support threading.
|
||||
*/
|
||||
static void FASTCOVER_tryParameters(void *opaque) {
|
||||
/* Save parameters as local variables */
|
||||
FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t *)opaque;
|
||||
const FASTCOVER_ctx_t *const ctx = data->ctx;
|
||||
const ZDICT_fastCover_params_t parameters = data->parameters;
|
||||
size_t dictBufferCapacity = data->dictBufferCapacity;
|
||||
size_t totalCompressedSize = ERROR(GENERIC);
|
||||
/* Allocate space for hash table, dict, and freqs */
|
||||
BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
|
||||
U32 *freqs = (U32*) malloc((1 << parameters.f) * sizeof(U32));
|
||||
if (!dict || !freqs) {
|
||||
DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
|
||||
goto _cleanup;
|
||||
}
|
||||
/* Copy the frequencies because we need to modify them */
|
||||
memcpy(freqs, ctx->freqs, (1 << parameters.f) * sizeof(U32));
|
||||
/* Build the dictionary */
|
||||
{
|
||||
const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict,
|
||||
dictBufferCapacity, parameters);
|
||||
|
||||
dictBufferCapacity = ZDICT_finalizeDictionary(
|
||||
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
|
||||
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples,
|
||||
parameters.zParams);
|
||||
if (ZDICT_isError(dictBufferCapacity)) {
|
||||
DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
|
||||
goto _cleanup;
|
||||
}
|
||||
}
|
||||
/* Check total compressed size */
|
||||
{
|
||||
/* Pointers */
|
||||
ZSTD_CCtx *cctx;
|
||||
ZSTD_CDict *cdict;
|
||||
void *dst;
|
||||
/* Local variables */
|
||||
size_t dstCapacity;
|
||||
size_t i;
|
||||
/* Allocate dst with enough space to compress the maximum sized sample */
|
||||
{
|
||||
size_t maxSampleSize = 0;
|
||||
i = parameters.splitPoint < 1.0 ? ctx->nbTrainSamples : 0;
|
||||
for (; i < ctx->nbSamples; ++i) {
|
||||
maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize);
|
||||
}
|
||||
dstCapacity = ZSTD_compressBound(maxSampleSize);
|
||||
dst = malloc(dstCapacity);
|
||||
}
|
||||
/* Create the cctx and cdict */
|
||||
cctx = ZSTD_createCCtx();
|
||||
cdict = ZSTD_createCDict(dict, dictBufferCapacity,
|
||||
parameters.zParams.compressionLevel);
|
||||
if (!dst || !cctx || !cdict) {
|
||||
goto _compressCleanup;
|
||||
}
|
||||
/* Compress each sample and sum their sizes (or error) */
|
||||
totalCompressedSize = dictBufferCapacity;
|
||||
i = parameters.splitPoint < 1.0 ? ctx->nbTrainSamples : 0;
|
||||
for (; i < ctx->nbSamples; ++i) {
|
||||
const size_t size = ZSTD_compress_usingCDict(
|
||||
cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
|
||||
ctx->samplesSizes[i], cdict);
|
||||
if (ZSTD_isError(size)) {
|
||||
totalCompressedSize = ERROR(GENERIC);
|
||||
goto _compressCleanup;
|
||||
}
|
||||
totalCompressedSize += size;
|
||||
}
|
||||
_compressCleanup:
|
||||
ZSTD_freeCCtx(cctx);
|
||||
ZSTD_freeCDict(cdict);
|
||||
if (dst) {
|
||||
free(dst);
|
||||
}
|
||||
}
|
||||
|
||||
_cleanup:
|
||||
FASTCOVER_best_finish(data->best, totalCompressedSize, parameters, dict,
|
||||
dictBufferCapacity);
|
||||
free(data);
|
||||
if (dict) {
|
||||
free(dict);
|
||||
}
|
||||
if (freqs) {
|
||||
free(freqs);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Builds one dictionary with the given d, k and f.
 *
 * splitPoint is forced to 1.0 and the global display level is set from
 * parameters.zParams.notificationLevel.
 * Returns the size of the dictionary written to dictBuffer, or an error code.
 */
ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(
    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
    const size_t *samplesSizes, unsigned nbSamples, ZDICT_fastCover_params_t parameters) {
  FASTCOVER_ctx_t ctx;
  BYTE* const dictStart = (BYTE*)dictBuffer;
  size_t tail;
  size_t result;

  parameters.splitPoint = 1.0;
  /* Initialize global data */
  g_displayLevel = parameters.zParams.notificationLevel;

  /* Reject bad input before allocating anything */
  if (!FASTCOVER_checkParameters(parameters, dictBufferCapacity)) {
    DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
    return ERROR(GENERIC);
  }
  if (nbSamples == 0) {
    DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
    return ERROR(GENERIC);
  }
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
                 ZDICT_DICTSIZE_MIN);
    return ERROR(dstSize_tooSmall);
  }

  /* Initialize context */
  if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
                          parameters.d, parameters.splitPoint, parameters.f)) {
    DISPLAYLEVEL(1, "Failed to initialize context\n");
    return ERROR(GENERIC);
  }

  /* Select the content, then let zdict turn it into a dictionary */
  DISPLAYLEVEL(2, "Building dictionary\n");
  tail = FASTCOVER_buildDictionary(&ctx, ctx.freqs, dictBuffer,
                                   dictBufferCapacity, parameters);
  result = ZDICT_finalizeDictionary(
      dictStart, dictBufferCapacity, dictStart + tail, dictBufferCapacity - tail,
      samplesBuffer, samplesSizes, (unsigned)ctx.nbTrainSamples,
      parameters.zParams);
  if (!ZSTD_isError(result)) {
    DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
                 (U32)result);
  }
  FASTCOVER_ctx_destroy(&ctx);
  return result;
}
|
||||
|
||||
|
||||
|
||||
/**
 * Tries several (d, k) pairs, keeps the dictionary that gives the smallest
 * total size on the evaluation samples, copies it to dictBuffer and stores the
 * winning parameters in *parameters.
 *
 * Defaults applied to zero fields of *parameters:
 *   d     : 6 and 8 are both tried
 *   k     : values from 50 to 2000 are tried, `steps` apart
 *   steps : 40
 *   f     : 23
 * splitPoint <= 0 is replaced by DEFAULT_SPLITPOINT.
 * When nbThreads > 1 the candidates are evaluated by a thread pool.
 *
 * f is validated before any context is built, because building a context
 * allocates tables of 2^f entries.
 *
 * Returns the size of the dictionary written to dictBuffer, or an error code.
 */
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(
    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
    const size_t *samplesSizes, unsigned nbSamples,
    ZDICT_fastCover_params_t *parameters) {
  /* constants */
  const unsigned nbThreads = parameters->nbThreads;
  const double splitPoint =
      parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
  const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
  const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
  const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
  const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
  const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
  const unsigned kIterations =
      (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
  const unsigned f = parameters->f == 0 ? 23 : parameters->f;

  /* Local variables */
  const int displayLevel = parameters->zParams.notificationLevel;
  unsigned iteration = 1;
  unsigned d;
  unsigned k;
  FASTCOVER_best_t best;
  POOL_ctx *pool = NULL;

  /* Checks. The global display level is only set further down, so every
   * message here is gated on the caller's level.
   */
  if (splitPoint <= 0 || splitPoint > 1) {
    LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
    return ERROR(GENERIC);
  }
  if (kMinK < kMaxD || kMaxK < kMinK) {
    LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");
    return ERROR(GENERIC);
  }
  if (f > FASTCOVER_MAX_F) {
    LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect f\n");
    return ERROR(GENERIC);
  }
  if (nbSamples == 0) {
    LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n");
    return ERROR(GENERIC);
  }
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
    LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n",
                      ZDICT_DICTSIZE_MIN);
    return ERROR(dstSize_tooSmall);
  }
  if (nbThreads > 1) {
    pool = POOL_create(nbThreads, 1);
    if (!pool) {
      return ERROR(memory_allocation);
    }
  }
  /* Initialization */
  FASTCOVER_best_init(&best);
  /* Turn down global display level to clean up display at level 2 and below */
  g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1;
  /* Loop through d first because each new value needs a new context */
  LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
                    kIterations);
  for (d = kMinD; d <= kMaxD; d += 2) {
    /* Initialize the context for this value of d */
    FASTCOVER_ctx_t ctx;
    LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
    if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f)) {
      LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
      FASTCOVER_best_destroy(&best);
      POOL_free(pool);
      return ERROR(GENERIC);
    }
    /* Loop through k reusing the same context */
    for (k = kMinK; k <= kMaxK; k += kStepSize) {
      /* Prepare the arguments */
      FASTCOVER_tryParameters_data_t *data = (FASTCOVER_tryParameters_data_t *)malloc(
          sizeof(FASTCOVER_tryParameters_data_t));
      LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
      if (!data) {
        LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
        /* best_destroy waits for the running jobs, which still read ctx */
        FASTCOVER_best_destroy(&best);
        FASTCOVER_ctx_destroy(&ctx);
        POOL_free(pool);
        return ERROR(GENERIC);
      }
      data->ctx = &ctx;
      data->best = &best;
      data->dictBufferCapacity = dictBufferCapacity;
      data->parameters = *parameters;
      data->parameters.k = k;
      data->parameters.d = d;
      data->parameters.f = f;
      data->parameters.splitPoint = splitPoint;
      data->parameters.steps = kSteps;
      data->parameters.zParams.notificationLevel = g_displayLevel;
      /* Check the parameters */
      if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity)) {
        DISPLAYLEVEL(1, "fastCover parameters incorrect\n");
        free(data);
        continue;
      }
      /* Call the function and pass ownership of data to it */
      FASTCOVER_best_start(&best);
      if (pool) {
        POOL_add(pool, &FASTCOVER_tryParameters, data);
      } else {
        FASTCOVER_tryParameters(data);
      }
      /* Print status */
      LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
                         (U32)((iteration * 100) / kIterations));
      ++iteration;
    }
    /* Every job must be done with ctx before it is destroyed */
    FASTCOVER_best_wait(&best);
    FASTCOVER_ctx_destroy(&ctx);
  }
  LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
  /* Fill the output buffer and parameters with output of the best parameters */
  {
    const size_t dictSize = best.dictSize;
    if (ZSTD_isError(best.compressedSize)) {
      const size_t compressedSize = best.compressedSize;
      FASTCOVER_best_destroy(&best);
      POOL_free(pool);
      return compressedSize;
    }
    *parameters = best.parameters;
    memcpy(dictBuffer, best.dict, dictSize);
    FASTCOVER_best_destroy(&best);
    POOL_free(pool);
    return dictSize;
  }

}
|
|
@ -1,57 +0,0 @@
|
|||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* memset */
|
||||
#include <time.h> /* clock */
|
||||
#include "mem.h" /* read */
|
||||
#include "pool.h"
|
||||
#include "threading.h"
|
||||
#include "zstd_internal.h" /* includes zstd.h */
|
||||
#ifndef ZDICT_STATIC_LINKING_ONLY
|
||||
#define ZDICT_STATIC_LINKING_ONLY
|
||||
#endif
|
||||
#include "zdict.h"
|
||||
|
||||
|
||||
typedef struct {
|
||||
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
|
||||
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
|
||||
unsigned f; /* log of size of frequency array */
|
||||
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
|
||||
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
|
||||
double splitPoint; /* Percentage of samples used for training: the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
|
||||
ZDICT_params_t zParams;
|
||||
} ZDICT_fastCover_params_t;
|
||||
|
||||
|
||||
/*! ZDICT_optimizeTrainFromBuffer_fastCover():
|
||||
* Train a dictionary from an array of samples using a modified version of the COVER algorithm.
|
||||
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
||||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
||||
* The resulting dictionary will be saved into `dictBuffer`.
|
||||
* All of the parameters are optional; f defaults to 23 when it is zero.
|
||||
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
|
||||
* If steps is zero it defaults to 40.
|
||||
* If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000].
|
||||
*
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
* On success `*parameters` contains the parameters selected.
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(
|
||||
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
||||
const size_t *samplesSizes, unsigned nbSamples,
|
||||
ZDICT_fastCover_params_t *parameters);
|
||||
|
||||
|
||||
/*! ZDICT_trainFromBuffer_fastCover():
|
||||
* Train a dictionary from an array of samples using a modified version of the COVER algorithm.
|
||||
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
||||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
||||
* The resulting dictionary will be saved into `dictBuffer`.
|
||||
* d, k, and f are required.
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(
|
||||
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
||||
const size_t *samplesSizes, unsigned nbSamples, ZDICT_fastCover_params_t parameters);
|
|
@ -1,183 +0,0 @@
|
|||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* strcmp, strlen */
|
||||
#include <errno.h> /* errno */
|
||||
#include <ctype.h>
|
||||
#include "fastCover.h"
|
||||
#include "io.h"
|
||||
#include "util.h"
|
||||
#include "zdict.h"
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Console display
|
||||
***************************************/
|
||||
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
|
||||
|
||||
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
|
||||
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
|
||||
|
||||
#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
|
||||
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
|
||||
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
|
||||
if (displayLevel>=4) fflush(stderr); } } }
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Exceptions
|
||||
***************************************/
|
||||
/* DEBUG defaults to 0, which silences DEBUGOUTPUT(). */
#ifndef DEBUG
#  define DEBUG 0
#endif
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
/* Prints "Error <error> : <message>" and terminates the process with `error`
 * as exit status.
 */
#define EXM_THROW(error, ...)                                             \
{                                                                         \
    DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
    DISPLAY("Error %i : ", error);                                        \
    DISPLAY(__VA_ARGS__);                                                 \
    DISPLAY("\n");                                                        \
    exit(error);                                                          \
}
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
/* Dictionary capacity used by main() unless "maxdict=" overrides it */
static const unsigned g_defaultMaxDictSize = 110 KB;
#define DEFAULT_CLEVEL 3
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* FASTCOVER
|
||||
***************************************/
|
||||
int FASTCOVER_trainFromFiles(const char* dictFileName, sampleInfo *info,
|
||||
unsigned maxDictSize,
|
||||
ZDICT_fastCover_params_t *params) {
|
||||
unsigned const displayLevel = params->zParams.notificationLevel;
|
||||
void* const dictBuffer = malloc(maxDictSize);
|
||||
|
||||
int result = 0;
|
||||
|
||||
/* Checks */
|
||||
if (!dictBuffer)
|
||||
EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
|
||||
|
||||
{ size_t dictSize;
|
||||
/* Run the optimize version if either k or d is not provided */
|
||||
if (!params->d || !params->k) {
|
||||
dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, params);
|
||||
} else {
|
||||
dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, *params);
|
||||
}
|
||||
DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint*100));
|
||||
if (ZDICT_isError(dictSize)) {
|
||||
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
|
||||
result = 1;
|
||||
goto _done;
|
||||
}
|
||||
/* save dict */
|
||||
DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (U32)dictSize, dictFileName);
|
||||
saveDict(dictFileName, dictBuffer, dictSize);
|
||||
}
|
||||
|
||||
/* clean up */
|
||||
_done:
|
||||
free(dictBuffer);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int main(int argCount, const char* argv[])
|
||||
{
|
||||
int displayLevel = 2;
|
||||
const char* programName = argv[0];
|
||||
int operationResult = 0;
|
||||
|
||||
/* Initialize arguments to default values */
|
||||
unsigned k = 0;
|
||||
unsigned d = 0;
|
||||
unsigned f = 23;
|
||||
unsigned steps = 32;
|
||||
unsigned nbThreads = 1;
|
||||
unsigned split = 100;
|
||||
const char* outputFile = "fastCoverDict";
|
||||
unsigned dictID = 0;
|
||||
unsigned maxDictSize = g_defaultMaxDictSize;
|
||||
|
||||
/* Initialize table to store input files */
|
||||
const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
|
||||
unsigned filenameIdx = 0;
|
||||
|
||||
char* fileNamesBuf = NULL;
|
||||
unsigned fileNamesNb = filenameIdx;
|
||||
int followLinks = 0; /* follow directory recursively */
|
||||
const char** extendedFileList = NULL;
|
||||
|
||||
/* Parse arguments */
|
||||
for (int i = 1; i < argCount; i++) {
|
||||
const char* argument = argv[i];
|
||||
if (longCommandWArg(&argument, "k=")) { k = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "d=")) { d = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "f=")) { f = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "steps=")) { steps = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "split=")) { split = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "dictID=")) { dictID = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "in=")) {
|
||||
filenameTable[filenameIdx] = argument;
|
||||
filenameIdx++;
|
||||
continue;
|
||||
}
|
||||
if (longCommandWArg(&argument, "out=")) {
|
||||
outputFile = argument;
|
||||
continue;
|
||||
}
|
||||
DISPLAYLEVEL(1, "Incorrect parameters\n");
|
||||
operationResult = 1;
|
||||
return operationResult;
|
||||
}
|
||||
|
||||
/* Get the list of all files recursively (because followLinks==0)*/
|
||||
extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
|
||||
&fileNamesNb, followLinks);
|
||||
if (extendedFileList) {
|
||||
unsigned u;
|
||||
for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
|
||||
free((void*)filenameTable);
|
||||
filenameTable = extendedFileList;
|
||||
filenameIdx = fileNamesNb;
|
||||
}
|
||||
|
||||
size_t blockSize = 0;
|
||||
|
||||
/* Set up zParams */
|
||||
ZDICT_params_t zParams;
|
||||
zParams.compressionLevel = DEFAULT_CLEVEL;
|
||||
zParams.notificationLevel = displayLevel;
|
||||
zParams.dictID = dictID;
|
||||
|
||||
/* Set up fastCover params */
|
||||
ZDICT_fastCover_params_t params;
|
||||
params.zParams = zParams;
|
||||
params.k = k;
|
||||
params.d = d;
|
||||
params.f = f;
|
||||
params.steps = steps;
|
||||
params.nbThreads = nbThreads;
|
||||
params.splitPoint = (double)split/100;
|
||||
|
||||
/* Build dictionary */
|
||||
sampleInfo* info = getSampleInfo(filenameTable,
|
||||
filenameIdx, blockSize, maxDictSize, zParams.notificationLevel);
|
||||
operationResult = FASTCOVER_trainFromFiles(outputFile, info, maxDictSize, ¶ms);
|
||||
|
||||
/* Free allocated memory */
|
||||
UTIL_freeFileList(extendedFileList, fileNamesBuf);
|
||||
freeSampleInfo(info);
|
||||
|
||||
return operationResult;
|
||||
}
|
|
@ -1,15 +0,0 @@
|
|||
# Exercise the fastCover dictionary builder: build three dictionaries, use each
# one with zstd, then check that invalid parameters are rejected.
echo "Building fastCover dictionary with in=../../../lib/common f=20 out=dict1"
./main in=../../../lib/common f=20 out=dict1
zstd -be3 -D dict1 -r ../../../lib/common -q

echo "Building fastCover dictionary with in=../../../lib/common k=500 d=6 f=24 out=dict2 dictID=100 maxdict=140000"
./main in=../../../lib/common k=500 d=6 f=24 out=dict2 dictID=100 maxdict=140000
zstd -be3 -D dict2 -r ../../../lib/common -q

echo "Building fastCover dictionary with 2 sample sources"
./main in=../../../lib/common in=../../../lib/compress out=dict3
zstd -be3 -D dict3 -r ../../../lib/common -q

echo "Removing dict1 dict2 dict3"
rm -f dict1 dict2 dict3

echo "Testing with invalid parameters, should fail"
# Both invocations must fail. Record any unexpected success so that it is
# reported through the exit status, instead of only the last command counting.
status=0
./main in=../../../lib/common r=10 && status=1
./main in=../../../lib/common d=10 && status=1
exit $status
|
|
@ -1,52 +0,0 @@
|
|||
# Builds and exercises the random dictionary builder (`main`).
# Builder arguments are passed through ARG, e.g.
#   make ARG="in=../../../lib/dictBuilder out=dict100"
ARG :=

CC ?= gcc
CFLAGS ?= -O3
INCLUDES := -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder

TEST_INPUT := ../../../lib
TEST_OUTPUT := randomDict

.PHONY: all
all: main run clean

.PHONY: test
test: main testrun testshell clean

# `run` needs the binary: depend on it rather than on the order of goals.
.PHONY: run
run: main
	echo "Building a random dictionary with given arguments"
	./main $(ARG)

main: main.o io.o random.o libzstd.a
	$(CC) $(CFLAGS) main.o io.o random.o libzstd.a -o main

main.o: main.c
	$(CC) $(CFLAGS) $(INCLUDES) -c main.c

random.o: random.c
	$(CC) $(CFLAGS) $(INCLUDES) -c random.c

io.o: io.c
	$(CC) $(CFLAGS) $(INCLUDES) -c io.c

# Build the static library in the zstd lib directory, then move it here.
libzstd.a:
	$(MAKE) -C ../../../lib libzstd.a
	mv ../../../lib/libzstd.a .

.PHONY: testrun
testrun: main
	echo "Run with $(TEST_INPUT) and $(TEST_OUTPUT) "
	./main in=$(TEST_INPUT) out=$(TEST_OUTPUT)
	zstd -be3 -D $(TEST_OUTPUT) -r $(TEST_INPUT) -q
	rm -f $(TEST_OUTPUT)

# test.sh invokes ./main, so the binary must exist first.
.PHONY: testshell
testshell: main test.sh
	sh test.sh
	echo "Finish running test.sh"

.PHONY: clean
clean:
	rm -f *.o main libzstd.a
	$(MAKE) -C ../../../lib clean
	echo "Cleaning is completed"
|
|
@ -1,20 +0,0 @@
|
|||
Random Dictionary Builder
|
||||
|
||||
### Permitted Arguments:
|
||||
Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in="
|
||||
Output Dictionary (out=dictName): if not provided, default to defaultDict
|
||||
Dictionary ID (dictID=#): nonnegative number; if not provided, default to 0
|
||||
Maximum Dictionary Size (maxdict=#): positive number; in bytes, if not provided, default to 110KB
|
||||
Size of Randomly Selected Segment (k=#): positive number; in bytes; if not provided, default to 200
|
||||
|
||||
### Running Test:
|
||||
make test
|
||||
|
||||
|
||||
### Usage:
|
||||
To build a random dictionary with the provided arguments: make ARG= followed by arguments
|
||||
|
||||
|
||||
### Examples:
|
||||
make ARG="in=../../../lib/dictBuilder out=dict100 dictID=520"
|
||||
make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
|
|
@ -1,284 +0,0 @@
|
|||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* strcmp, strlen */
|
||||
#include <errno.h> /* errno */
|
||||
#include <ctype.h>
|
||||
#include "io.h"
|
||||
#include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */
|
||||
#include "platform.h" /* Large Files support */
|
||||
#include "util.h"
|
||||
#include "zdict.h"
|
||||
|
||||
/*-*************************************
|
||||
* Console display
|
||||
***************************************/
|
||||
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
|
||||
|
||||
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
|
||||
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
|
||||
|
||||
#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
|
||||
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
|
||||
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
|
||||
if (displayLevel>=4) fflush(stderr); } } }
|
||||
|
||||
/*-*************************************
|
||||
* Exceptions
|
||||
***************************************/
|
||||
#ifndef DEBUG
|
||||
# define DEBUG 0
|
||||
#endif
|
||||
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
|
||||
#define EXM_THROW(error, ...) \
|
||||
{ \
|
||||
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
|
||||
DISPLAY("Error %i : ", error); \
|
||||
DISPLAY(__VA_ARGS__); \
|
||||
DISPLAY("\n"); \
|
||||
exit(error); \
|
||||
}
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
|
||||
#define SAMPLESIZE_MAX (128 KB)
|
||||
#define RANDOM_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
|
||||
#define RANDOM_MEMMULT 9
|
||||
static const size_t g_maxMemory = (sizeof(size_t) == 4) ?
|
||||
(2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t));
|
||||
|
||||
#define NOISELENGTH 32
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Commandline related functions
|
||||
***************************************/
|
||||
/*! readU32FromChar() :
 *  Parse an unsigned decimal number at `*stringPtr`.
 *  An optional `K` or `M` suffix multiplies the value by 2^10 or 2^20;
 *  a following `i` and/or `B` is accepted and skipped (so KB, KiB, MB, MiB work).
 *  `*stringPtr` is advanced past everything that was consumed.
 * @return : the parsed value (0 when no digit is present).
 *  Prints an error on stderr and calls exit(1) if the value does not fit.
 */
unsigned readU32FromChar(const char** stringPtr){
    static const char errorMsg[] = "error: numeric value too large";
    unsigned result = 0;

    while ((**stringPtr >= '0') && (**stringPtr <= '9')) {
        /* conservative bound : guarantees result*10 + digit cannot wrap */
        unsigned const max = (((unsigned)(-1)) / 10) - 1;
        if (result > max) { fprintf(stderr, "%s\n", errorMsg); exit(1); }
        result *= 10;
        result += (unsigned)(**stringPtr - '0');
        (*stringPtr)++;
    }

    if ((**stringPtr=='K') || (**stringPtr=='M')) {
        unsigned const maxK = ((unsigned)(-1)) >> 10;
        if (result > maxK) { fprintf(stderr, "%s\n", errorMsg); exit(1); }
        result <<= 10;
        if (**stringPtr=='M') {
            if (result > maxK) { fprintf(stderr, "%s\n", errorMsg); exit(1); }
            result <<= 10;
        }
        (*stringPtr)++;  /* skip `K` or `M` */
        if (**stringPtr=='i') (*stringPtr)++;
        if (**stringPtr=='B') (*stringPtr)++;
    }
    return result;
}
|
||||
|
||||
/*! longCommandWArg() :
 *  Test whether the string at `*stringPtr` starts with `longCommand`.
 *  On a match, `*stringPtr` is advanced just past the prefix.
 * @return : 1 on a match, 0 otherwise (`*stringPtr` is then left untouched).
 */
unsigned longCommandWArg(const char** stringPtr, const char* longCommand){
    size_t const prefixLen = strlen(longCommand);
    if (strncmp(*stringPtr, longCommand, prefixLen) != 0) {
        return 0;
    }
    *stringPtr += prefixLen;
    return 1;
}
|
||||
|
||||
|
||||
/* ********************************************************
|
||||
* File related operations
|
||||
**********************************************************/
|
||||
/** loadFiles() :
|
||||
* load samples from files listed in fileNamesTable into buffer.
|
||||
* works even if buffer is too small to load all samples.
|
||||
* Also provides the size of each sample into sampleSizes table
|
||||
* which must be sized correctly, using DiB_fileStats().
|
||||
* @return : nb of samples effectively loaded into `buffer`
|
||||
* *bufferSizePtr is modified, it provides the amount data loaded within buffer.
|
||||
* sampleSizes is filled with the size of each sample.
|
||||
*/
|
||||
static unsigned loadFiles(void* buffer, size_t* bufferSizePtr, size_t* sampleSizes,
|
||||
unsigned sstSize, const char** fileNamesTable, unsigned nbFiles,
|
||||
size_t targetChunkSize, unsigned displayLevel) {
|
||||
char* const buff = (char*)buffer;
|
||||
size_t pos = 0;
|
||||
unsigned nbLoadedChunks = 0, fileIndex;
|
||||
|
||||
for (fileIndex=0; fileIndex<nbFiles; fileIndex++) {
|
||||
const char* const fileName = fileNamesTable[fileIndex];
|
||||
unsigned long long const fs64 = UTIL_getFileSize(fileName);
|
||||
unsigned long long remainingToLoad = (fs64 == UTIL_FILESIZE_UNKNOWN) ? 0 : fs64;
|
||||
U32 const nbChunks = targetChunkSize ? (U32)((fs64 + (targetChunkSize-1)) / targetChunkSize) : 1;
|
||||
U64 const chunkSize = targetChunkSize ? MIN(targetChunkSize, fs64) : fs64;
|
||||
size_t const maxChunkSize = (size_t)MIN(chunkSize, SAMPLESIZE_MAX);
|
||||
U32 cnb;
|
||||
FILE* const f = fopen(fileName, "rb");
|
||||
if (f==NULL) EXM_THROW(10, "zstd: dictBuilder: %s %s ", fileName, strerror(errno));
|
||||
DISPLAYUPDATE(2, "Loading %s... \r", fileName);
|
||||
for (cnb=0; cnb<nbChunks; cnb++) {
|
||||
size_t const toLoad = (size_t)MIN(maxChunkSize, remainingToLoad);
|
||||
if (toLoad > *bufferSizePtr-pos) break;
|
||||
{ size_t const readSize = fread(buff+pos, 1, toLoad, f);
|
||||
if (readSize != toLoad) EXM_THROW(11, "Pb reading %s", fileName);
|
||||
pos += readSize;
|
||||
sampleSizes[nbLoadedChunks++] = toLoad;
|
||||
remainingToLoad -= targetChunkSize;
|
||||
if (nbLoadedChunks == sstSize) { /* no more space left in sampleSizes table */
|
||||
fileIndex = nbFiles; /* stop there */
|
||||
break;
|
||||
}
|
||||
if (toLoad < targetChunkSize) {
|
||||
fseek(f, (long)(targetChunkSize - toLoad), SEEK_CUR);
|
||||
} } }
|
||||
fclose(f);
|
||||
}
|
||||
DISPLAYLEVEL(2, "\r%79s\r", "");
|
||||
*bufferSizePtr = pos;
|
||||
DISPLAYLEVEL(4, "loaded : %u KB \n", (U32)(pos >> 10))
|
||||
return nbLoadedChunks;
|
||||
}
|
||||
|
||||
#define rotl32(x,r) ((x << r) | (x >> (32 - r)))
|
||||
static U32 getRand(U32* src)
|
||||
{
|
||||
static const U32 prime1 = 2654435761U;
|
||||
static const U32 prime2 = 2246822519U;
|
||||
U32 rand32 = *src;
|
||||
rand32 *= prime1;
|
||||
rand32 ^= prime2;
|
||||
rand32 = rotl32(rand32, 13);
|
||||
*src = rand32;
|
||||
return rand32 >> 5;
|
||||
}
|
||||
|
||||
/* shuffle() :
|
||||
* shuffle a table of file names in a semi-random way
|
||||
* It improves dictionary quality by reducing "locality" impact, so if sample set is very large,
|
||||
* it will load random elements from it, instead of just the first ones. */
|
||||
static void shuffle(const char** fileNamesTable, unsigned nbFiles) {
|
||||
U32 seed = 0xFD2FB528;
|
||||
unsigned i;
|
||||
for (i = nbFiles - 1; i > 0; --i) {
|
||||
unsigned const j = getRand(&seed) % (i + 1);
|
||||
const char* const tmp = fileNamesTable[j];
|
||||
fileNamesTable[j] = fileNamesTable[i];
|
||||
fileNamesTable[i] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*-********************************************************
|
||||
* Dictionary training functions
|
||||
**********************************************************/
|
||||
size_t findMaxMem(unsigned long long requiredMem) {
|
||||
size_t const step = 8 MB;
|
||||
void* testmem = NULL;
|
||||
|
||||
requiredMem = (((requiredMem >> 23) + 1) << 23);
|
||||
requiredMem += step;
|
||||
if (requiredMem > g_maxMemory) requiredMem = g_maxMemory;
|
||||
|
||||
while (!testmem) {
|
||||
testmem = malloc((size_t)requiredMem);
|
||||
requiredMem -= step;
|
||||
}
|
||||
|
||||
free(testmem);
|
||||
return (size_t)requiredMem;
|
||||
}
|
||||
|
||||
/* saveDict() :
 * Write the `buffSize` bytes at `buff` into file `dictFileName` (created or truncated).
 * Exits through EXM_THROW if the file cannot be opened (3),
 * written completely (4) or closed (5).
 */
void saveDict(const char* dictFileName,
              const void* buff, size_t buffSize) {
    FILE* const f = fopen(dictFileName, "wb");
    if (f==NULL) { EXM_THROW(3, "cannot open %s ", dictFileName); }

    {   size_t const written = fwrite(buff, 1, buffSize, f);
        if (written != buffSize) { EXM_THROW(4, "%s : write error", dictFileName); }
    }

    /* fclose() flushes buffered data, so its result matters for a written file */
    if (fclose(f) != 0) { EXM_THROW(5, "%s : flush error", dictFileName); }
}
|
||||
|
||||
/*! getFileStats() :
|
||||
* Given a list of files, and a chunkSize (0 == no chunk, whole files)
|
||||
* provides the amount of data to be loaded and the resulting nb of samples.
|
||||
* This is useful primarily for allocation purpose => sample buffer, and sample sizes table.
|
||||
*/
|
||||
static fileStats getFileStats(const char** fileNamesTable, unsigned nbFiles,
|
||||
size_t chunkSize, unsigned displayLevel) {
|
||||
fileStats fs;
|
||||
unsigned n;
|
||||
memset(&fs, 0, sizeof(fs));
|
||||
for (n=0; n<nbFiles; n++) {
|
||||
U64 const fileSize = UTIL_getFileSize(fileNamesTable[n]);
|
||||
U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? 0 : fileSize;
|
||||
U32 const nbSamples = (U32)(chunkSize ? (srcSize + (chunkSize-1)) / chunkSize : 1);
|
||||
U64 const chunkToLoad = chunkSize ? MIN(chunkSize, srcSize) : srcSize;
|
||||
size_t const cappedChunkSize = (size_t)MIN(chunkToLoad, SAMPLESIZE_MAX);
|
||||
fs.totalSizeToLoad += cappedChunkSize * nbSamples;
|
||||
fs.oneSampleTooLarge |= (chunkSize > 2*SAMPLESIZE_MAX);
|
||||
fs.nbSamples += nbSamples;
|
||||
}
|
||||
DISPLAYLEVEL(4, "Preparing to load : %u KB \n", (U32)(fs.totalSizeToLoad >> 10));
|
||||
return fs;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
sampleInfo* getSampleInfo(const char** fileNamesTable, unsigned nbFiles, size_t chunkSize,
|
||||
unsigned maxDictSize, const unsigned displayLevel) {
|
||||
fileStats const fs = getFileStats(fileNamesTable, nbFiles, chunkSize, displayLevel);
|
||||
size_t* const sampleSizes = (size_t*)malloc(fs.nbSamples * sizeof(size_t));
|
||||
size_t const memMult = RANDOM_MEMMULT;
|
||||
size_t const maxMem = findMaxMem(fs.totalSizeToLoad * memMult) / memMult;
|
||||
size_t loadedSize = (size_t) MIN ((unsigned long long)maxMem, fs.totalSizeToLoad);
|
||||
void* const srcBuffer = malloc(loadedSize+NOISELENGTH);
|
||||
|
||||
/* Checks */
|
||||
if ((!sampleSizes) || (!srcBuffer))
|
||||
EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
|
||||
if (fs.oneSampleTooLarge) {
|
||||
DISPLAYLEVEL(2, "! Warning : some sample(s) are very large \n");
|
||||
DISPLAYLEVEL(2, "! Note that dictionary is only useful for small samples. \n");
|
||||
DISPLAYLEVEL(2, "! As a consequence, only the first %u bytes of each sample are loaded \n", SAMPLESIZE_MAX);
|
||||
}
|
||||
if (fs.nbSamples < 5) {
|
||||
DISPLAYLEVEL(2, "! Warning : nb of samples too low for proper processing ! \n");
|
||||
DISPLAYLEVEL(2, "! Please provide _one file per sample_. \n");
|
||||
DISPLAYLEVEL(2, "! Alternatively, split files into fixed-size blocks representative of samples, with -B# \n");
|
||||
EXM_THROW(14, "nb of samples too low"); /* we now clearly forbid this case */
|
||||
}
|
||||
if (fs.totalSizeToLoad < (unsigned long long)(8 * maxDictSize)) {
|
||||
DISPLAYLEVEL(2, "! Warning : data size of samples too small for target dictionary size \n");
|
||||
DISPLAYLEVEL(2, "! Samples should be about 100x larger than target dictionary size \n");
|
||||
}
|
||||
|
||||
/* init */
|
||||
if (loadedSize < fs.totalSizeToLoad)
|
||||
DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(loadedSize >> 20));
|
||||
|
||||
/* Load input buffer */
|
||||
DISPLAYLEVEL(3, "Shuffling input files\n");
|
||||
shuffle(fileNamesTable, nbFiles);
|
||||
nbFiles = loadFiles(srcBuffer, &loadedSize, sampleSizes, fs.nbSamples,
|
||||
fileNamesTable, nbFiles, chunkSize, displayLevel);
|
||||
|
||||
sampleInfo *info = (sampleInfo *)malloc(sizeof(sampleInfo));
|
||||
|
||||
info->nbSamples = fs.nbSamples;
|
||||
info->samplesSizes = sampleSizes;
|
||||
info->srcBuffer = srcBuffer;
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
|
||||
/* freeSampleInfo() :
 * Release `info` together with the sample buffer and sizes table it points to.
 * Accepts NULL, in which case nothing is done.
 */
void freeSampleInfo(sampleInfo *info) {
    if (info == NULL) return;
    /* members are const-qualified views; free(NULL) is a no-op, so no per-member guard */
    free((void*)(info->samplesSizes));
    free((void*)(info->srcBuffer));
    free(info);
}
|
|
@ -1,60 +0,0 @@
|
|||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* strcmp, strlen */
|
||||
#include <errno.h> /* errno */
|
||||
#include <ctype.h>
|
||||
#include "zstd_internal.h" /* includes zstd.h */
|
||||
#include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */
|
||||
#include "platform.h" /* Large Files support */
|
||||
#include "util.h"
|
||||
#include "zdict.h"
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Structs
|
||||
***************************************/
|
||||
typedef struct {
|
||||
U64 totalSizeToLoad;
|
||||
unsigned oneSampleTooLarge;
|
||||
unsigned nbSamples;
|
||||
} fileStats;
|
||||
|
||||
/* Samples loaded in memory, as returned by getSampleInfo(). */
typedef struct {
    const void* srcBuffer;        /* buffer holding the loaded samples */
    const size_t *samplesSizes;   /* size of each sample, in bytes */
    size_t nbSamples;             /* number of entries in samplesSizes */
} sampleInfo;
|
||||
|
||||
|
||||
|
||||
/*! getSampleInfo():
|
||||
* Load from input files and add samples to buffer
|
||||
* @return: a sampleInfo struct containing information about buffer where samples are stored,
|
||||
* size of each sample, and total number of samples
|
||||
*/
|
||||
sampleInfo* getSampleInfo(const char** fileNamesTable, unsigned nbFiles, size_t chunkSize,
|
||||
unsigned maxDictSize, const unsigned displayLevel);
|
||||
|
||||
|
||||
|
||||
/*! freeSampleInfo():
|
||||
* Free memory allocated for info
|
||||
*/
|
||||
void freeSampleInfo(sampleInfo *info);
|
||||
|
||||
|
||||
|
||||
/*! saveDict():
|
||||
* Save data stored on buff to dictFileName
|
||||
*/
|
||||
void saveDict(const char* dictFileName, const void* buff, size_t buffSize);
|
||||
|
||||
|
||||
unsigned readU32FromChar(const char** stringPtr);
|
||||
|
||||
/** longCommandWArg() :
|
||||
* check if *stringPtr is the same as longCommand.
|
||||
* If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
|
||||
* @return 0 and doesn't modify *stringPtr otherwise.
|
||||
*/
|
||||
unsigned longCommandWArg(const char** stringPtr, const char* longCommand);
|
|
@ -1,161 +0,0 @@
|
|||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* strcmp, strlen */
|
||||
#include <errno.h> /* errno */
|
||||
#include <ctype.h>
|
||||
#include "random.h"
|
||||
#include "io.h"
|
||||
#include "util.h"
|
||||
#include "zdict.h"
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Console display
|
||||
***************************************/
|
||||
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
|
||||
|
||||
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
|
||||
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
|
||||
|
||||
#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
|
||||
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
|
||||
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
|
||||
if (displayLevel>=4) fflush(stderr); } } }
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Exceptions
|
||||
***************************************/
|
||||
#ifndef DEBUG
|
||||
# define DEBUG 0
|
||||
#endif
|
||||
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
|
||||
#define EXM_THROW(error, ...) \
|
||||
{ \
|
||||
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
|
||||
DISPLAY("Error %i : ", error); \
|
||||
DISPLAY(__VA_ARGS__); \
|
||||
DISPLAY("\n"); \
|
||||
exit(error); \
|
||||
}
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
static const unsigned g_defaultMaxDictSize = 110 KB;
|
||||
#define DEFAULT_CLEVEL 3
|
||||
#define DEFAULT_k 200
|
||||
#define DEFAULT_OUTPUTFILE "defaultDict"
|
||||
#define DEFAULT_DICTID 0
|
||||
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* RANDOM
|
||||
***************************************/
|
||||
int RANDOM_trainFromFiles(const char* dictFileName, sampleInfo *info,
|
||||
unsigned maxDictSize,
|
||||
ZDICT_random_params_t *params) {
|
||||
unsigned const displayLevel = params->zParams.notificationLevel;
|
||||
void* const dictBuffer = malloc(maxDictSize);
|
||||
|
||||
int result = 0;
|
||||
|
||||
/* Checks */
|
||||
if (!dictBuffer)
|
||||
EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
|
||||
|
||||
{ size_t dictSize;
|
||||
dictSize = ZDICT_trainFromBuffer_random(dictBuffer, maxDictSize, info->srcBuffer,
|
||||
info->samplesSizes, info->nbSamples, *params);
|
||||
DISPLAYLEVEL(2, "k=%u\n", params->k);
|
||||
if (ZDICT_isError(dictSize)) {
|
||||
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
|
||||
result = 1;
|
||||
goto _done;
|
||||
}
|
||||
/* save dict */
|
||||
DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (U32)dictSize, dictFileName);
|
||||
saveDict(dictFileName, dictBuffer, dictSize);
|
||||
}
|
||||
|
||||
/* clean up */
|
||||
_done:
|
||||
free(dictBuffer);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int main(int argCount, const char* argv[])
|
||||
{
|
||||
int displayLevel = 2;
|
||||
const char* programName = argv[0];
|
||||
int operationResult = 0;
|
||||
|
||||
/* Initialize arguments to default values */
|
||||
unsigned k = DEFAULT_k;
|
||||
const char* outputFile = DEFAULT_OUTPUTFILE;
|
||||
unsigned dictID = DEFAULT_DICTID;
|
||||
unsigned maxDictSize = g_defaultMaxDictSize;
|
||||
|
||||
/* Initialize table to store input files */
|
||||
const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
|
||||
unsigned filenameIdx = 0;
|
||||
|
||||
/* Parse arguments */
|
||||
for (int i = 1; i < argCount; i++) {
|
||||
const char* argument = argv[i];
|
||||
if (longCommandWArg(&argument, "k=")) { k = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "dictID=")) { dictID = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "in=")) {
|
||||
filenameTable[filenameIdx] = argument;
|
||||
filenameIdx++;
|
||||
continue;
|
||||
}
|
||||
if (longCommandWArg(&argument, "out=")) {
|
||||
outputFile = argument;
|
||||
continue;
|
||||
}
|
||||
DISPLAYLEVEL(1, "Incorrect parameters\n");
|
||||
operationResult = 1;
|
||||
return operationResult;
|
||||
}
|
||||
|
||||
char* fileNamesBuf = NULL;
|
||||
unsigned fileNamesNb = filenameIdx;
|
||||
int followLinks = 0; /* follow directory recursively */
|
||||
const char** extendedFileList = NULL;
|
||||
extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
|
||||
&fileNamesNb, followLinks);
|
||||
if (extendedFileList) {
|
||||
unsigned u;
|
||||
for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
|
||||
free((void*)filenameTable);
|
||||
filenameTable = extendedFileList;
|
||||
filenameIdx = fileNamesNb;
|
||||
}
|
||||
|
||||
size_t blockSize = 0;
|
||||
|
||||
ZDICT_random_params_t params;
|
||||
ZDICT_params_t zParams;
|
||||
zParams.compressionLevel = DEFAULT_CLEVEL;
|
||||
zParams.notificationLevel = displayLevel;
|
||||
zParams.dictID = dictID;
|
||||
params.zParams = zParams;
|
||||
params.k = k;
|
||||
|
||||
sampleInfo* info = getSampleInfo(filenameTable,
|
||||
filenameIdx, blockSize, maxDictSize, zParams.notificationLevel);
|
||||
operationResult = RANDOM_trainFromFiles(outputFile, info, maxDictSize, ¶ms);
|
||||
|
||||
/* Free allocated memory */
|
||||
UTIL_freeFileList(extendedFileList, fileNamesBuf);
|
||||
freeSampleInfo(info);
|
||||
|
||||
return operationResult;
|
||||
}
|
|
@ -1,163 +0,0 @@
|
|||
/*-*************************************
|
||||
* Dependencies
|
||||
***************************************/
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* memset */
|
||||
#include <time.h> /* clock */
|
||||
#include "random.h"
|
||||
#include "util.h" /* UTIL_getFileSize, UTIL_getTotalFileSize */
|
||||
#ifndef ZDICT_STATIC_LINKING_ONLY
|
||||
#define ZDICT_STATIC_LINKING_ONLY
|
||||
#endif
|
||||
#include "zdict.h"
|
||||
|
||||
/*-*************************************
|
||||
* Console display
|
||||
***************************************/
|
||||
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
|
||||
|
||||
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
|
||||
if (displayLevel >= l) { \
|
||||
if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
|
||||
g_time = clock(); \
|
||||
DISPLAY(__VA_ARGS__); \
|
||||
} \
|
||||
}
|
||||
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(displayLevel, l, __VA_ARGS__)
|
||||
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
||||
static clock_t g_time = 0;
|
||||
|
||||
|
||||
|
||||
/* ********************************************************
|
||||
* Random Dictionary Builder
|
||||
**********************************************************/
|
||||
/**
 * Returns the sum of the first `nbSamples` entries of `samplesSizes`
 * (0 when `nbSamples` is 0).
 */
static size_t RANDOM_sum(const size_t *samplesSizes, unsigned nbSamples) {
    size_t total = 0;
    const size_t *const end = samplesSizes + nbSamples;
    const size_t *cur;
    for (cur = samplesSizes; cur != end; ++cur) {
        total += *cur;
    }
    return total;
}
|
||||
|
||||
|
||||
/**
|
||||
* A segment is an inclusive range in the source.
|
||||
*/
|
||||
typedef struct {
|
||||
U32 begin;
|
||||
U32 end;
|
||||
} RANDOM_segment_t;
|
||||
|
||||
|
||||
/**
|
||||
* Selects a random segment from totalSamplesSize - k + 1 possible segments
|
||||
*/
|
||||
static RANDOM_segment_t RANDOM_selectSegment(const size_t totalSamplesSize,
|
||||
ZDICT_random_params_t parameters) {
|
||||
const U32 k = parameters.k;
|
||||
RANDOM_segment_t segment;
|
||||
unsigned index;
|
||||
|
||||
/* Randomly generate a number from 0 to sampleSizes - k */
|
||||
index = rand()%(totalSamplesSize - k + 1);
|
||||
|
||||
/* inclusive */
|
||||
segment.begin = index;
|
||||
segment.end = index + k - 1;
|
||||
|
||||
return segment;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Check the validity of the parameters.
|
||||
* Returns non-zero if the parameters are valid and 0 otherwise.
|
||||
*/
|
||||
static int RANDOM_checkParameters(ZDICT_random_params_t parameters,
|
||||
size_t maxDictSize) {
|
||||
/* k is a required parameter */
|
||||
if (parameters.k == 0) {
|
||||
return 0;
|
||||
}
|
||||
/* k <= maxDictSize */
|
||||
if (parameters.k > maxDictSize) {
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Given the prepared context build the dictionary.
|
||||
*/
|
||||
static size_t RANDOM_buildDictionary(const size_t totalSamplesSize, const BYTE *samples,
|
||||
void *dictBuffer, size_t dictBufferCapacity,
|
||||
ZDICT_random_params_t parameters) {
|
||||
BYTE *const dict = (BYTE *)dictBuffer;
|
||||
size_t tail = dictBufferCapacity;
|
||||
const int displayLevel = parameters.zParams.notificationLevel;
|
||||
while (tail > 0) {
|
||||
|
||||
/* Select a segment */
|
||||
RANDOM_segment_t segment = RANDOM_selectSegment(totalSamplesSize, parameters);
|
||||
|
||||
size_t segmentSize;
|
||||
segmentSize = MIN(segment.end - segment.begin + 1, tail);
|
||||
|
||||
tail -= segmentSize;
|
||||
memcpy(dict + tail, samples + segment.begin, segmentSize);
|
||||
DISPLAYUPDATE(
|
||||
2, "\r%u%% ",
|
||||
(U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
|
||||
}
|
||||
|
||||
return tail;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*! ZDICT_trainFromBuffer_random() :
 *  Build a dictionary out of randomly selected segments of the samples,
 *  then finalize it with ZDICT_finalizeDictionary().
 *  Fails when k is 0 or larger than `dictBufferCapacity`, when there is no
 *  sample, when `dictBufferCapacity` is below ZDICT_DICTSIZE_MIN, or when
 *  the samples hold fewer than k bytes in total.
 * @return : size of the dictionary stored into `dictBuffer`,
 *           or an error code, which can be tested with ZDICT_isError().
 */
ZDICTLIB_API size_t ZDICT_trainFromBuffer_random(
    void *dictBuffer, size_t dictBufferCapacity,
    const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
    ZDICT_random_params_t parameters) {
    const int displayLevel = parameters.zParams.notificationLevel;
    BYTE* const dict = (BYTE*)dictBuffer;
    const BYTE *const samples = (const BYTE *)samplesBuffer;
    size_t totalSamplesSize;

    /* Checks */
    if (!RANDOM_checkParameters(parameters, dictBufferCapacity)) {
        DISPLAYLEVEL(1, "k is incorrect\n");
        return ERROR(GENERIC);
    }
    if (nbSamples == 0) {
        DISPLAYLEVEL(1, "Random must have at least one input file\n");
        return ERROR(GENERIC);
    }
    if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
        DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
                     (unsigned)ZDICT_DICTSIZE_MIN);
        return ERROR(dstSize_tooSmall);
    }
    totalSamplesSize = RANDOM_sum(samplesSizes, nbSamples);
    /* a k-byte segment must fit inside the concatenated samples */
    if (totalSamplesSize < parameters.k) {
        DISPLAYLEVEL(1, "Total size of samples must be at least k\n");
        return ERROR(GENERIC);
    }

    DISPLAYLEVEL(2, "Building dictionary\n");
    {
        const size_t tail = RANDOM_buildDictionary(totalSamplesSize, samples,
                                                   dictBuffer, dictBufferCapacity, parameters);
        const size_t dictSize = ZDICT_finalizeDictionary(
            dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
            samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
        if (!ZDICT_isError(dictSize)) {
            DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
                         (U32)dictSize);
        }
        return dictSize;
    }
}
|
|
@ -1,29 +0,0 @@
|
|||
#include <stdio.h> /* fprintf */
|
||||
#include <stdlib.h> /* malloc, free, qsort */
|
||||
#include <string.h> /* memset */
|
||||
#include <time.h> /* clock */
|
||||
#include "zstd_internal.h" /* includes zstd.h */
|
||||
#ifndef ZDICT_STATIC_LINKING_ONLY
|
||||
#define ZDICT_STATIC_LINKING_ONLY
|
||||
#endif
|
||||
#include "zdict.h"
|
||||
|
||||
|
||||
|
||||
typedef struct {
|
||||
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+]; Default to 200 */
|
||||
ZDICT_params_t zParams;
|
||||
} ZDICT_random_params_t;
|
||||
|
||||
|
||||
/*! ZDICT_trainFromBuffer_random():
|
||||
* Train a dictionary from an array of samples.
|
||||
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
||||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
||||
* The resulting dictionary will be saved into `dictBuffer`.
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
* or an error code, which can be tested with ZDICT_isError().
|
||||
*/
|
||||
ZDICTLIB_API size_t ZDICT_trainFromBuffer_random( void *dictBuffer, size_t dictBufferCapacity,
|
||||
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
||||
ZDICT_random_params_t parameters);
|
|
@ -1,14 +0,0 @@
|
|||
# Smoke test for the random dictionary builder: builds three dictionaries
# from the library sources, benchmarks each of them with zstd, then checks
# that an unknown argument is rejected.

# The announced paths match the paths actually passed to ./main.
echo "Building random dictionary with in=../../../lib/common k=200 out=dict1"
./main in=../../../lib/common k=200 out=dict1
zstd -be3 -D dict1 -r ../../../lib/common -q

echo "Building random dictionary with in=../../../lib/common k=500 out=dict2 dictID=100 maxdict=140000"
./main in=../../../lib/common k=500 out=dict2 dictID=100 maxdict=140000
zstd -be3 -D dict2 -r ../../../lib/common -q

echo "Building random dictionary with 2 sample sources"
./main in=../../../lib/common in=../../../lib/compress out=dict3
zstd -be3 -D dict3 -r ../../../lib/common -q

echo "Removing dict1 dict2 dict3"
rm -f dict1 dict2 dict3

# `!` inverts the exit status: the script succeeds only if ./main fails here.
echo "Testing with invalid parameters, should fail"
! ./main r=10
|
|
@ -429,14 +429,14 @@ void shuffleDictionaries(ddict_collection_t dicts)
|
|||
{
|
||||
size_t const nbDicts = dicts.nbDDict;
|
||||
for (size_t r=0; r<nbDicts; r++) {
|
||||
size_t const d = rand() % nbDicts;
|
||||
size_t const d = (size_t)rand() % nbDicts;
|
||||
ZSTD_DDict* tmpd = dicts.ddicts[d];
|
||||
dicts.ddicts[d] = dicts.ddicts[r];
|
||||
dicts.ddicts[r] = tmpd;
|
||||
}
|
||||
for (size_t r=0; r<nbDicts; r++) {
|
||||
size_t const d1 = rand() % nbDicts;
|
||||
size_t const d2 = rand() % nbDicts;
|
||||
size_t const d1 = (size_t)rand() % nbDicts;
|
||||
size_t const d2 = (size_t)rand() % nbDicts;
|
||||
ZSTD_DDict* tmpd = dicts.ddicts[d1];
|
||||
dicts.ddicts[d1] = dicts.ddicts[d2];
|
||||
dicts.ddicts[d2] = tmpd;
|
||||
|
@ -528,7 +528,7 @@ size_t decompress(const void* src, size_t srcSize, void* dst, size_t dstCapacity
|
|||
static int benchMem(slice_collection_t dstBlocks,
|
||||
slice_collection_t srcBlocks,
|
||||
ddict_collection_t dictionaries,
|
||||
int nbRounds)
|
||||
unsigned nbRounds)
|
||||
{
|
||||
assert(dstBlocks.nbSlices == srcBlocks.nbSlices);
|
||||
|
||||
|
@ -586,7 +586,7 @@ int bench(const char** fileNameTable, unsigned nbFiles,
|
|||
const char* dictionary,
|
||||
size_t blockSize, int clevel,
|
||||
unsigned nbDictMax, unsigned nbBlocks,
|
||||
int nbRounds)
|
||||
unsigned nbRounds)
|
||||
{
|
||||
int result = 0;
|
||||
|
||||
|
@ -707,7 +707,7 @@ static unsigned readU32FromChar(const char** stringPtr)
|
|||
while ((**stringPtr >='0') && (**stringPtr <='9')) {
|
||||
unsigned const max = (((unsigned)(-1)) / 10) - 1;
|
||||
assert(result <= max); /* check overflow */
|
||||
result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
|
||||
result *= 10, result += (unsigned)**stringPtr - '0', (*stringPtr)++ ;
|
||||
}
|
||||
if ((**stringPtr=='K') || (**stringPtr=='M')) {
|
||||
unsigned const maxK = ((unsigned)(-1)) >> 10;
|
||||
|
@ -729,7 +729,7 @@ static unsigned readU32FromChar(const char** stringPtr)
|
|||
* If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
|
||||
* @return 0 and doesn't modify *stringPtr otherwise.
|
||||
*/
|
||||
static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
|
||||
static int longCommandWArg(const char** stringPtr, const char* longCommand)
|
||||
{
|
||||
size_t const comSize = strlen(longCommand);
|
||||
int const result = !strncmp(*stringPtr, longCommand, comSize);
|
||||
|
@ -765,12 +765,12 @@ int bad_usage(const char* exeName)
|
|||
int main (int argc, const char** argv)
|
||||
{
|
||||
int recursiveMode = 0;
|
||||
int nbRounds = BENCH_TIME_DEFAULT_S;
|
||||
unsigned nbRounds = BENCH_TIME_DEFAULT_S;
|
||||
const char* const exeName = argv[0];
|
||||
|
||||
if (argc < 2) return bad_usage(exeName);
|
||||
|
||||
const char** nameTable = (const char**)malloc(argc * sizeof(const char*));
|
||||
const char** nameTable = (const char**)malloc((size_t)argc * sizeof(const char*));
|
||||
assert(nameTable != NULL);
|
||||
unsigned nameIdx = 0;
|
||||
|
||||
|
@ -791,26 +791,27 @@ int main (int argc, const char** argv)
|
|||
if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--nbDicts=")) { nbDicts = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--nbBlocks=")) { nbBlocks = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--clevel=")) { cLevel = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "-")) { cLevel = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--clevel=")) { cLevel = (int)readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "-")) { cLevel = (int)readU32FromChar(&argument); continue; }
|
||||
/* anything that's not a command is a filename */
|
||||
nameTable[nameIdx++] = argument;
|
||||
}
|
||||
|
||||
const char** filenameTable = nameTable;
|
||||
unsigned nbFiles = nameIdx;
|
||||
char* buffer_containing_filenames = NULL;
|
||||
FileNamesTable* filenameTable;
|
||||
|
||||
if (recursiveMode) {
|
||||
#ifndef UTIL_HAS_CREATEFILELIST
|
||||
assert(0); /* missing capability, do not run */
|
||||
#endif
|
||||
filenameTable = UTIL_createFileList(nameTable, nameIdx, &buffer_containing_filenames, &nbFiles, 1 /* follow_links */);
|
||||
filenameTable = UTIL_createExpandedFNT(nameTable, nameIdx, 1 /* follow_links */);
|
||||
} else {
|
||||
filenameTable = UTIL_assembleFileNamesTable(nameTable, nameIdx, NULL);
|
||||
nameTable = NULL; /* UTIL_createFileNamesTable() takes ownership of nameTable */
|
||||
}
|
||||
|
||||
int result = bench(filenameTable, nbFiles, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds);
|
||||
int result = bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds);
|
||||
|
||||
free(buffer_containing_filenames);
|
||||
UTIL_freeFileNamesTable(filenameTable);
|
||||
free(nameTable);
|
||||
|
||||
return result;
|
||||
|
|
|
@ -337,23 +337,19 @@ Options::Status Options::parse(int argc, const char **argv) {
|
|||
|
||||
// Translate input files/directories into files to (de)compress
|
||||
if (recursive) {
|
||||
char *scratchBuffer = nullptr;
|
||||
unsigned numFiles = 0;
|
||||
const char **files =
|
||||
UTIL_createFileList(localInputFiles.data(), localInputFiles.size(),
|
||||
&scratchBuffer, &numFiles, followLinks);
|
||||
FileNamesTable* const files = UTIL_createExpandedFNT(localInputFiles.data(), localInputFiles.size(), followLinks);
|
||||
if (files == nullptr) {
|
||||
std::fprintf(stderr, "Error traversing directories\n");
|
||||
return Status::Failure;
|
||||
}
|
||||
auto guard =
|
||||
makeScopeGuard([&] { UTIL_freeFileList(files, scratchBuffer); });
|
||||
if (numFiles == 0) {
|
||||
makeScopeGuard([&] { UTIL_freeFileNamesTable(files); });
|
||||
if (files->tableSize == 0) {
|
||||
std::fprintf(stderr, "No files found\n");
|
||||
return Status::Failure;
|
||||
}
|
||||
inputFiles.resize(numFiles);
|
||||
std::copy(files, files + numFiles, inputFiles.begin());
|
||||
inputFiles.resize(files->tableSize);
|
||||
std::copy(files->fileNames, files->fileNames + files->tableSize, inputFiles.begin());
|
||||
} else {
|
||||
inputFiles.resize(localInputFiles.size());
|
||||
std::copy(localInputFiles.begin(), localInputFiles.end(),
|
||||
|
|
392
programs/util.c
392
programs/util.c
|
@ -17,22 +17,83 @@ extern "C" {
|
|||
* Dependencies
|
||||
******************************************/
|
||||
#include "util.h" /* note : ensure that platform.h is included first ! */
|
||||
#include <stdlib.h> /* malloc, realloc, free */
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC, nanosleep */
|
||||
#include <errno.h>
|
||||
#include <assert.h>
|
||||
|
||||
#if defined(_WIN32)
|
||||
# include <sys/utime.h> /* utime */
|
||||
# include <io.h> /* _chmod */
|
||||
#else
|
||||
# include <unistd.h> /* chown, stat */
|
||||
# if PLATFORM_POSIX_VERSION < 200809L
|
||||
# include <utime.h> /* utime */
|
||||
# else
|
||||
# include <fcntl.h> /* AT_FDCWD */
|
||||
# include <sys/stat.h> /* utimensat */
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__)
|
||||
#include <direct.h> /* needed for _mkdir in windows */
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) || (PLATFORM_POSIX_VERSION >= 200112L) /* opendir, readdir require POSIX.1-2001 */
|
||||
# include <dirent.h> /* opendir, readdir */
|
||||
# include <string.h> /* strerror, memcpy */
|
||||
#endif /* #ifdef _WIN32 */
|
||||
|
||||
|
||||
/*-****************************************
|
||||
* Internal Macros
|
||||
******************************************/
|
||||
|
||||
/* CONTROL is almost like an assert(), but is never disabled.
|
||||
* It's designed for failures that may happen rarely,
|
||||
* but we don't want to maintain a specific error code path for them,
|
||||
* such as a malloc() returning NULL for example.
|
||||
* Since it's always active, this macro can trigger side effects.
|
||||
*/
|
||||
#define CONTROL(c) { \
|
||||
if (!(c)) { \
|
||||
UTIL_DISPLAYLEVEL(1, "Error : %s, %i : %s", \
|
||||
__FILE__, __LINE__, #c); \
|
||||
exit(1); \
|
||||
} }
|
||||
|
||||
/* console log */
|
||||
#define UTIL_DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define UTIL_DISPLAYLEVEL(l, ...) { if (g_utilDisplayLevel>=l) { UTIL_DISPLAY(__VA_ARGS__); } }
|
||||
|
||||
/* A modified version of realloc().
|
||||
* If UTIL_realloc() fails the original block is freed.
|
||||
*/
|
||||
UTIL_STATIC void* UTIL_realloc(void *ptr, size_t size)
|
||||
{
|
||||
void *newptr = realloc(ptr, size);
|
||||
if (newptr) return newptr;
|
||||
free(ptr);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define chmod _chmod
|
||||
#endif
|
||||
|
||||
|
||||
/*-****************************************
|
||||
* Console log
|
||||
******************************************/
|
||||
int g_utilDisplayLevel;
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
/* Initial size, and growth step, of the buffer holding expanded file names */
#define LIST_SIZE_INCREASE   (8*1024)
/* Largest accepted file-of-filenames : 50 MiB.
 * Fully parenthesized so the macro expands safely inside any expression. */
#define MAX_FILE_OF_FILE_NAMES_SIZE ((1<<20)*50)
|
||||
|
||||
|
||||
/*-*************************************
|
||||
|
@ -194,28 +255,218 @@ U64 UTIL_getFileSize(const char* infilename)
|
|||
}
|
||||
|
||||
|
||||
U64 UTIL_getTotalFileSize(const char* const * const fileNamesTable, unsigned nbFiles)
|
||||
U64 UTIL_getTotalFileSize(const char* const * fileNamesTable, unsigned nbFiles)
|
||||
{
|
||||
U64 total = 0;
|
||||
int error = 0;
|
||||
unsigned n;
|
||||
for (n=0; n<nbFiles; n++) {
|
||||
U64 const size = UTIL_getFileSize(fileNamesTable[n]);
|
||||
error |= (size == UTIL_FILESIZE_UNKNOWN);
|
||||
if (size == UTIL_FILESIZE_UNKNOWN) return UTIL_FILESIZE_UNKNOWN;
|
||||
total += size;
|
||||
}
|
||||
return error ? UTIL_FILESIZE_UNKNOWN : total;
|
||||
return total;
|
||||
}
|
||||
|
||||
|
||||
/* readLineFromFile() :
 * condition : @file must be valid, and not have reached its end.
 * Reads at most one line (up to @len-1 characters) into @buf,
 * replacing the final '\n', if any, by `\0`.
 * @return : nb of bytes used in @buf, including the final `\0`,
 *           or 0 if nothing could be read (end of file or read error).
 */
static size_t readLineFromFile(char* buf, size_t len, FILE* file)
{
    assert(!feof(file));
    /* fgets() returns NULL when the previous line ended exactly at the end of
     * the file : the end-of-file flag is only raised by this failed read.
     * That is a normal termination, not a fatal error. */
    if (fgets(buf, (int) len, file) == NULL) return 0;
    {   size_t linelen = strlen(buf);
        if (linelen == 0) return 0;
        if (buf[linelen-1] == '\n') linelen--;
        buf[linelen] = '\0';
        return linelen+1;   /* +1 : the terminating `\0` is part of the count */
    }
}
|
||||
|
||||
/* Conditions :
|
||||
* size of @inputFileName file must be < @dstCapacity
|
||||
* @dst must be initialized
|
||||
* @return : nb of lines
|
||||
* or -1 if there's an error
|
||||
*/
|
||||
static int
|
||||
readLinesFromFile(void* dst, size_t dstCapacity,
|
||||
const char* inputFileName)
|
||||
{
|
||||
int nbFiles = 0;
|
||||
size_t pos = 0;
|
||||
char* const buf = (char*)dst;
|
||||
FILE* const inputFile = fopen(inputFileName, "r");
|
||||
|
||||
assert(dst != NULL);
|
||||
|
||||
if(!inputFile) {
|
||||
if (g_utilDisplayLevel >= 1) perror("zstd:util:readLinesFromFile");
|
||||
return -1;
|
||||
}
|
||||
|
||||
while ( !feof(inputFile) ) {
|
||||
size_t const lineLength = readLineFromFile(buf+pos, dstCapacity-pos, inputFile);
|
||||
if (lineLength == 0) break;
|
||||
assert(pos + lineLength < dstCapacity);
|
||||
pos += lineLength;
|
||||
++nbFiles;
|
||||
}
|
||||
|
||||
CONTROL( fclose(inputFile) == 0 );
|
||||
|
||||
return nbFiles;
|
||||
}
|
||||
|
||||
/*Note: buf is not freed in case function successfully created table because filesTable->fileNames[0] = buf*/
|
||||
FileNamesTable*
|
||||
UTIL_createFileNamesTable_fromFileName(const char* inputFileName)
|
||||
{
|
||||
size_t nbFiles = 0;
|
||||
char* buf;
|
||||
size_t bufSize;
|
||||
size_t pos = 0;
|
||||
|
||||
if (!UTIL_fileExist(inputFileName) || !UTIL_isRegularFile(inputFileName))
|
||||
return NULL;
|
||||
|
||||
{ U64 const inputFileSize = UTIL_getFileSize(inputFileName);
|
||||
if(inputFileSize > MAX_FILE_OF_FILE_NAMES_SIZE)
|
||||
return NULL;
|
||||
bufSize = (size_t)(inputFileSize + 1); /* (+1) to add '\0' at the end of last filename */
|
||||
}
|
||||
|
||||
buf = (char*) malloc(bufSize);
|
||||
CONTROL( buf != NULL );
|
||||
|
||||
{ int const ret_nbFiles = readLinesFromFile(buf, bufSize, inputFileName);
|
||||
|
||||
if (ret_nbFiles <= 0) {
|
||||
free(buf);
|
||||
return NULL;
|
||||
}
|
||||
nbFiles = (size_t)ret_nbFiles;
|
||||
}
|
||||
|
||||
{ const char** filenamesTable = (const char**) malloc(nbFiles * sizeof(*filenamesTable));
|
||||
CONTROL(filenamesTable != NULL);
|
||||
|
||||
{ size_t fnb;
|
||||
for (fnb = 0, pos = 0; fnb < nbFiles; fnb++) {
|
||||
filenamesTable[fnb] = buf+pos;
|
||||
pos += strlen(buf+pos)+1; /* +1 for the finishing `\0` */
|
||||
} }
|
||||
assert(pos <= bufSize);
|
||||
|
||||
return UTIL_assembleFileNamesTable(filenamesTable, nbFiles, buf);
|
||||
}
|
||||
}
|
||||
|
||||
FileNamesTable*
|
||||
UTIL_assembleFileNamesTable(const char** filenames, size_t tableSize, char* buf)
|
||||
{
|
||||
FileNamesTable* const table = (FileNamesTable*) malloc(sizeof(*table));
|
||||
CONTROL(table != NULL);
|
||||
table->fileNames = filenames;
|
||||
table->buf = buf;
|
||||
table->tableSize = tableSize;
|
||||
table->tableCapacity = tableSize;
|
||||
return table;
|
||||
}
|
||||
|
||||
/* UTIL_freeFileNamesTable() :
 * Releases the table, its array of names, and its name buffer.
 * Accepts NULL, in which case nothing happens.
 */
void UTIL_freeFileNamesTable(FileNamesTable* table)
{
    if (table != NULL) {
        free(table->buf);
        free((void*)table->fileNames);   /* cast : drop the const qualifier for free() */
        free(table);
    }
}
|
||||
|
||||
FileNamesTable* UTIL_allocateFileNamesTable(size_t tableSize)
|
||||
{
|
||||
const char** const fnTable = (const char**)malloc(tableSize * sizeof(*fnTable));
|
||||
FileNamesTable* fnt;
|
||||
if (fnTable==NULL) return NULL;
|
||||
fnt = UTIL_assembleFileNamesTable(fnTable, tableSize, NULL);
|
||||
fnt->tableSize = 0; /* the table is empty */
|
||||
return fnt;
|
||||
}
|
||||
|
||||
/* UTIL_refFilename() :
 * Appends @filename to @fnt. Only the pointer is stored : the string is not copied.
 * condition : @fnt must still have a free slot (checked by assert() only).
 */
void UTIL_refFilename(FileNamesTable* fnt, const char* filename)
{
    assert(fnt->tableSize < fnt->tableCapacity);
    fnt->fileNames[fnt->tableSize++] = filename;
}
|
||||
|
||||
/* getTotalTableSize() :
 * @return : nb of bytes needed to store all names of @table back to back,
 *           each followed by its `\0`.
 * Counting stops at the first NULL entry, if any.
 */
static size_t getTotalTableSize(FileNamesTable* table)
{
    size_t totalSize = 0;
    size_t idx = 0;

    while (idx < table->tableSize && table->fileNames[idx] != NULL) {
        /* +1 to add '\0' at the end of each fileName */
        totalSize += strlen(table->fileNames[idx]) + 1;
        idx++;
    }
    return totalSize;
}
|
||||
|
||||
/* UTIL_mergeFileNamesTable() :
 * Builds a new table holding a private copy of the names of @table1,
 * followed by those of @table2. Copy of a table stops at its first NULL entry.
 * @table1 and @table2 are both freed before returning.
 * Exits the program (via CONTROL) on allocation failure.
 * @return : the merged table; its tableSize is the nb of names actually
 *           copied, its tableCapacity the nb of slots allocated.
 */
FileNamesTable*
UTIL_mergeFileNamesTable(FileNamesTable* table1, FileNamesTable* table2)
{
    unsigned newTableIdx = 0;
    size_t pos = 0;
    size_t newTotalTableSize;
    size_t nbSlots;
    char* buf;

    FileNamesTable* const newTable = UTIL_assembleFileNamesTable(NULL, 0, NULL);
    CONTROL( newTable != NULL );

    newTotalTableSize = getTotalTableSize(table1) + getTotalTableSize(table2);
    nbSlots = table1->tableSize + table2->tableSize;

    /* calloc(0, ..) is allowed to return NULL : always request at least one
     * element, so that CONTROL only triggers on a genuine allocation failure */
    buf = (char*) calloc(newTotalTableSize ? newTotalTableSize : 1, sizeof(*buf));
    CONTROL ( buf != NULL );

    newTable->buf = buf;
    newTable->tableSize = nbSlots;
    newTable->tableCapacity = nbSlots;   /* keep tableSize <= tableCapacity */
    newTable->fileNames = (const char **) calloc(nbSlots ? nbSlots : 1, sizeof(*(newTable->fileNames)));
    CONTROL ( newTable->fileNames != NULL );

    {   unsigned idx1;
        for( idx1=0 ; (idx1 < table1->tableSize) && table1->fileNames[idx1] && (pos < newTotalTableSize); ++idx1, ++newTableIdx) {
            size_t const curLen = strlen(table1->fileNames[idx1]);
            /* final `\0` is not copied : buf is zero-initialized by calloc() */
            memcpy(buf+pos, table1->fileNames[idx1], curLen);
            assert(newTableIdx < newTable->tableSize);
            newTable->fileNames[newTableIdx] = buf+pos;
            pos += curLen+1;
    }   }

    {   unsigned idx2;
        for( idx2=0 ; (idx2 < table2->tableSize) && table2->fileNames[idx2] && (pos < newTotalTableSize) ; ++idx2, ++newTableIdx) {
            size_t const curLen = strlen(table2->fileNames[idx2]);
            memcpy(buf+pos, table2->fileNames[idx2], curLen);
            assert(newTableIdx < newTable->tableSize);
            newTable->fileNames[newTableIdx] = buf+pos;
            pos += curLen+1;
    }   }
    assert(pos <= newTotalTableSize);
    /* nb of names actually copied (smaller if a NULL entry was met) */
    newTable->tableSize = newTableIdx;

    UTIL_freeFileNamesTable(table1);
    UTIL_freeFileNamesTable(table2);

    return newTable;
}
|
||||
|
||||
#ifdef _WIN32
|
||||
int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
|
||||
static int UTIL_prepareFileList(const char* dirName,
|
||||
char** bufStart, size_t* pos,
|
||||
char** bufEnd, int followLinks)
|
||||
{
|
||||
char* path;
|
||||
int dirLength, fnameLength, pathLength, nbFiles = 0;
|
||||
size_t dirLength, pathLength;
|
||||
int nbFiles = 0;
|
||||
WIN32_FIND_DATAA cFile;
|
||||
HANDLE hFile;
|
||||
|
||||
dirLength = (int)strlen(dirName);
|
||||
dirLength = strlen(dirName);
|
||||
path = (char*) malloc(dirLength + 3);
|
||||
if (!path) return 0;
|
||||
|
||||
|
@ -232,7 +483,7 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char
|
|||
free(path);
|
||||
|
||||
do {
|
||||
fnameLength = (int)strlen(cFile.cFileName);
|
||||
size_t const fnameLength = strlen(cFile.cFileName);
|
||||
path = (char*) malloc(dirLength + fnameLength + 2);
|
||||
if (!path) { FindClose(hFile); return 0; }
|
||||
memcpy(path, dirName, dirLength);
|
||||
|
@ -260,8 +511,7 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char
|
|||
memcpy(*bufStart + *pos, path, pathLength+1 /* include final \0 */);
|
||||
*pos += pathLength + 1;
|
||||
nbFiles++;
|
||||
}
|
||||
}
|
||||
} }
|
||||
free(path);
|
||||
} while (FindNextFileA(hFile, &cFile));
|
||||
|
||||
|
@ -271,12 +521,13 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char
|
|||
|
||||
#elif defined(__linux__) || (PLATFORM_POSIX_VERSION >= 200112L) /* opendir, readdir require POSIX.1-2001 */
|
||||
|
||||
int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
|
||||
static int UTIL_prepareFileList(const char *dirName,
|
||||
char** bufStart, size_t* pos,
|
||||
char** bufEnd, int followLinks)
|
||||
{
|
||||
DIR *dir;
|
||||
struct dirent *entry;
|
||||
char* path;
|
||||
size_t dirLength, fnameLength, pathLength;
|
||||
DIR* dir;
|
||||
struct dirent * entry;
|
||||
size_t dirLength;
|
||||
int nbFiles = 0;
|
||||
|
||||
if (!(dir = opendir(dirName))) {
|
||||
|
@ -287,6 +538,8 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char
|
|||
dirLength = strlen(dirName);
|
||||
errno = 0;
|
||||
while ((entry = readdir(dir)) != NULL) {
|
||||
char* path;
|
||||
size_t fnameLength, pathLength;
|
||||
if (strcmp (entry->d_name, "..") == 0 ||
|
||||
strcmp (entry->d_name, ".") == 0) continue;
|
||||
fnameLength = strlen(entry->d_name);
|
||||
|
@ -320,14 +573,13 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char
|
|||
memcpy(*bufStart + *pos, path, pathLength + 1); /* with final \0 */
|
||||
*pos += pathLength + 1;
|
||||
nbFiles++;
|
||||
}
|
||||
}
|
||||
} }
|
||||
free(path);
|
||||
errno = 0; /* clear errno after UTIL_isDirectory, UTIL_prepareFileList */
|
||||
}
|
||||
|
||||
if (errno != 0) {
|
||||
UTIL_DISPLAYLEVEL(1, "readdir(%s) error: %s\n", dirName, strerror(errno));
|
||||
UTIL_DISPLAYLEVEL(1, "readdir(%s) error: %s \n", dirName, strerror(errno));
|
||||
free(*bufStart);
|
||||
*bufStart = NULL;
|
||||
}
|
||||
|
@ -337,10 +589,12 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char
|
|||
|
||||
#else
|
||||
|
||||
int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
|
||||
static int UTIL_prepareFileList(const char *dirName,
|
||||
char** bufStart, size_t* pos,
|
||||
char** bufEnd, int followLinks)
|
||||
{
|
||||
(void)bufStart; (void)bufEnd; (void)pos; (void)followLinks;
|
||||
UTIL_DISPLAYLEVEL(1, "Directory %s ignored (compiled without _WIN32 or _POSIX_C_SOURCE)\n", dirName);
|
||||
UTIL_DISPLAYLEVEL(1, "Directory %s ignored (compiled without _WIN32 or _POSIX_C_SOURCE) \n", dirName);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -367,68 +621,68 @@ const char* UTIL_getFileExtension(const char* infilename)
|
|||
return extension;
|
||||
}
|
||||
|
||||
/*
|
||||
* UTIL_createFileList - takes a list of files and directories (params: inputNames, inputNamesNb), scans directories,
|
||||
* and returns a new list of files (params: return value, allocatedBuffer, allocatedNamesNb).
|
||||
* After finishing usage of the list the structures should be freed with UTIL_freeFileList(params: return value, allocatedBuffer)
|
||||
* In case of error UTIL_createFileList returns NULL and UTIL_freeFileList should not be called.
|
||||
*/
|
||||
const char**
|
||||
UTIL_createFileList(const char **inputNames, unsigned inputNamesNb,
|
||||
char** allocatedBuffer, unsigned* allocatedNamesNb,
|
||||
int followLinks)
|
||||
|
||||
FileNamesTable*
|
||||
UTIL_createExpandedFNT(const char** inputNames, size_t nbIfns, int followLinks)
|
||||
{
|
||||
size_t pos;
|
||||
unsigned i, nbFiles;
|
||||
unsigned nbFiles;
|
||||
char* buf = (char*)malloc(LIST_SIZE_INCREASE);
|
||||
char* bufend = buf + LIST_SIZE_INCREASE;
|
||||
|
||||
if (!buf) return NULL;
|
||||
|
||||
for (i=0, pos=0, nbFiles=0; i<inputNamesNb; i++) {
|
||||
if (!UTIL_isDirectory(inputNames[i])) {
|
||||
size_t const len = strlen(inputNames[i]);
|
||||
if (buf + pos + len >= bufend) {
|
||||
ptrdiff_t newListSize = (bufend - buf) + LIST_SIZE_INCREASE;
|
||||
assert(newListSize >= 0);
|
||||
buf = (char*)UTIL_realloc(buf, (size_t)newListSize);
|
||||
bufend = buf + newListSize;
|
||||
if (!buf) return NULL;
|
||||
}
|
||||
if (buf + pos + len < bufend) {
|
||||
memcpy(buf+pos, inputNames[i], len+1); /* including final \0 */
|
||||
pos += len + 1;
|
||||
nbFiles++;
|
||||
}
|
||||
} else {
|
||||
nbFiles += (unsigned)UTIL_prepareFileList(inputNames[i], &buf, &pos, &bufend, followLinks);
|
||||
if (buf == NULL) return NULL;
|
||||
} }
|
||||
{ size_t ifnNb, pos;
|
||||
for (ifnNb=0, pos=0, nbFiles=0; ifnNb<nbIfns; ifnNb++) {
|
||||
if (!UTIL_isDirectory(inputNames[ifnNb])) {
|
||||
size_t const len = strlen(inputNames[ifnNb]);
|
||||
if (buf + pos + len >= bufend) {
|
||||
ptrdiff_t newListSize = (bufend - buf) + LIST_SIZE_INCREASE;
|
||||
assert(newListSize >= 0);
|
||||
buf = (char*)UTIL_realloc(buf, (size_t)newListSize);
|
||||
if (!buf) return NULL;
|
||||
bufend = buf + newListSize;
|
||||
}
|
||||
if (buf + pos + len < bufend) {
|
||||
memcpy(buf+pos, inputNames[ifnNb], len+1); /* including final \0 */
|
||||
pos += len + 1;
|
||||
nbFiles++;
|
||||
}
|
||||
} else {
|
||||
nbFiles += (unsigned)UTIL_prepareFileList(inputNames[ifnNb], &buf, &pos, &bufend, followLinks);
|
||||
if (buf == NULL) return NULL;
|
||||
} } }
|
||||
|
||||
if (nbFiles == 0) { free(buf); return NULL; }
|
||||
|
||||
{ const char** const fileTable = (const char**)malloc((nbFiles + 1) * sizeof(*fileTable));
|
||||
if (!fileTable) { free(buf); return NULL; }
|
||||
{ size_t ifnNb, pos;
|
||||
const char** const fileNamesTable = (const char**)malloc((nbFiles + 1) * sizeof(*fileNamesTable));
|
||||
if (!fileNamesTable) { free(buf); return NULL; }
|
||||
|
||||
for (i = 0, pos = 0; i < nbFiles; i++) {
|
||||
fileTable[i] = buf + pos;
|
||||
if (buf + pos > bufend) { free(buf); free((void*)fileTable); return NULL; }
|
||||
pos += strlen(fileTable[i]) + 1;
|
||||
for (ifnNb = 0, pos = 0; ifnNb < nbFiles; ifnNb++) {
|
||||
fileNamesTable[ifnNb] = buf + pos;
|
||||
if (buf + pos > bufend) { free(buf); free((void*)fileNamesTable); return NULL; }
|
||||
pos += strlen(fileNamesTable[ifnNb]) + 1;
|
||||
}
|
||||
|
||||
*allocatedBuffer = buf;
|
||||
*allocatedNamesNb = nbFiles;
|
||||
|
||||
return fileTable;
|
||||
return UTIL_assembleFileNamesTable(fileNamesTable, nbFiles, buf);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*-****************************************
|
||||
* Console log
|
||||
******************************************/
|
||||
int g_utilDisplayLevel;
|
||||
/* UTIL_expandFNT() :
 * Replaces *fnt by the table returned by UTIL_createExpandedFNT() for its
 * names, and frees the previous table.
 * *fnt receives whatever UTIL_createExpandedFNT() returned, NULL included.
 */
void UTIL_expandFNT(FileNamesTable** fnt, int followLinks)
{
    FileNamesTable* const previous = *fnt;
    /* the previous table is still read here : free it only afterwards */
    *fnt = UTIL_createExpandedFNT(previous->fileNames, previous->tableSize, followLinks);
    UTIL_freeFileNamesTable(previous);
}
|
||||
|
||||
/* UTIL_createFNT_fromROTable() :
 * Creates a table from a private copy of the array @filenames.
 * Only the array of pointers is duplicated : the strings themselves
 * are referenced, not copied (the table's buf is NULL).
 * @return : the new table, or NULL if the copy cannot be allocated.
 */
FileNamesTable* UTIL_createFNT_fromROTable(const char** filenames, size_t nbFilenames)
{
    size_t const nbBytes = nbFilenames * sizeof(*filenames);
    const char** const namesCopy = (const char**)malloc(nbBytes);

    if (namesCopy != NULL) {
        /* void* : mitigate a Visual compiler bug or limitation */
        memcpy((void*)namesCopy, filenames, nbBytes);
        return UTIL_assembleFileNamesTable(namesCopy, nbFilenames, NULL);
    }
    return NULL;
}
|
||||
|
||||
|
||||
/*-****************************************
|
||||
|
@ -483,8 +737,7 @@ int UTIL_countPhysicalCores(void)
|
|||
}
|
||||
} else {
|
||||
done = TRUE;
|
||||
}
|
||||
}
|
||||
} }
|
||||
|
||||
ptr = buffer;
|
||||
|
||||
|
@ -596,8 +849,7 @@ int UTIL_countPhysicalCores(void)
|
|||
} else if (ferror(cpuinfo)) {
|
||||
/* fall back on the sysconf value */
|
||||
goto failed;
|
||||
}
|
||||
}
|
||||
} }
|
||||
if (siblings && cpu_cores) {
|
||||
ratio = siblings / cpu_cores;
|
||||
}
|
||||
|
|
150
programs/util.h
150
programs/util.h
|
@ -20,37 +20,23 @@ extern "C" {
|
|||
* Dependencies
|
||||
******************************************/
|
||||
#include "platform.h" /* PLATFORM_POSIX_VERSION, ZSTD_NANOSLEEP_SUPPORT, ZSTD_SETPRIORITY_SUPPORT */
|
||||
#include <stdlib.h> /* malloc, realloc, free */
|
||||
#include <stddef.h> /* size_t, ptrdiff_t */
|
||||
#include <stdio.h> /* fprintf */
|
||||
#include <sys/types.h> /* stat, utime */
|
||||
#include <sys/stat.h> /* stat, chmod */
|
||||
#if defined(_WIN32)
|
||||
# include <sys/utime.h> /* utime */
|
||||
# include <io.h> /* _chmod */
|
||||
#else
|
||||
# include <unistd.h> /* chown, stat */
|
||||
# if PLATFORM_POSIX_VERSION < 200809L
|
||||
# include <utime.h> /* utime */
|
||||
# else
|
||||
# include <fcntl.h> /* AT_FDCWD */
|
||||
# include <sys/stat.h> /* utimensat */
|
||||
# endif
|
||||
#endif
|
||||
#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC, nanosleep */
|
||||
#include "mem.h" /* U32, U64 */
|
||||
#include "mem.h" /* U64 */
|
||||
|
||||
|
||||
/*-************************************************************
|
||||
* Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
|
||||
***************************************************************/
|
||||
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
|
||||
# define UTIL_fseek _fseeki64
|
||||
# define UTIL_fseek _fseeki64
|
||||
#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
|
||||
# define UTIL_fseek fseeko
|
||||
#elif defined(__MINGW32__) && defined(__MSVCRT__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS)
|
||||
# define UTIL_fseek fseeko64
|
||||
# define UTIL_fseek fseeko64
|
||||
#else
|
||||
# define UTIL_fseek fseek
|
||||
# define UTIL_fseek fseek
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -106,8 +92,6 @@ extern "C" {
|
|||
* Console log
|
||||
******************************************/
|
||||
extern int g_utilDisplayLevel;
|
||||
#define UTIL_DISPLAY(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define UTIL_DISPLAYLEVEL(l, ...) { if (g_utilDisplayLevel>=l) { UTIL_DISPLAY(__VA_ARGS__); } }
|
||||
|
||||
|
||||
/*-****************************************
|
||||
|
@ -131,7 +115,7 @@ int UTIL_isFIFO(const char* infilename);
|
|||
|
||||
#define UTIL_FILESIZE_UNKNOWN ((U64)(-1))
|
||||
U64 UTIL_getFileSize(const char* infilename);
|
||||
U64 UTIL_getTotalFileSize(const char* const * const fileNamesTable, unsigned nbFiles);
|
||||
U64 UTIL_getTotalFileSize(const char* const * fileNamesTable, unsigned nbFiles);
|
||||
int UTIL_getFileStat(const char* infilename, stat_t* statbuf);
|
||||
int UTIL_setFileStat(const char* filename, stat_t* statbuf);
|
||||
int UTIL_chmod(char const* filename, mode_t permissions); /*< like chmod, but avoid changing permission of /dev/null */
|
||||
|
@ -139,47 +123,111 @@ int UTIL_compareStr(const void *p1, const void *p2);
|
|||
const char* UTIL_getFileExtension(const char* infilename);
|
||||
|
||||
|
||||
/*
|
||||
* A modified version of realloc().
|
||||
* If UTIL_realloc() fails the original block is freed.
|
||||
*/
|
||||
UTIL_STATIC void* UTIL_realloc(void* ptr, size_t size)
|
||||
{
|
||||
void* const newptr = realloc(ptr, size);
|
||||
if (newptr) return newptr;
|
||||
free(ptr);
|
||||
return NULL;
|
||||
}
|
||||
/*-****************************************
|
||||
* Lists of Filenames
|
||||
******************************************/
|
||||
|
||||
int UTIL_prepareFileList(const char* dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks);
|
||||
/* FileNamesTable :
 * a table of file name pointers, plus the optional buffer holding the names themselves. */
typedef struct
|
||||
{ const char** fileNames;
|
||||
char* buf; /* fileNames are stored in this buffer (or are read-only) */
|
||||
size_t tableSize; /* nb of fileNames */
|
||||
size_t tableCapacity; /* NOTE(review): presumably nb of slots allocated in fileNames (>= tableSize) - confirm */
|
||||
} FileNamesTable;
|
||||
|
||||
/*! UTIL_createFileNamesTable_fromFileName() :
|
||||
* read filenames from @inputFileName, and store them into returned object.
|
||||
* @return : a FileNamesTable*, or NULL in case of error (ex: @inputFileName doesn't exist).
|
||||
* Note: inputFileSize must be less than 50MB
|
||||
*/
|
||||
FileNamesTable*
|
||||
UTIL_createFileNamesTable_fromFileName(const char* inputFileName);
|
||||
|
||||
/*! UTIL_assembleFileNamesTable() :
|
||||
* This function takes ownership of its arguments, @filenames and @buf,
|
||||
* and stores them inside the created object.
|
||||
* note : this function never fails,
|
||||
* it will rather exit() the program if internal allocation fails.
|
||||
* @return : resulting FileNamesTable* object.
|
||||
*/
|
||||
FileNamesTable*
|
||||
UTIL_assembleFileNamesTable(const char** filenames, size_t tableSize, char* buf);
|
||||
|
||||
/*! UTIL_freeFileNamesTable() :
|
||||
* This function is compatible with NULL argument and never fails.
|
||||
*/
|
||||
void UTIL_freeFileNamesTable(FileNamesTable* table);
|
||||
|
||||
/*! UTIL_mergeFileNamesTable():
|
||||
* @return : FileNamesTable*, concatenation of @table1 and @table2
|
||||
* note: @table1 and @table2 are consumed (freed) by this operation
|
||||
*/
|
||||
FileNamesTable*
|
||||
UTIL_mergeFileNamesTable(FileNamesTable* table1, FileNamesTable* table2);
|
||||
|
||||
|
||||
/*! UTIL_expandFNT() :
|
||||
* read names from @fnt, and expand those corresponding to directories
|
||||
* update @fnt, now containing only file names,
|
||||
* @return : nothing (the function is declared `void`); errors are reported through @fnt, see note below
|
||||
* note : in case of error, @fnt[0] is NULL
|
||||
*/
|
||||
void UTIL_expandFNT(FileNamesTable** fnt, int followLinks);
|
||||
|
||||
/*! UTIL_createFNT_fromROTable() :
|
||||
* copy the @filenames pointer table inside the returned object.
|
||||
* The names themselves are still stored in their original buffer, which must outlive the object.
|
||||
* @return : a FileNamesTable* object,
|
||||
* or NULL in case of error
|
||||
*/
|
||||
FileNamesTable*
|
||||
UTIL_createFNT_fromROTable(const char** filenames, size_t nbFilenames);
|
||||
|
||||
/*! UTIL_allocateFileNamesTable() :
|
||||
* Allocates a table of const char*, to insert read-only names later on.
|
||||
* The created FileNamesTable* doesn't hold a buffer.
|
||||
* @return : FileNamesTable*, or NULL, if allocation fails.
|
||||
*/
|
||||
FileNamesTable* UTIL_allocateFileNamesTable(size_t tableSize);
|
||||
|
||||
|
||||
/*! UTIL_refFilename() :
|
||||
* Add a reference to read-only name into @fnt table.
|
||||
* As @filename is only referenced, its lifetime must outlive @fnt.
|
||||
* Internal table must be large enough to reference a new member,
|
||||
* otherwise it's UB (protected by an `assert()`).
|
||||
*/
|
||||
void UTIL_refFilename(FileNamesTable* fnt, const char* filename);
|
||||
|
||||
|
||||
/* UTIL_createExpandedFNT() is only active if UTIL_HAS_CREATEFILELIST is defined.
|
||||
* Otherwise, UTIL_createExpandedFNT() is a shell function which does nothing
|
||||
* apart from displaying a warning message.
|
||||
*/
|
||||
#ifdef _WIN32
|
||||
# define UTIL_HAS_CREATEFILELIST
|
||||
#elif defined(__linux__) || (PLATFORM_POSIX_VERSION >= 200112L) /* opendir, readdir require POSIX.1-2001 */
|
||||
# define UTIL_HAS_CREATEFILELIST
|
||||
# include <dirent.h> /* opendir, readdir */
|
||||
# include <string.h> /* strerror, memcpy */
|
||||
#else
|
||||
#endif /* #ifdef _WIN32 */
|
||||
/* do not define UTIL_HAS_CREATEFILELIST */
|
||||
#endif
|
||||
|
||||
/*
|
||||
* UTIL_createFileList - takes a list of files and directories (params: inputNames, inputNamesNb), scans directories,
|
||||
* and returns a new list of files (params: return value, allocatedBuffer, allocatedNamesNb).
|
||||
* After finishing usage of the list the structures should be freed with UTIL_freeFileList(params: return value, allocatedBuffer)
|
||||
* In case of error UTIL_createFileList returns NULL and UTIL_freeFileList should not be called.
|
||||
/*! UTIL_createExpandedFNT() :
|
||||
* read names from @filenames, and expand those corresponding to directories.
|
||||
* links are followed or not depending on @followLinks directive.
|
||||
* @return : an expanded FileNamesTable*, where each name is a file
|
||||
* or NULL in case of error
|
||||
*/
|
||||
const char**
|
||||
UTIL_createFileList(const char **inputNames, unsigned inputNamesNb,
|
||||
char** allocatedBuffer, unsigned* allocatedNamesNb,
|
||||
int followLinks);
|
||||
FileNamesTable*
|
||||
UTIL_createExpandedFNT(const char** filenames, size_t nbFilenames, int followLinks);
|
||||
|
||||
/* UTIL_freeFileList() :
 * Releases the name table and the names buffer described for UTIL_createFileList().
 * Either argument may be NULL : free(NULL) is a no-op, so no guard is needed.
 * The const qualifier of @filenameTable is cast away only to hand the pointer to free().
 */
UTIL_STATIC void UTIL_freeFileList(const char** filenameTable, char* allocatedBuffer)
{
    free(allocatedBuffer);
    free((void*)filenameTable);
}
|
||||
|
||||
/*-****************************************
|
||||
* System
|
||||
******************************************/
|
||||
|
||||
int UTIL_countPhysicalCores(void);
|
||||
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
|
147
programs/zstd.1
147
programs/zstd.1
|
@ -1,5 +1,5 @@
|
|||
.
|
||||
.TH "ZSTD" "1" "October 2019" "zstd 1.4.4" "User Commands"
|
||||
.TH "ZSTD" "1" "November 2019" "zstd 1.4.4" "User Commands"
|
||||
.
|
||||
.SH "NAME"
|
||||
\fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files
|
||||
|
@ -95,120 +95,97 @@ Display information related to a zstd compressed file, such as size, ratio, and
|
|||
.
|
||||
.SS "Operation modifiers"
|
||||
.
|
||||
.TP
|
||||
\fB\-#\fR
|
||||
\fB#\fR compression level [1\-19] (default: 3)
|
||||
.IP "\(bu" 4
|
||||
\fB\-#\fR: \fB#\fR compression level [1\-19] (default: 3)
|
||||
.
|
||||
.TP
|
||||
\fB\-\-fast[=#]\fR
|
||||
switch to ultra\-fast compression levels\. If \fB=#\fR is not present, it defaults to \fB1\fR\. The higher the value, the faster the compression speed, at the cost of some compression ratio\. This setting overwrites compression level if one was set previously\. Similarly, if a compression level is set after \fB\-\-fast\fR, it overrides it\.
|
||||
.IP "\(bu" 4
|
||||
\fB\-\-fast[=#]\fR: switch to ultra\-fast compression levels\. If \fB=#\fR is not present, it defaults to \fB1\fR\. The higher the value, the faster the compression speed, at the cost of some compression ratio\. This setting overwrites compression level if one was set previously\. Similarly, if a compression level is set after \fB\-\-fast\fR, it overrides it\.
|
||||
.
|
||||
.TP
|
||||
\fB\-\-ultra\fR
|
||||
unlocks high compression levels 20+ (maximum 22), using a lot more memory\. Note that decompression will also require more memory when using these levels\.
|
||||
.IP "\(bu" 4
|
||||
\fB\-\-ultra\fR: unlocks high compression levels 20+ (maximum 22), using a lot more memory\. Note that decompression will also require more memory when using these levels\.
|
||||
.
|
||||
.TP
|
||||
\fB\-\-long[=#]\fR
|
||||
enables long distance matching with \fB#\fR \fBwindowLog\fR; if \fB#\fR is not present, it defaults to \fB27\fR\. This increases the window size (\fBwindowLog\fR) and memory usage for both the compressor and decompressor\. This setting is designed to improve the compression ratio for files with long matches at a large distance\.
|
||||
.IP "\(bu" 4
|
||||
\fB\-\-long[=#]\fR: enables long distance matching with \fB#\fR \fBwindowLog\fR; if \fB#\fR is not present, it defaults to \fB27\fR\. This increases the window size (\fBwindowLog\fR) and memory usage for both the compressor and decompressor\. This setting is designed to improve the compression ratio for files with long matches at a large distance\.
|
||||
.
|
||||
.IP
|
||||
Note: If \fBwindowLog\fR is set to larger than 27, \fB\-\-long=windowLog\fR or \fB\-\-memory=windowSize\fR needs to be passed to the decompressor\.
|
||||
.
|
||||
.TP
|
||||
\fB\-T#\fR, \fB\-\-threads=#\fR
|
||||
Compress using \fB#\fR working threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to ZSTDMT_NBTHREADS_MAX==200\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\.
|
||||
.IP "\(bu" 4
|
||||
\fB\-T#\fR, \fB\-\-threads=#\fR: Compress using \fB#\fR working threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to ZSTDMT_NBTHREADS_MAX==200\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\.
|
||||
.
|
||||
.TP
|
||||
\fB\-\-single\-thread\fR
|
||||
Does not spawn a thread for compression, use a single thread for both I/O and compression\. In this mode, compression is serialized with I/O, which is slightly slower\. (This is different from \fB\-T1\fR, which spawns 1 compression thread in parallel of I/O)\. This mode is the only one available when multithread support is disabled\. Single\-thread mode features lower memory usage\. Final compressed result is slightly different from \fB\-T1\fR\.
|
||||
.IP "\(bu" 4
|
||||
\fB\-\-single\-thread\fR: Does not spawn a thread for compression, use a single thread for both I/O and compression\. In this mode, compression is serialized with I/O, which is slightly slower\. (This is different from \fB\-T1\fR, which spawns 1 compression thread in parallel of I/O)\. This mode is the only one available when multithread support is disabled\. Single\-thread mode features lower memory usage\. Final compressed result is slightly different from \fB\-T1\fR\.
|
||||
.
|
||||
.TP
|
||||
\fB\-\-adapt[=min=#,max=#]\fR
|
||||
\fBzstd\fR will dynamically adapt compression level to perceived I/O conditions\. Compression level adaptation can be observed live by using command \fB\-v\fR\. Adaptation can be constrained between supplied \fBmin\fR and \fBmax\fR levels\. The feature works when combined with multi\-threading and \fB\-\-long\fR mode\. It does not work with \fB\-\-single\-thread\fR\. It sets window size to 8 MB by default (can be changed manually, see \fBwlog\fR)\. Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible\. \fInote\fR : at the time of this writing, \fB\-\-adapt\fR can remain stuck at low speed when combined with multiple worker threads (>=2)\.
|
||||
.IP "\(bu" 4
|
||||
\fB\-\-adapt[=min=#,max=#]\fR : \fBzstd\fR will dynamically adapt compression level to perceived I/O conditions\. Compression level adaptation can be observed live by using command \fB\-v\fR\. Adaptation can be constrained between supplied \fBmin\fR and \fBmax\fR levels\. The feature works when combined with multi\-threading and \fB\-\-long\fR mode\. It does not work with \fB\-\-single\-thread\fR\. It sets window size to 8 MB by default (can be changed manually, see \fBwlog\fR)\. Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible\. \fInote\fR : at the time of this writing, \fB\-\-adapt\fR can remain stuck at low speed when combined with multiple worker threads (>=2)\.
|
||||
.
|
||||
.TP
|
||||
\fB\-\-stream\-size=#\fR
|
||||
Sets the pledged source size of input coming from a stream\. This value must be exact, as it will be included in the produced frame header\. Incorrect stream sizes will cause an error\. This information will be used to better optimize compression parameters, resulting in better and potentially faster compression, especially for smaller source sizes\.
|
||||
.IP "\(bu" 4
|
||||
\fB\-\-stream\-size=#\fR : Sets the pledged source size of input coming from a stream\. This value must be exact, as it will be included in the produced frame header\. Incorrect stream sizes will cause an error\. This information will be used to better optimize compression parameters, resulting in better and potentially faster compression, especially for smaller source sizes\.
|
||||
.
|
||||
.TP
|
||||
\fB\-\-size\-hint=#\fR
|
||||
When handling input from a stream, \fBzstd\fR must guess how large the source size will be when optimizing compression parameters\. If the stream size is relatively small, this guess may be a poor one, resulting in a higher compression ratio than expected\. This feature allows for controlling the guess when needed\. Exact guesses result in better compression ratios\. Overestimates result in slightly degraded compression ratios, while underestimates may result in significant degradation\.
|
||||
.IP "\(bu" 4
|
||||
\fB\-\-size\-hint=#\fR: When handling input from a stream, \fBzstd\fR must guess how large the source size will be when optimizing compression parameters\. If the stream size is relatively small, this guess may be a poor one, resulting in a higher compression ratio than expected\. This feature allows for controlling the guess when needed\. Exact guesses result in better compression ratios\. Overestimates result in slightly degraded compression ratios, while underestimates may result in significant degradation\.
|
||||
.
|
||||
.TP
|
||||
\fB\-\-rsyncable\fR
|
||||
\fBzstd\fR will periodically synchronize the compression state to make the compressed file more rsync\-friendly\. There is a negligible impact to compression ratio, and the faster compression levels will see a small compression speed hit\. This feature does not work with \fB\-\-single\-thread\fR\. You probably don\'t want to use it with long range mode, since it will decrease the effectiveness of the synchronization points, but your mileage may vary\.
|
||||
.IP "\(bu" 4
|
||||
\fB\-\-rsyncable\fR : \fBzstd\fR will periodically synchronize the compression state to make the compressed file more rsync\-friendly\. There is a negligible impact to compression ratio, and the faster compression levels will see a small compression speed hit\. This feature does not work with \fB\-\-single\-thread\fR\. You probably don\'t want to use it with long range mode, since it will decrease the effectiveness of the synchronization points, but your mileage may vary\.
|
||||
.
|
||||
.TP
|
||||
\fB\-D file\fR
|
||||
use \fBfile\fR as Dictionary to compress or decompress FILE(s)
|
||||
.IP "\(bu" 4
|
||||
\fB\-D file\fR: use \fBfile\fR as Dictionary to compress or decompress FILE(s)
|
||||
.
|
||||
.TP
|
||||
\fB\-\-no\-dictID\fR
|
||||
do not store dictionary ID within frame header (dictionary compression)\. The decoder will have to rely on implicit knowledge about which dictionary to use, it won\'t be able to check if it\'s correct\.
|
||||
.IP "\(bu" 4
|
||||
\fB\-\-no\-dictID\fR: do not store dictionary ID within frame header (dictionary compression)\. The decoder will have to rely on implicit knowledge about which dictionary to use, it won\'t be able to check if it\'s correct\.
|
||||
.
|
||||
.TP
|
||||
\fB\-o file\fR
|
||||
save result into \fBfile\fR (only possible with a single \fIINPUT\-FILE\fR)
|
||||
.IP "\(bu" 4
|
||||
\fB\-o file\fR: save result into \fBfile\fR (only possible with a single \fIINPUT\-FILE\fR)
|
||||
.
|
||||
.TP
|
||||
\fB\-f\fR, \fB\-\-force\fR
|
||||
overwrite output without prompting, and (de)compress symbolic links
|
||||
.IP "\(bu" 4
|
||||
\fB\-f\fR, \fB\-\-force\fR: overwrite output without prompting, and (de)compress symbolic links
|
||||
.
|
||||
.TP
|
||||
\fB\-c\fR, \fB\-\-stdout\fR
|
||||
force write to standard output, even if it is the console
|
||||
.IP "\(bu" 4
|
||||
\fB\-c\fR, \fB\-\-stdout\fR: force write to standard output, even if it is the console
|
||||
.
|
||||
.TP
|
||||
\fB\-\-[no\-]sparse\fR
|
||||
enable / disable sparse FS support, to make files with many zeroes smaller on disk\. Creating sparse files may save disk space and speed up decompression by reducing the amount of disk I/O\. default: enabled when output is into a file, and disabled when output is stdout\. This setting overrides default and can force sparse mode over stdout\.
|
||||
.IP "\(bu" 4
|
||||
\fB\-\-[no\-]sparse\fR: enable / disable sparse FS support, to make files with many zeroes smaller on disk\. Creating sparse files may save disk space and speed up decompression by reducing the amount of disk I/O\. default: enabled when output is into a file, and disabled when output is stdout\. This setting overrides default and can force sparse mode over stdout\.
|
||||
.
|
||||
.TP
|
||||
\fB\-\-rm\fR
|
||||
remove source file(s) after successful compression or decompression
|
||||
.IP "\(bu" 4
|
||||
\fB\-\-rm\fR: remove source file(s) after successful compression or decompression
|
||||
.
|
||||
.TP
|
||||
\fB\-k\fR, \fB\-\-keep\fR
|
||||
keep source file(s) after successful compression or decompression\. This is the default behavior\.
|
||||
.IP "\(bu" 4
|
||||
\fB\-k\fR, \fB\-\-keep\fR: keep source file(s) after successful compression or decompression\. This is the default behavior\.
|
||||
.
|
||||
.TP
|
||||
\fB\-r\fR
|
||||
operate recursively on directories
|
||||
.IP "\(bu" 4
|
||||
\fB\-r\fR: operate recursively on directories
|
||||
.
|
||||
.TP
|
||||
\fB\-\-output\-dir\-flat[=dir]\fR
|
||||
resulting files are stored into target \fBdir\fR directory, instead of same directory as origin file\. Be aware that this command can introduce name collision issues, if multiple files, from different directories, end up having the same name\. Collision resolution ensures first file with a given name will be present in \fBdir\fR, while in combination with \fB\-f\fR, the last file will be present instead\.
|
||||
.IP "\(bu" 4
|
||||
\fB\-\-filelist=FILE\fR: read a list of files to process as content from \fBFILE\fR\. Format is compatible with \fBls\fR output, with one file per line\.
|
||||
.
|
||||
.TP
|
||||
\fB\-\-format=FORMAT\fR
|
||||
compress and decompress in other formats\. If compiled with support, zstd can compress to or decompress from other compression algorithm formats\. Possibly available options are \fBzstd\fR, \fBgzip\fR, \fBxz\fR, \fBlzma\fR, and \fBlz4\fR\. If no such format is provided, \fBzstd\fR is the default\.
|
||||
.IP "\(bu" 4
|
||||
\fB\-\-output\-dir\-flat[=dir]\fR: resulting files are stored into target \fBdir\fR directory, instead of same directory as origin file\. Be aware that this command can introduce name collision issues, if multiple files, from different directories, end up having the same name\. Collision resolution ensures first file with a given name will be present in \fBdir\fR, while in combination with \fB\-f\fR, the last file will be present instead\.
|
||||
.
|
||||
.TP
|
||||
\fB\-h\fR/\fB\-H\fR, \fB\-\-help\fR
|
||||
display help/long help and exit
|
||||
.IP "\(bu" 4
|
||||
\fB\-\-format=FORMAT\fR: compress and decompress in other formats\. If compiled with support, zstd can compress to or decompress from other compression algorithm formats\. Possibly available options are \fBzstd\fR, \fBgzip\fR, \fBxz\fR, \fBlzma\fR, and \fBlz4\fR\. If no such format is provided, \fBzstd\fR is the default\.
|
||||
.
|
||||
.TP
|
||||
\fB\-V\fR, \fB\-\-version\fR
|
||||
display version number and exit\. Advanced : \fB\-vV\fR also displays supported formats\. \fB\-vvV\fR also displays POSIX support\.
|
||||
.IP "\(bu" 4
|
||||
\fB\-h\fR/\fB\-H\fR, \fB\-\-help\fR: display help/long help and exit
|
||||
.
|
||||
.TP
|
||||
\fB\-v\fR
|
||||
verbose mode
|
||||
.IP "\(bu" 4
|
||||
\fB\-V\fR, \fB\-\-version\fR: display version number and exit\. Advanced : \fB\-vV\fR also displays supported formats\. \fB\-vvV\fR also displays POSIX support\.
|
||||
.
|
||||
.TP
|
||||
\fB\-q\fR, \fB\-\-quiet\fR
|
||||
suppress warnings, interactivity, and notifications\. specify twice to suppress errors too\.
|
||||
.IP "\(bu" 4
|
||||
\fB\-v\fR: verbose mode
|
||||
.
|
||||
.TP
|
||||
\fB\-\-no\-progress\fR
|
||||
do not display the progress bar, but keep all other messages\.
|
||||
.IP "\(bu" 4
|
||||
\fB\-q\fR, \fB\-\-quiet\fR: suppress warnings, interactivity, and notifications\. specify twice to suppress errors too\.
|
||||
.
|
||||
.TP
|
||||
\fB\-C\fR, \fB\-\-[no\-]check\fR
|
||||
add integrity check computed from uncompressed data (default: enabled)
|
||||
.IP "\(bu" 4
|
||||
\fB\-\-no\-progress\fR: do not display the progress bar, but keep all other messages\.
|
||||
.
|
||||
.TP
|
||||
\fB\-\-\fR
|
||||
All arguments after \fB\-\-\fR are treated as files
|
||||
.IP "\(bu" 4
|
||||
\fB\-C\fR, \fB\-\-[no\-]check\fR: add integrity check computed from uncompressed data (default: enabled)
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
\fB\-\-\fR: All arguments after \fB\-\-\fR are treated as files
|
||||
.
|
||||
.IP "" 0
|
||||
.
|
||||
.SS "Restricted usage of Environment Variables"
|
||||
Using environment variables to set parameters has security implications\. Therefore, this avenue is intentionally restricted\. Only \fBZSTD_CLEVEL\fR is supported currently, for setting compression level\. \fBZSTD_CLEVEL\fR can be used to set the level between 1 and 19 (the "normal" range)\. If the value of \fBZSTD_CLEVEL\fR is not a valid integer, it will be ignored with a warning message\. \fBZSTD_CLEVEL\fR just replaces the default compression level (\fB3\fR)\. It can be overridden by corresponding command line arguments\.
|
||||
|
|
|
@ -191,6 +191,9 @@ the last one takes effect.
|
|||
This is the default behavior.
|
||||
* `-r`:
|
||||
operate recursively on directories
|
||||
* `--filelist=FILE`:
|
||||
read a list of files to process as content from `FILE`.
|
||||
Format is compatible with `ls` output, with one file per line.
|
||||
* `--output-dir-flat[=dir]`:
|
||||
resulting files are stored into target `dir` directory,
|
||||
instead of same directory as origin file.
|
||||
|
|
|
@ -38,8 +38,7 @@
|
|||
#ifndef ZSTD_NODICT
|
||||
# include "dibio.h" /* ZDICT_cover_params_t, DiB_trainFromFiles() */
|
||||
#endif
|
||||
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_minCLevel */
|
||||
#include "zstd.h" /* ZSTD_VERSION_STRING, ZSTD_maxCLevel */
|
||||
#include "zstd.h" /* ZSTD_VERSION_STRING, ZSTD_minCLevel, ZSTD_maxCLevel */
|
||||
|
||||
|
||||
/*-************************************
|
||||
|
@ -156,7 +155,8 @@ static int usage_advanced(const char* programName)
|
|||
#endif
|
||||
#ifdef UTIL_HAS_CREATEFILELIST
|
||||
DISPLAY( " -r : operate recursively on directories \n");
|
||||
DISPLAY( "--output-dir-flat[=directory]: all resulting files stored into `directory`. \n");
|
||||
DISPLAY( "--filelist=FILE : read a list of files from FILE. \n");
|
||||
DISPLAY( "--output-dir-flat=DIR : all resulting files are stored into DIR. \n");
|
||||
#endif
|
||||
DISPLAY( "--format=zstd : compress files to the .zst format (default) \n");
|
||||
#ifdef ZSTD_GZCOMPRESS
|
||||
|
@ -585,8 +585,8 @@ int main(int argCount, const char* argv[])
|
|||
int cLevelLast = -1000000000;
|
||||
unsigned recursive = 0;
|
||||
unsigned memLimit = 0;
|
||||
const char** filenameTable = (const char**)malloc((size_t)argCount * sizeof(const char*)); /* argCount >= 1 */
|
||||
unsigned filenameIdx = 0;
|
||||
FileNamesTable* filenames = UTIL_allocateFileNamesTable((size_t)argCount); /* argCount >= 1 */
|
||||
FileNamesTable* file_of_names = UTIL_allocateFileNamesTable((size_t)argCount); /* argCount >= 1 */
|
||||
const char* programName = argv[0];
|
||||
const char* outFileName = NULL;
|
||||
const char* outDirName = NULL;
|
||||
|
@ -599,11 +599,6 @@ int main(int argCount, const char* argv[])
|
|||
size_t srcSizeHint = 0;
|
||||
int dictCLevel = g_defaultDictCLevel;
|
||||
unsigned dictSelect = g_defaultSelectivityLevel;
|
||||
#ifdef UTIL_HAS_CREATEFILELIST
|
||||
const char** extendedFileList = NULL;
|
||||
char* fileNamesBuf = NULL;
|
||||
unsigned fileNamesNb;
|
||||
#endif
|
||||
#ifndef ZSTD_NODICT
|
||||
ZDICT_cover_params_t coverParams = defaultCoverParams();
|
||||
ZDICT_fastCover_params_t fastCoverParams = defaultFastCoverParams();
|
||||
|
@ -618,8 +613,7 @@ int main(int argCount, const char* argv[])
|
|||
/* init */
|
||||
(void)recursive; (void)cLevelLast; /* not used when ZSTD_NOBENCH set */
|
||||
(void)memLimit; /* not used when ZSTD_NODECOMPRESS set */
|
||||
if (filenameTable==NULL) { DISPLAY("zstd: %s \n", strerror(errno)); exit(1); }
|
||||
filenameTable[0] = stdinmark;
|
||||
if ((filenames==NULL) || (file_of_names==NULL)) { DISPLAY("zstd: allocation error \n"); exit(1); }
|
||||
g_displayOut = stderr;
|
||||
cLevel = init_cLevel();
|
||||
programName = lastNameFromPath(programName);
|
||||
|
@ -649,312 +643,317 @@ int main(int argCount, const char* argv[])
|
|||
/* command switches */
|
||||
for (argNb=1; argNb<argCount; argNb++) {
|
||||
const char* argument = argv[argNb];
|
||||
if(!argument) continue; /* Protection if argument empty */
|
||||
if (!argument) continue; /* Protection if argument empty */
|
||||
|
||||
if (nextArgumentsAreFiles==0) {
|
||||
/* "-" means stdin/stdout */
|
||||
if (!strcmp(argument, "-")){
|
||||
if (!filenameIdx) {
|
||||
filenameIdx=1, filenameTable[0]=stdinmark;
|
||||
outFileName=stdoutmark;
|
||||
g_displayLevel-=(g_displayLevel==2);
|
||||
continue;
|
||||
} }
|
||||
if (nextArgumentsAreFiles) {
|
||||
UTIL_refFilename(filenames, argument);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Decode commands (note : aggregated commands are allowed) */
|
||||
if (argument[0]=='-') {
|
||||
/* "-" means stdin/stdout */
|
||||
if (!strcmp(argument, "-")){
|
||||
UTIL_refFilename(filenames, stdinmark);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (argument[1]=='-') {
|
||||
/* long commands (--long-word) */
|
||||
if (!strcmp(argument, "--")) { nextArgumentsAreFiles=1; continue; } /* only file names allowed from now on */
|
||||
if (!strcmp(argument, "--list")) { operation=zom_list; continue; }
|
||||
if (!strcmp(argument, "--compress")) { operation=zom_compress; continue; }
|
||||
if (!strcmp(argument, "--decompress")) { operation=zom_decompress; continue; }
|
||||
if (!strcmp(argument, "--uncompress")) { operation=zom_decompress; continue; }
|
||||
if (!strcmp(argument, "--force")) { FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; continue; }
|
||||
if (!strcmp(argument, "--version")) { g_displayOut=stdout; DISPLAY(WELCOME_MESSAGE); CLEAN_RETURN(0); }
|
||||
if (!strcmp(argument, "--help")) { g_displayOut=stdout; CLEAN_RETURN(usage_advanced(programName)); }
|
||||
if (!strcmp(argument, "--verbose")) { g_displayLevel++; continue; }
|
||||
if (!strcmp(argument, "--quiet")) { g_displayLevel--; continue; }
|
||||
if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; g_displayLevel-=(g_displayLevel==2); continue; }
|
||||
if (!strcmp(argument, "--ultra")) { ultra=1; continue; }
|
||||
if (!strcmp(argument, "--check")) { FIO_setChecksumFlag(prefs, 2); continue; }
|
||||
if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(prefs, 0); continue; }
|
||||
if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(prefs, 2); continue; }
|
||||
if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(prefs, 0); continue; }
|
||||
if (!strcmp(argument, "--test")) { operation=zom_test; continue; }
|
||||
if (!strcmp(argument, "--train")) { operation=zom_train; if (outFileName==NULL) outFileName=g_defaultDictName; continue; }
|
||||
if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */
|
||||
if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */
|
||||
if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(prefs, 0); continue; }
|
||||
if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(prefs, 0); continue; }
|
||||
if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(prefs, 1); continue; }
|
||||
if (!strcmp(argument, "--priority=rt")) { setRealTimePrio = 1; continue; }
|
||||
if (!strcmp(argument, "--output-dir-flat")) {nextArgumentIsOutDirName=1; lastCommand=1; continue; }
|
||||
if (!strcmp(argument, "--adapt")) { adapt = 1; continue; }
|
||||
if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) CLEAN_RETURN(badusage(programName)); continue; }
|
||||
if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; }
|
||||
if (!strcmp(argument, "--format=zstd")) { suffix = ZSTD_EXTENSION; FIO_setCompressionType(prefs, FIO_zstdCompression); continue; }
|
||||
/* Decode commands (note : aggregated commands are allowed) */
|
||||
if (argument[0]=='-') {
|
||||
|
||||
if (argument[1]=='-') {
|
||||
/* long commands (--long-word) */
|
||||
if (!strcmp(argument, "--")) { nextArgumentsAreFiles=1; continue; } /* only file names allowed from now on */
|
||||
if (!strcmp(argument, "--list")) { operation=zom_list; continue; }
|
||||
if (!strcmp(argument, "--compress")) { operation=zom_compress; continue; }
|
||||
if (!strcmp(argument, "--decompress")) { operation=zom_decompress; continue; }
|
||||
if (!strcmp(argument, "--uncompress")) { operation=zom_decompress; continue; }
|
||||
if (!strcmp(argument, "--force")) { FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; continue; }
|
||||
if (!strcmp(argument, "--version")) { g_displayOut=stdout; DISPLAY(WELCOME_MESSAGE); CLEAN_RETURN(0); }
|
||||
if (!strcmp(argument, "--help")) { g_displayOut=stdout; CLEAN_RETURN(usage_advanced(programName)); }
|
||||
if (!strcmp(argument, "--verbose")) { g_displayLevel++; continue; }
|
||||
if (!strcmp(argument, "--quiet")) { g_displayLevel--; continue; }
|
||||
if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; g_displayLevel-=(g_displayLevel==2); continue; }
|
||||
if (!strcmp(argument, "--ultra")) { ultra=1; continue; }
|
||||
if (!strcmp(argument, "--check")) { FIO_setChecksumFlag(prefs, 2); continue; }
|
||||
if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(prefs, 0); continue; }
|
||||
if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(prefs, 2); continue; }
|
||||
if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(prefs, 0); continue; }
|
||||
if (!strcmp(argument, "--test")) { operation=zom_test; continue; }
|
||||
if (!strcmp(argument, "--train")) { operation=zom_train; if (outFileName==NULL) outFileName=g_defaultDictName; continue; }
|
||||
if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */
|
||||
if (!strcmp(argument, "--dictID")) { nextArgumentIsDictID=1; lastCommand=1; continue; } /* kept available for compatibility with old syntax ; will be removed one day */
|
||||
if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(prefs, 0); continue; }
|
||||
if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(prefs, 0); continue; }
|
||||
if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(prefs, 1); continue; }
|
||||
if (!strcmp(argument, "--priority=rt")) { setRealTimePrio = 1; continue; }
|
||||
if (!strcmp(argument, "--output-dir-flat")) {nextArgumentIsOutDirName=1; lastCommand=1; continue; }
|
||||
if (!strcmp(argument, "--adapt")) { adapt = 1; continue; }
|
||||
if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) CLEAN_RETURN(badusage(programName)); continue; }
|
||||
if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; }
|
||||
if (!strcmp(argument, "--format=zstd")) { suffix = ZSTD_EXTENSION; FIO_setCompressionType(prefs, FIO_zstdCompression); continue; }
|
||||
#ifdef ZSTD_GZCOMPRESS
|
||||
if (!strcmp(argument, "--format=gzip")) { suffix = GZ_EXTENSION; FIO_setCompressionType(prefs, FIO_gzipCompression); continue; }
|
||||
if (!strcmp(argument, "--format=gzip")) { suffix = GZ_EXTENSION; FIO_setCompressionType(prefs, FIO_gzipCompression); continue; }
|
||||
#endif
|
||||
#ifdef ZSTD_LZMACOMPRESS
|
||||
if (!strcmp(argument, "--format=lzma")) { suffix = LZMA_EXTENSION; FIO_setCompressionType(prefs, FIO_lzmaCompression); continue; }
|
||||
if (!strcmp(argument, "--format=xz")) { suffix = XZ_EXTENSION; FIO_setCompressionType(prefs, FIO_xzCompression); continue; }
|
||||
if (!strcmp(argument, "--format=lzma")) { suffix = LZMA_EXTENSION; FIO_setCompressionType(prefs, FIO_lzmaCompression); continue; }
|
||||
if (!strcmp(argument, "--format=xz")) { suffix = XZ_EXTENSION; FIO_setCompressionType(prefs, FIO_xzCompression); continue; }
|
||||
#endif
|
||||
#ifdef ZSTD_LZ4COMPRESS
|
||||
if (!strcmp(argument, "--format=lz4")) { suffix = LZ4_EXTENSION; FIO_setCompressionType(prefs, FIO_lz4Compression); continue; }
|
||||
if (!strcmp(argument, "--format=lz4")) { suffix = LZ4_EXTENSION; FIO_setCompressionType(prefs, FIO_lz4Compression); continue; }
|
||||
#endif
|
||||
if (!strcmp(argument, "--rsyncable")) { rsyncable = 1; continue; }
|
||||
if (!strcmp(argument, "--compress-literals")) { literalCompressionMode = ZSTD_lcm_huffman; continue; }
|
||||
if (!strcmp(argument, "--no-compress-literals")) { literalCompressionMode = ZSTD_lcm_uncompressed; continue; }
|
||||
if (!strcmp(argument, "--no-progress")) { FIO_setNoProgress(1); continue; }
|
||||
if (!strcmp(argument, "--exclude-compressed")) { FIO_setExcludeCompressedFile(prefs, 1); continue; }
|
||||
/* long commands with arguments */
|
||||
if (!strcmp(argument, "--rsyncable")) { rsyncable = 1; continue; }
|
||||
if (!strcmp(argument, "--compress-literals")) { literalCompressionMode = ZSTD_lcm_huffman; continue; }
|
||||
if (!strcmp(argument, "--no-compress-literals")) { literalCompressionMode = ZSTD_lcm_uncompressed; continue; }
|
||||
if (!strcmp(argument, "--no-progress")) { FIO_setNoProgress(1); continue; }
|
||||
if (!strcmp(argument, "--exclude-compressed")) { FIO_setExcludeCompressedFile(prefs, 1); continue; }
|
||||
/* long commands with arguments */
|
||||
#ifndef ZSTD_NODICT
|
||||
if (longCommandWArg(&argument, "--train-cover")) {
|
||||
operation = zom_train;
|
||||
if (outFileName == NULL)
|
||||
outFileName = g_defaultDictName;
|
||||
dict = cover;
|
||||
/* Allow optional arguments following an = */
|
||||
if (*argument == 0) { memset(&coverParams, 0, sizeof(coverParams)); }
|
||||
else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
|
||||
else if (!parseCoverParameters(argument, &coverParams)) { CLEAN_RETURN(badusage(programName)); }
|
||||
continue;
|
||||
}
|
||||
if (longCommandWArg(&argument, "--train-fastcover")) {
|
||||
operation = zom_train;
|
||||
if (outFileName == NULL)
|
||||
outFileName = g_defaultDictName;
|
||||
dict = fastCover;
|
||||
/* Allow optional arguments following an = */
|
||||
if (*argument == 0) { memset(&fastCoverParams, 0, sizeof(fastCoverParams)); }
|
||||
else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
|
||||
else if (!parseFastCoverParameters(argument, &fastCoverParams)) { CLEAN_RETURN(badusage(programName)); }
|
||||
continue;
|
||||
}
|
||||
if (longCommandWArg(&argument, "--train-legacy")) {
|
||||
operation = zom_train;
|
||||
if (outFileName == NULL)
|
||||
outFileName = g_defaultDictName;
|
||||
dict = legacy;
|
||||
/* Allow optional arguments following an = */
|
||||
if (*argument == 0) { continue; }
|
||||
else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
|
||||
else if (!parseLegacyParameters(argument, &dictSelect)) { CLEAN_RETURN(badusage(programName)); }
|
||||
continue;
|
||||
}
|
||||
if (longCommandWArg(&argument, "--train-cover")) {
|
||||
operation = zom_train;
|
||||
if (outFileName == NULL)
|
||||
outFileName = g_defaultDictName;
|
||||
dict = cover;
|
||||
/* Allow optional arguments following an = */
|
||||
if (*argument == 0) { memset(&coverParams, 0, sizeof(coverParams)); }
|
||||
else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
|
||||
else if (!parseCoverParameters(argument, &coverParams)) { CLEAN_RETURN(badusage(programName)); }
|
||||
continue;
|
||||
}
|
||||
if (longCommandWArg(&argument, "--train-fastcover")) {
|
||||
operation = zom_train;
|
||||
if (outFileName == NULL)
|
||||
outFileName = g_defaultDictName;
|
||||
dict = fastCover;
|
||||
/* Allow optional arguments following an = */
|
||||
if (*argument == 0) { memset(&fastCoverParams, 0, sizeof(fastCoverParams)); }
|
||||
else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
|
||||
else if (!parseFastCoverParameters(argument, &fastCoverParams)) { CLEAN_RETURN(badusage(programName)); }
|
||||
continue;
|
||||
}
|
||||
if (longCommandWArg(&argument, "--train-legacy")) {
|
||||
operation = zom_train;
|
||||
if (outFileName == NULL)
|
||||
outFileName = g_defaultDictName;
|
||||
dict = legacy;
|
||||
/* Allow optional arguments following an = */
|
||||
if (*argument == 0) { continue; }
|
||||
else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
|
||||
else if (!parseLegacyParameters(argument, &dictSelect)) { CLEAN_RETURN(badusage(programName)); }
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
if (longCommandWArg(&argument, "--threads=")) { nbWorkers = (int)readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--memlimit=")) { memLimit = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--memory=")) { memLimit = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--memlimit-decompress=")) { memLimit = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--block-size=")) { blockSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; }
|
||||
if (longCommandWArg(&argument, "--stream-size=")) { streamSrcSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--output-dir-flat=")) { outDirName = argument; continue; }
|
||||
if (longCommandWArg(&argument, "--long")) {
|
||||
unsigned ldmWindowLog = 0;
|
||||
ldmFlag = 1;
|
||||
/* Parse optional window log */
|
||||
if (*argument == '=') {
|
||||
++argument;
|
||||
ldmWindowLog = readU32FromChar(&argument);
|
||||
} else if (*argument != 0) {
|
||||
/* Invalid character following --long */
|
||||
CLEAN_RETURN(badusage(programName));
|
||||
}
|
||||
/* Only set windowLog if not already set by --zstd */
|
||||
if (compressionParams.windowLog == 0)
|
||||
compressionParams.windowLog = ldmWindowLog;
|
||||
continue;
|
||||
if (longCommandWArg(&argument, "--threads=")) { nbWorkers = (int)readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--memlimit=")) { memLimit = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--memory=")) { memLimit = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--memlimit-decompress=")) { memLimit = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--block-size=")) { blockSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; }
|
||||
if (longCommandWArg(&argument, "--stream-size=")) { streamSrcSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--output-dir-flat=")) { outDirName = argument; continue; }
|
||||
if (longCommandWArg(&argument, "--long")) {
|
||||
unsigned ldmWindowLog = 0;
|
||||
ldmFlag = 1;
|
||||
/* Parse optional window log */
|
||||
if (*argument == '=') {
|
||||
++argument;
|
||||
ldmWindowLog = readU32FromChar(&argument);
|
||||
} else if (*argument != 0) {
|
||||
/* Invalid character following --long */
|
||||
CLEAN_RETURN(badusage(programName));
|
||||
}
|
||||
/* Only set windowLog if not already set by --zstd */
|
||||
if (compressionParams.windowLog == 0)
|
||||
compressionParams.windowLog = ldmWindowLog;
|
||||
continue;
|
||||
}
|
||||
#ifndef ZSTD_NOCOMPRESS /* linking ZSTD_minCLevel() requires compression support */
|
||||
if (longCommandWArg(&argument, "--fast")) {
|
||||
/* Parse optional acceleration factor */
|
||||
if (*argument == '=') {
|
||||
U32 const maxFast = (U32)-ZSTD_minCLevel();
|
||||
U32 fastLevel;
|
||||
++argument;
|
||||
fastLevel = readU32FromChar(&argument);
|
||||
if (fastLevel > maxFast) fastLevel = maxFast;
|
||||
if (fastLevel) {
|
||||
dictCLevel = cLevel = -(int)fastLevel;
|
||||
} else {
|
||||
CLEAN_RETURN(badusage(programName));
|
||||
}
|
||||
} else if (*argument != 0) {
|
||||
/* Invalid character following --fast */
|
||||
CLEAN_RETURN(badusage(programName));
|
||||
if (longCommandWArg(&argument, "--fast")) {
|
||||
/* Parse optional acceleration factor */
|
||||
if (*argument == '=') {
|
||||
U32 const maxFast = (U32)-ZSTD_minCLevel();
|
||||
U32 fastLevel;
|
||||
++argument;
|
||||
fastLevel = readU32FromChar(&argument);
|
||||
if (fastLevel > maxFast) fastLevel = maxFast;
|
||||
if (fastLevel) {
|
||||
dictCLevel = cLevel = -(int)fastLevel;
|
||||
} else {
|
||||
cLevel = -1; /* default for --fast */
|
||||
CLEAN_RETURN(badusage(programName));
|
||||
}
|
||||
continue;
|
||||
} else if (*argument != 0) {
|
||||
/* Invalid character following --fast */
|
||||
CLEAN_RETURN(badusage(programName));
|
||||
} else {
|
||||
cLevel = -1; /* default for --fast */
|
||||
}
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
/* fall-through, will trigger bad_usage() later on */
|
||||
|
||||
if (longCommandWArg(&argument, "--filelist=")) {
|
||||
UTIL_refFilename(file_of_names, argument);
|
||||
continue;
|
||||
}
|
||||
|
||||
argument++;
|
||||
while (argument[0]!=0) {
|
||||
if (lastCommand) {
|
||||
DISPLAY("error : command must be followed by argument \n");
|
||||
CLEAN_RETURN(1);
|
||||
}
|
||||
/* fall-through, will trigger bad_usage() later on */
|
||||
}
|
||||
|
||||
argument++;
|
||||
while (argument[0]!=0) {
|
||||
if (lastCommand) {
|
||||
DISPLAY("error : command must be followed by argument \n");
|
||||
CLEAN_RETURN(1);
|
||||
}
|
||||
#ifndef ZSTD_NOCOMPRESS
|
||||
/* compression Level */
|
||||
if ((*argument>='0') && (*argument<='9')) {
|
||||
dictCLevel = cLevel = (int)readU32FromChar(&argument);
|
||||
continue;
|
||||
}
|
||||
/* compression Level */
|
||||
if ((*argument>='0') && (*argument<='9')) {
|
||||
dictCLevel = cLevel = (int)readU32FromChar(&argument);
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
switch(argument[0])
|
||||
{
|
||||
/* Display help */
|
||||
case 'V': g_displayOut=stdout; printVersion(); CLEAN_RETURN(0); /* Version Only */
|
||||
case 'H':
|
||||
case 'h': g_displayOut=stdout; CLEAN_RETURN(usage_advanced(programName));
|
||||
switch(argument[0])
|
||||
{
|
||||
/* Display help */
|
||||
case 'V': g_displayOut=stdout; printVersion(); CLEAN_RETURN(0); /* Version Only */
|
||||
case 'H':
|
||||
case 'h': g_displayOut=stdout; CLEAN_RETURN(usage_advanced(programName));
|
||||
|
||||
/* Compress */
|
||||
case 'z': operation=zom_compress; argument++; break;
|
||||
/* Compress */
|
||||
case 'z': operation=zom_compress; argument++; break;
|
||||
|
||||
/* Decoding */
|
||||
case 'd':
|
||||
/* Decoding */
|
||||
case 'd':
|
||||
#ifndef ZSTD_NOBENCH
|
||||
benchParams.mode = BMK_decodeOnly;
|
||||
if (operation==zom_bench) { argument++; break; } /* benchmark decode (hidden option) */
|
||||
benchParams.mode = BMK_decodeOnly;
|
||||
if (operation==zom_bench) { argument++; break; } /* benchmark decode (hidden option) */
|
||||
#endif
|
||||
operation=zom_decompress; argument++; break;
|
||||
operation=zom_decompress; argument++; break;
|
||||
|
||||
/* Force stdout, even if stdout==console */
|
||||
case 'c': forceStdout=1; outFileName=stdoutmark; argument++; break;
|
||||
/* Force stdout, even if stdout==console */
|
||||
case 'c': forceStdout=1; outFileName=stdoutmark; argument++; break;
|
||||
|
||||
/* Use file content as dictionary */
|
||||
case 'D': nextEntryIsDictionary = 1; lastCommand = 1; argument++; break;
|
||||
/* Use file content as dictionary */
|
||||
case 'D': nextEntryIsDictionary = 1; lastCommand = 1; argument++; break;
|
||||
|
||||
/* Overwrite */
|
||||
case 'f': FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; argument++; break;
|
||||
/* Overwrite */
|
||||
case 'f': FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; argument++; break;
|
||||
|
||||
/* Verbose mode */
|
||||
case 'v': g_displayLevel++; argument++; break;
|
||||
/* Verbose mode */
|
||||
case 'v': g_displayLevel++; argument++; break;
|
||||
|
||||
/* Quiet mode */
|
||||
case 'q': g_displayLevel--; argument++; break;
|
||||
/* Quiet mode */
|
||||
case 'q': g_displayLevel--; argument++; break;
|
||||
|
||||
/* keep source file (default) */
|
||||
case 'k': FIO_setRemoveSrcFile(prefs, 0); argument++; break;
|
||||
/* keep source file (default) */
|
||||
case 'k': FIO_setRemoveSrcFile(prefs, 0); argument++; break;
|
||||
|
||||
/* Checksum */
|
||||
case 'C': FIO_setChecksumFlag(prefs, 2); argument++; break;
|
||||
/* Checksum */
|
||||
case 'C': FIO_setChecksumFlag(prefs, 2); argument++; break;
|
||||
|
||||
/* test compressed file */
|
||||
case 't': operation=zom_test; argument++; break;
|
||||
/* test compressed file */
|
||||
case 't': operation=zom_test; argument++; break;
|
||||
|
||||
/* destination file name */
|
||||
case 'o': nextArgumentIsOutFileName=1; lastCommand=1; argument++; break;
|
||||
/* destination file name */
|
||||
case 'o': nextArgumentIsOutFileName=1; lastCommand=1; argument++; break;
|
||||
|
||||
/* limit decompression memory */
|
||||
case 'M':
|
||||
argument++;
|
||||
memLimit = readU32FromChar(&argument);
|
||||
break;
|
||||
case 'l': operation=zom_list; argument++; break;
|
||||
/* limit decompression memory */
|
||||
case 'M':
|
||||
argument++;
|
||||
memLimit = readU32FromChar(&argument);
|
||||
break;
|
||||
case 'l': operation=zom_list; argument++; break;
|
||||
#ifdef UTIL_HAS_CREATEFILELIST
|
||||
/* recursive */
|
||||
case 'r': recursive=1; argument++; break;
|
||||
/* recursive */
|
||||
case 'r': recursive=1; argument++; break;
|
||||
#endif
|
||||
|
||||
#ifndef ZSTD_NOBENCH
|
||||
/* Benchmark */
|
||||
case 'b':
|
||||
operation=zom_bench;
|
||||
argument++;
|
||||
break;
|
||||
/* Benchmark */
|
||||
case 'b':
|
||||
operation=zom_bench;
|
||||
argument++;
|
||||
break;
|
||||
|
||||
/* range bench (benchmark only) */
|
||||
case 'e':
|
||||
/* compression Level */
|
||||
argument++;
|
||||
cLevelLast = (int)readU32FromChar(&argument);
|
||||
break;
|
||||
/* range bench (benchmark only) */
|
||||
case 'e':
|
||||
/* compression Level */
|
||||
argument++;
|
||||
cLevelLast = (int)readU32FromChar(&argument);
|
||||
break;
|
||||
|
||||
/* Modify Nb Iterations (benchmark only) */
|
||||
case 'i':
|
||||
argument++;
|
||||
bench_nbSeconds = readU32FromChar(&argument);
|
||||
break;
|
||||
/* Modify Nb Iterations (benchmark only) */
|
||||
case 'i':
|
||||
argument++;
|
||||
bench_nbSeconds = readU32FromChar(&argument);
|
||||
break;
|
||||
|
||||
/* cut input into blocks (benchmark only) */
|
||||
case 'B':
|
||||
argument++;
|
||||
blockSize = readU32FromChar(&argument);
|
||||
break;
|
||||
/* cut input into blocks (benchmark only) */
|
||||
case 'B':
|
||||
argument++;
|
||||
blockSize = readU32FromChar(&argument);
|
||||
break;
|
||||
|
||||
/* benchmark files separately (hidden option) */
|
||||
case 'S':
|
||||
argument++;
|
||||
separateFiles = 1;
|
||||
break;
|
||||
/* benchmark files separately (hidden option) */
|
||||
case 'S':
|
||||
argument++;
|
||||
separateFiles = 1;
|
||||
break;
|
||||
|
||||
#endif /* ZSTD_NOBENCH */
|
||||
|
||||
/* nb of threads (hidden option) */
|
||||
case 'T':
|
||||
argument++;
|
||||
nbWorkers = (int)readU32FromChar(&argument);
|
||||
break;
|
||||
|
||||
/* Dictionary Selection level */
|
||||
case 's':
|
||||
argument++;
|
||||
dictSelect = readU32FromChar(&argument);
|
||||
break;
|
||||
|
||||
/* Pause at the end (-p) or set an additional param (-p#) (hidden option) */
|
||||
case 'p': argument++;
|
||||
#ifndef ZSTD_NOBENCH
|
||||
if ((*argument>='0') && (*argument<='9')) {
|
||||
benchParams.additionalParam = (int)readU32FromChar(&argument);
|
||||
} else
|
||||
#endif
|
||||
main_pause=1;
|
||||
break;
|
||||
|
||||
/* Select compressibility of synthetic sample */
|
||||
case 'P':
|
||||
{ argument++;
|
||||
compressibility = (double)readU32FromChar(&argument) / 100;
|
||||
}
|
||||
/* nb of threads (hidden option) */
|
||||
case 'T':
|
||||
argument++;
|
||||
nbWorkers = (int)readU32FromChar(&argument);
|
||||
break;
|
||||
|
||||
/* unknown command */
|
||||
default : CLEAN_RETURN(badusage(programName));
|
||||
}
|
||||
/* Dictionary Selection level */
|
||||
case 's':
|
||||
argument++;
|
||||
dictSelect = readU32FromChar(&argument);
|
||||
break;
|
||||
|
||||
/* Pause at the end (-p) or set an additional param (-p#) (hidden option) */
|
||||
case 'p': argument++;
|
||||
#ifndef ZSTD_NOBENCH
|
||||
if ((*argument>='0') && (*argument<='9')) {
|
||||
benchParams.additionalParam = (int)readU32FromChar(&argument);
|
||||
} else
|
||||
#endif
|
||||
main_pause=1;
|
||||
break;
|
||||
|
||||
/* Select compressibility of synthetic sample */
|
||||
case 'P':
|
||||
{ argument++;
|
||||
compressibility = (double)readU32FromChar(&argument) / 100;
|
||||
}
|
||||
continue;
|
||||
} /* if (argument[0]=='-') */
|
||||
break;
|
||||
|
||||
if (nextArgumentIsMaxDict) { /* kept available for compatibility with old syntax ; will be removed one day */
|
||||
nextArgumentIsMaxDict = 0;
|
||||
lastCommand = 0;
|
||||
maxDictSize = readU32FromChar(&argument);
|
||||
continue;
|
||||
/* unknown command */
|
||||
default : CLEAN_RETURN(badusage(programName));
|
||||
}
|
||||
}
|
||||
continue;
|
||||
} /* if (argument[0]=='-') */
|
||||
|
||||
if (nextArgumentIsDictID) { /* kept available for compatibility with old syntax ; will be removed one day */
|
||||
nextArgumentIsDictID = 0;
|
||||
lastCommand = 0;
|
||||
dictID = readU32FromChar(&argument);
|
||||
continue;
|
||||
}
|
||||
if (nextArgumentIsMaxDict) { /* kept available for compatibility with old syntax ; will be removed one day */
|
||||
nextArgumentIsMaxDict = 0;
|
||||
lastCommand = 0;
|
||||
maxDictSize = readU32FromChar(&argument);
|
||||
continue;
|
||||
}
|
||||
|
||||
} /* if (nextArgumentIsAFile==0) */
|
||||
if (nextArgumentIsDictID) { /* kept available for compatibility with old syntax ; will be removed one day */
|
||||
nextArgumentIsDictID = 0;
|
||||
lastCommand = 0;
|
||||
dictID = readU32FromChar(&argument);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (nextEntryIsDictionary) {
|
||||
nextEntryIsDictionary = 0;
|
||||
|
@ -978,8 +977,8 @@ int main(int argCount, const char* argv[])
|
|||
continue;
|
||||
}
|
||||
|
||||
/* add filename to list */
|
||||
filenameTable[filenameIdx++] = argument;
|
||||
/* none of the above : add filename to list */
|
||||
UTIL_refFilename(filenames, argument);
|
||||
}
|
||||
|
||||
if (lastCommand) { /* forgotten argument */
|
||||
|
@ -1003,35 +1002,45 @@ int main(int argCount, const char* argv[])
|
|||
#ifdef UTIL_HAS_CREATEFILELIST
|
||||
g_utilDisplayLevel = g_displayLevel;
|
||||
if (!followLinks) {
|
||||
unsigned u;
|
||||
for (u=0, fileNamesNb=0; u<filenameIdx; u++) {
|
||||
if ( UTIL_isLink(filenameTable[u])
|
||||
&& !UTIL_isFIFO(filenameTable[u])
|
||||
unsigned u, fileNamesNb;
|
||||
unsigned const nbFilenames = (unsigned)filenames->tableSize;
|
||||
for (u=0, fileNamesNb=0; u<nbFilenames; u++) {
|
||||
if ( UTIL_isLink(filenames->fileNames[u])
|
||||
&& !UTIL_isFIFO(filenames->fileNames[u])
|
||||
) {
|
||||
DISPLAYLEVEL(2, "Warning : %s is a symbolic link, ignoring\n", filenameTable[u]);
|
||||
DISPLAYLEVEL(2, "Warning : %s is a symbolic link, ignoring \n", filenames->fileNames[u]);
|
||||
} else {
|
||||
filenameTable[fileNamesNb++] = filenameTable[u];
|
||||
filenames->fileNames[fileNamesNb++] = filenames->fileNames[u];
|
||||
} }
|
||||
if (fileNamesNb == 0 && filenameIdx > 0)
|
||||
if (fileNamesNb == 0 && nbFilenames > 0) /* all names are eliminated */
|
||||
CLEAN_RETURN(1);
|
||||
filenameIdx = fileNamesNb;
|
||||
filenames->tableSize = fileNamesNb;
|
||||
} /* if (!followLinks) */
|
||||
|
||||
/* read names from a file */
|
||||
if (file_of_names->tableSize) {
|
||||
size_t const nbFileLists = file_of_names->tableSize;
|
||||
size_t flNb;
|
||||
for (flNb=0; flNb < nbFileLists; flNb++) {
|
||||
FileNamesTable* const fnt = UTIL_createFileNamesTable_fromFileName(file_of_names->fileNames[flNb]);
|
||||
if (fnt==NULL) {
|
||||
DISPLAYLEVEL(1, "zstd: error reading %s \n", file_of_names->fileNames[flNb]);
|
||||
CLEAN_RETURN(1);
|
||||
}
|
||||
filenames = UTIL_mergeFileNamesTable(filenames, fnt);
|
||||
}
|
||||
}
|
||||
|
||||
if (recursive) { /* at this stage, filenameTable is a list of paths, which can contain both files and directories */
|
||||
extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf, &fileNamesNb, followLinks);
|
||||
if (extendedFileList) {
|
||||
unsigned u;
|
||||
for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
|
||||
free((void*)filenameTable);
|
||||
filenameTable = extendedFileList;
|
||||
filenameIdx = fileNamesNb;
|
||||
} }
|
||||
UTIL_expandFNT(&filenames, followLinks);
|
||||
}
|
||||
#else
|
||||
(void)followLinks;
|
||||
#endif
|
||||
|
||||
if (operation == zom_list) {
|
||||
#ifndef ZSTD_NODECOMPRESS
|
||||
int const ret = FIO_listMultipleFiles(filenameIdx, filenameTable, g_displayLevel);
|
||||
int const ret = FIO_listMultipleFiles((unsigned)filenames->tableSize, filenames->fileNames, g_displayLevel);
|
||||
CLEAN_RETURN(ret);
|
||||
#else
|
||||
DISPLAY("file information is not supported \n");
|
||||
|
@ -1062,18 +1071,18 @@ int main(int argCount, const char* argv[])
|
|||
if (cLevelLast < cLevel) cLevelLast = cLevel;
|
||||
if (cLevelLast > cLevel)
|
||||
DISPLAYLEVEL(3, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast);
|
||||
if(filenameIdx) {
|
||||
if (filenames->tableSize > 0) {
|
||||
if(separateFiles) {
|
||||
unsigned i;
|
||||
for(i = 0; i < filenameIdx; i++) {
|
||||
for(i = 0; i < filenames->tableSize; i++) {
|
||||
int c;
|
||||
DISPLAYLEVEL(3, "Benchmarking %s \n", filenameTable[i]);
|
||||
DISPLAYLEVEL(3, "Benchmarking %s \n", filenames->fileNames[i]);
|
||||
for(c = cLevel; c <= cLevelLast; c++) {
|
||||
BMK_benchFilesAdvanced(&filenameTable[i], 1, dictFileName, c, &compressionParams, g_displayLevel, &benchParams);
|
||||
BMK_benchFilesAdvanced(&filenames->fileNames[i], 1, dictFileName, c, &compressionParams, g_displayLevel, &benchParams);
|
||||
} }
|
||||
} else {
|
||||
for(; cLevel <= cLevelLast; cLevel++) {
|
||||
BMK_benchFilesAdvanced(filenameTable, filenameIdx, dictFileName, cLevel, &compressionParams, g_displayLevel, &benchParams);
|
||||
BMK_benchFilesAdvanced(filenames->fileNames, (unsigned)filenames->tableSize, dictFileName, cLevel, &compressionParams, g_displayLevel, &benchParams);
|
||||
} }
|
||||
} else {
|
||||
for(; cLevel <= cLevelLast; cLevel++) {
|
||||
|
@ -1097,18 +1106,18 @@ int main(int argCount, const char* argv[])
|
|||
int const optimize = !coverParams.k || !coverParams.d;
|
||||
coverParams.nbThreads = (unsigned)nbWorkers;
|
||||
coverParams.zParams = zParams;
|
||||
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, NULL, &coverParams, NULL, optimize);
|
||||
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (unsigned)filenames->tableSize, blockSize, NULL, &coverParams, NULL, optimize);
|
||||
} else if (dict == fastCover) {
|
||||
int const optimize = !fastCoverParams.k || !fastCoverParams.d;
|
||||
fastCoverParams.nbThreads = (unsigned)nbWorkers;
|
||||
fastCoverParams.zParams = zParams;
|
||||
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, NULL, NULL, &fastCoverParams, optimize);
|
||||
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (unsigned)filenames->tableSize, blockSize, NULL, NULL, &fastCoverParams, optimize);
|
||||
} else {
|
||||
ZDICT_legacy_params_t dictParams;
|
||||
memset(&dictParams, 0, sizeof(dictParams));
|
||||
dictParams.selectivityLevel = dictSelect;
|
||||
dictParams.zParams = zParams;
|
||||
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, blockSize, &dictParams, NULL, NULL, 0);
|
||||
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (unsigned)filenames->tableSize, blockSize, &dictParams, NULL, NULL, 0);
|
||||
}
|
||||
#else
|
||||
(void)dictCLevel; (void)dictSelect; (void)dictID; (void)maxDictSize; /* not used when ZSTD_NODICT set */
|
||||
|
@ -1123,16 +1132,16 @@ int main(int argCount, const char* argv[])
|
|||
#endif
|
||||
|
||||
/* No input filename ==> use stdin and stdout */
|
||||
filenameIdx += !filenameIdx; /* filenameTable[0] is stdin by default */
|
||||
if (!strcmp(filenameTable[0], stdinmark) && !outFileName)
|
||||
if (filenames->tableSize == 0) UTIL_refFilename(filenames, stdinmark);
|
||||
if (!strcmp(filenames->fileNames[0], stdinmark) && !outFileName)
|
||||
outFileName = stdoutmark; /* when input is stdin, default output is stdout */
|
||||
|
||||
/* Check if input/output defined as console; trigger an error in this case */
|
||||
if (!strcmp(filenameTable[0], stdinmark) && IS_CONSOLE(stdin) )
|
||||
if (!strcmp(filenames->fileNames[0], stdinmark) && IS_CONSOLE(stdin) )
|
||||
CLEAN_RETURN(badusage(programName));
|
||||
if ( outFileName && !strcmp(outFileName, stdoutmark)
|
||||
&& IS_CONSOLE(stdout)
|
||||
&& !strcmp(filenameTable[0], stdinmark)
|
||||
&& !strcmp(filenames->fileNames[0], stdinmark)
|
||||
&& !forceStdout
|
||||
&& operation!=zom_decompress )
|
||||
CLEAN_RETURN(badusage(programName));
|
||||
|
@ -1147,8 +1156,8 @@ int main(int argCount, const char* argv[])
|
|||
#endif
|
||||
|
||||
/* No status message in pipe mode (stdin - stdout) or multi-files mode */
|
||||
if (!strcmp(filenameTable[0], stdinmark) && outFileName && !strcmp(outFileName,stdoutmark) && (g_displayLevel==2)) g_displayLevel=1;
|
||||
if ((filenameIdx>1) & (g_displayLevel==2)) g_displayLevel=1;
|
||||
if (!strcmp(filenames->fileNames[0], stdinmark) && outFileName && !strcmp(outFileName,stdoutmark) && (g_displayLevel==2)) g_displayLevel=1;
|
||||
if ((filenames->tableSize > 1) & (g_displayLevel==2)) g_displayLevel=1;
|
||||
|
||||
/* IO Stream/File */
|
||||
FIO_setNotificationLevel(g_displayLevel);
|
||||
|
@ -1173,10 +1182,10 @@ int main(int argCount, const char* argv[])
|
|||
if (adaptMin > cLevel) cLevel = adaptMin;
|
||||
if (adaptMax < cLevel) cLevel = adaptMax;
|
||||
|
||||
if ((filenameIdx==1) && outFileName)
|
||||
operationResult = FIO_compressFilename(prefs, outFileName, filenameTable[0], dictFileName, cLevel, compressionParams);
|
||||
if ((filenames->tableSize==1) && outFileName)
|
||||
operationResult = FIO_compressFilename(prefs, outFileName, filenames->fileNames[0], dictFileName, cLevel, compressionParams);
|
||||
else
|
||||
operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outDirName, outFileName, suffix, dictFileName, cLevel, compressionParams);
|
||||
operationResult = FIO_compressMultipleFilenames(prefs, filenames->fileNames, (unsigned)filenames->tableSize, outDirName, outFileName, suffix, dictFileName, cLevel, compressionParams);
|
||||
#else
|
||||
(void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; /* not used when ZSTD_NOCOMPRESS set */
|
||||
DISPLAY("Compression not supported \n");
|
||||
|
@ -1191,10 +1200,11 @@ int main(int argCount, const char* argv[])
|
|||
}
|
||||
}
|
||||
FIO_setMemLimit(prefs, memLimit);
|
||||
if (filenameIdx==1 && outFileName)
|
||||
operationResult = FIO_decompressFilename(prefs, outFileName, filenameTable[0], dictFileName);
|
||||
else
|
||||
operationResult = FIO_decompressMultipleFilenames(prefs, filenameTable, filenameIdx, outDirName, outFileName, dictFileName);
|
||||
if (filenames->tableSize == 1 && outFileName) {
|
||||
operationResult = FIO_decompressFilename(prefs, outFileName, filenames->fileNames[0], dictFileName);
|
||||
} else {
|
||||
operationResult = FIO_decompressMultipleFilenames(prefs, filenames->fileNames, (unsigned)filenames->tableSize, outDirName, outFileName, dictFileName);
|
||||
}
|
||||
#else
|
||||
DISPLAY("Decompression not supported \n");
|
||||
#endif
|
||||
|
@ -1202,13 +1212,9 @@ int main(int argCount, const char* argv[])
|
|||
|
||||
_end:
|
||||
FIO_freePreferences(prefs);
|
||||
|
||||
if (main_pause) waitEnter();
|
||||
#ifdef UTIL_HAS_CREATEFILELIST
|
||||
if (extendedFileList)
|
||||
UTIL_freeFileList(extendedFileList, fileNamesBuf);
|
||||
else
|
||||
#endif
|
||||
free((void*)filenameTable);
|
||||
UTIL_freeFileNamesTable(filenames);
|
||||
UTIL_freeFileNamesTable(file_of_names);
|
||||
|
||||
return operationResult;
|
||||
}
|
||||
|
|
|
@ -18,24 +18,26 @@
|
|||
int main(int argc, char const **argv) {
|
||||
size_t const kMaxFileSize = (size_t)1 << 27;
|
||||
int const kFollowLinks = 1;
|
||||
char *fileNamesBuf = NULL;
|
||||
char const **files = argv + 1;
|
||||
unsigned numFiles = argc - 1;
|
||||
FileNamesTable* files;
|
||||
const char** const fnTable = argv + 1;
|
||||
unsigned numFiles = (unsigned)(argc - 1);
|
||||
uint8_t *buffer = NULL;
|
||||
size_t bufferSize = 0;
|
||||
unsigned i;
|
||||
int ret;
|
||||
|
||||
#ifdef UTIL_HAS_CREATEFILELIST
|
||||
files = UTIL_createFileList(files, numFiles, &fileNamesBuf, &numFiles,
|
||||
kFollowLinks);
|
||||
if (!files)
|
||||
numFiles = 0;
|
||||
files = UTIL_createExpandedFNT(fnTable, numFiles, kFollowLinks);
|
||||
if (!files) numFiles = 0;
|
||||
#else
|
||||
files = UTIL_createFNT_fromROTable(fnTable, numFiles);
|
||||
if (!files) numFiles = 0;
|
||||
assert(numFiles == files->tableSize);
|
||||
#endif
|
||||
if (numFiles == 0)
|
||||
fprintf(stderr, "WARNING: No files passed to %s\n", argv[0]);
|
||||
for (i = 0; i < numFiles; ++i) {
|
||||
char const *fileName = files[i];
|
||||
char const *fileName = files->fileNames[i];
|
||||
DEBUGLOG(3, "Running %s", fileName);
|
||||
size_t const fileSize = UTIL_getFileSize(fileName);
|
||||
size_t readSize;
|
||||
|
@ -70,8 +72,6 @@ int main(int argc, char const **argv) {
|
|||
|
||||
ret = 0;
|
||||
free(buffer);
|
||||
#ifdef UTIL_HAS_CREATEFILELIST
|
||||
UTIL_freeFileList(files, fileNamesBuf);
|
||||
#endif
|
||||
UTIL_freeFileNamesTable(files);
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -114,6 +114,7 @@ else
|
|||
fi
|
||||
|
||||
|
||||
|
||||
println "\n===> simple tests "
|
||||
|
||||
./datagen > tmp
|
||||
|
@ -277,6 +278,7 @@ $ZSTD -f tmp && die "attempt to compress a non existing file"
|
|||
test -f tmp.zst # destination file should still be present
|
||||
rm -rf tmp* # may also erase tmp* directory from previous failed run
|
||||
|
||||
|
||||
println "\n===> decompression only tests "
|
||||
# the following test verifies that the decoder is compatible with RLE as first block
|
||||
# older versions of the zstd cli are not able to decode such a corner case.
|
||||
|
@ -286,7 +288,8 @@ $ZSTD -d -o tmp1 "$TESTDIR/golden-decompression/rle-first-block.zst"
|
|||
$DIFF -s tmp1 tmp
|
||||
rm tmp*
|
||||
|
||||
println "\m===> compress multiple files"
|
||||
|
||||
println "\n===> compress multiple files"
|
||||
println hello > tmp1
|
||||
println world > tmp2
|
||||
$ZSTD tmp1 tmp2 -o "$INTOVOID" -f
|
||||
|
@ -306,10 +309,47 @@ $ZSTD tmp1 tmp2 -o tmpexists && die "should have refused to overwrite"
|
|||
if [ "$?" -eq 139 ]; then
|
||||
die "should not have segfaulted"
|
||||
fi
|
||||
println "\n===> multiple files and shell completion "
|
||||
./datagen -s1 > tmp1 2> $INTOVOID
|
||||
./datagen -s2 -g100K > tmp2 2> $INTOVOID
|
||||
./datagen -s3 -g1M > tmp3 2> $INTOVOID
|
||||
println "compress tmp* : "
|
||||
$ZSTD -f tmp*
|
||||
test -f tmp1.zst
|
||||
test -f tmp2.zst
|
||||
test -f tmp3.zst
|
||||
rm tmp1 tmp2 tmp3
|
||||
println "decompress tmp* : "
|
||||
$ZSTD -df ./*.zst
|
||||
test -f tmp1
|
||||
test -f tmp2
|
||||
test -f tmp3
|
||||
println "compress tmp* into stdout > tmpall : "
|
||||
$ZSTD -c tmp1 tmp2 tmp3 > tmpall
|
||||
test -f tmpall # should check size of tmpall (should be tmp1.zst + tmp2.zst + tmp3.zst)
|
||||
println "decompress tmpall* into stdout > tmpdec : "
|
||||
cp tmpall tmpall2
|
||||
$ZSTD -dc tmpall* > tmpdec
|
||||
test -f tmpdec # should check size of tmpdec (should be 2*(tmp1 + tmp2 + tmp3))
|
||||
println "compress multiple files including a missing one (notHere) : "
|
||||
$ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
|
||||
rm tmp*
|
||||
|
||||
if [ -n "$DEVNULLRIGHTS" ]
|
||||
then
|
||||
|
||||
if [ "$isWindows" = false ] ; then
|
||||
println "\n===> zstd fifo named pipe test "
|
||||
echo "Hello World!" > tmp_original
|
||||
mkfifo tmp_named_pipe
|
||||
# note : fifo test doesn't work in combination with `dd` or `cat`
|
||||
echo "Hello World!" > tmp_named_pipe &
|
||||
$ZSTD tmp_named_pipe -o tmp_compressed
|
||||
$ZSTD -d -o tmp_decompressed tmp_compressed
|
||||
$DIFF -s tmp_original tmp_decompressed
|
||||
rm -rf tmp*
|
||||
fi
|
||||
|
||||
|
||||
if [ -n "$DEVNULLRIGHTS" ] ; then
|
||||
# these tests require sudo rights, which is uncommon.
|
||||
# they are only triggered if DEVNULLRIGHTS macro is defined.
|
||||
println "\n===> checking /dev/null permissions are unaltered "
|
||||
|
@ -322,6 +362,7 @@ then
|
|||
ls -las $INTOVOID | grep "rw-rw-rw-"
|
||||
fi
|
||||
|
||||
|
||||
println "\n===> compress multiple files into an output directory, --output-dir-flat"
|
||||
println henlo > tmp1
|
||||
mkdir tmpInputTestDir
|
||||
|
@ -345,6 +386,68 @@ test -f tmpOutDirDecomp/tmp2
|
|||
test -f tmpOutDirDecomp/tmp1
|
||||
rm -rf tmp*
|
||||
|
||||
|
||||
println "test : compress multiple files reading them from a file, --filelist=FILE"
|
||||
println "Hello world!, file1" > tmp1
|
||||
println "Hello world!, file2" > tmp2
|
||||
println tmp1 > tmp_fileList
|
||||
println tmp2 >> tmp_fileList
|
||||
$ZSTD -f --filelist=tmp_fileList
|
||||
test -f tmp2.zst
|
||||
test -f tmp1.zst
|
||||
|
||||
println "test : reading file list from a symlink, --filelist=FILE"
|
||||
rm -f *.zst
|
||||
ln -s tmp_fileList tmp_symLink
|
||||
$ZSTD -f --filelist=tmp_symLink
|
||||
test -f tmp2.zst
|
||||
test -f tmp1.zst
|
||||
|
||||
println "test : compress multiple files reading them from multiple files, --filelist=FILE"
|
||||
rm -f *.zst
|
||||
println "Hello world!, file3" > tmp3
|
||||
println "Hello world!, file4" > tmp4
|
||||
println tmp3 > tmp_fileList2
|
||||
println tmp4 >> tmp_fileList2
|
||||
$ZSTD -f --filelist=tmp_fileList --filelist=tmp_fileList2
|
||||
test -f tmp1.zst
|
||||
test -f tmp2.zst
|
||||
test -f tmp3.zst
|
||||
test -f tmp4.zst
|
||||
|
||||
println "test : decompress multiple files reading them from a file, --filelist=FILE"
|
||||
rm -f tmp1 tmp2
|
||||
println tmp1.zst > tmpZst
|
||||
println tmp2.zst >> tmpZst
|
||||
$ZSTD -d -f --filelist=tmpZst
|
||||
test -f tmp1
|
||||
test -f tmp2
|
||||
|
||||
println "test : decompress multiple files reading them from multiple files, --filelist=FILE"
|
||||
rm -f tmp1 tmp2 tmp3 tmp4
|
||||
println tmp3.zst > tmpZst2
|
||||
println tmp4.zst >> tmpZst2
|
||||
$ZSTD -d -f --filelist=tmpZst --filelist=tmpZst2
|
||||
test -f tmp1
|
||||
test -f tmp2
|
||||
test -f tmp3
|
||||
test -f tmp4
|
||||
|
||||
println "test : survive a list of files which is text garbage (--filelist=FILE)"
|
||||
./datagen > tmp_badList
|
||||
$ZSTD -f --filelist=tmp_badList && die "should have failed : list is text garbage"
|
||||
|
||||
println "test : survive a list of files which is binary garbage (--filelist=FILE)"
|
||||
./datagen -P0 -g1M > tmp_badList
|
||||
$ZSTD -qq -f --filelist=tmp_badList && die "should have failed : list is binary garbage" # let's avoid printing binary garbage on console
|
||||
|
||||
println "test : try to overflow internal list of files (--filelist=FILE)"
|
||||
touch tmp1 tmp2 tmp3 tmp4 tmp5 tmp6
|
||||
ls tmp* > tmpList
|
||||
$ZSTD -f tmp1 --filelist=tmpList --filelist=tmpList tmp2 tmp3 # can trigger an overflow of internal file list
|
||||
rm -rf tmp*
|
||||
|
||||
|
||||
println "\n===> Advanced compression parameters "
|
||||
println "Hello world!" | $ZSTD --zstd=windowLog=21, - -o tmp.zst && die "wrong parameters not detected!"
|
||||
println "Hello world!" | $ZSTD --zstd=windowLo=21 - -o tmp.zst && die "wrong parameters not detected!"
|
||||
|
@ -464,28 +567,6 @@ $DIFF tmpSparse2M tmpSparseRegenerated
|
|||
rm tmpSparse*
|
||||
|
||||
|
||||
println "\n===> multiple files tests "
|
||||
|
||||
./datagen -s1 > tmp1 2> $INTOVOID
|
||||
./datagen -s2 -g100K > tmp2 2> $INTOVOID
|
||||
./datagen -s3 -g1M > tmp3 2> $INTOVOID
|
||||
println "compress tmp* : "
|
||||
$ZSTD -f tmp*
|
||||
ls -ls tmp*
|
||||
rm tmp1 tmp2 tmp3
|
||||
println "decompress tmp* : "
|
||||
$ZSTD -df ./*.zst
|
||||
ls -ls tmp*
|
||||
println "compress tmp* into stdout > tmpall : "
|
||||
$ZSTD -c tmp1 tmp2 tmp3 > tmpall
|
||||
ls -ls tmp* # check size of tmpall (should be tmp1.zst + tmp2.zst + tmp3.zst)
|
||||
println "decompress tmpall* into stdout > tmpdec : "
|
||||
cp tmpall tmpall2
|
||||
$ZSTD -dc tmpall* > tmpdec
|
||||
ls -ls tmp* # check size of tmpdec (should be 2*(tmp1 + tmp2 + tmp3))
|
||||
println "compress multiple files including a missing one (notHere) : "
|
||||
$ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
|
||||
|
||||
println "\n===> stream-size mode"
|
||||
|
||||
./datagen -g11000 > tmp
|
||||
|
@ -726,7 +807,6 @@ $ZSTD -t tmpSplit.* && die "bad file not detected !"
|
|||
./datagen | $ZSTD -c | $ZSTD -t
|
||||
|
||||
|
||||
|
||||
println "\n===> golden files tests "
|
||||
|
||||
$ZSTD -t -r "$TESTDIR/golden-compression"
|
||||
|
@ -748,6 +828,7 @@ println "benchmark decompression only"
|
|||
$ZSTD -f tmp1
|
||||
$ZSTD -b -d -i0 tmp1.zst
|
||||
|
||||
|
||||
println "\n===> zstd compatibility tests "
|
||||
|
||||
./datagen > tmp
|
||||
|
@ -755,6 +836,7 @@ rm -f tmp.zst
|
|||
$ZSTD --format=zstd -f tmp
|
||||
test -f tmp.zst
|
||||
|
||||
|
||||
println "\n===> gzip compatibility tests "
|
||||
|
||||
GZIPMODE=1
|
||||
|
@ -882,9 +964,8 @@ else
|
|||
fi
|
||||
|
||||
|
||||
println "\n===> lz4 frame tests "
|
||||
|
||||
if [ $LZ4MODE -eq 1 ]; then
|
||||
println "\n===> lz4 frame tests "
|
||||
./datagen > tmp
|
||||
$ZSTD -f --format=lz4 tmp
|
||||
$ZSTD -f tmp
|
||||
|
@ -892,9 +973,10 @@ if [ $LZ4MODE -eq 1 ]; then
|
|||
truncateLastByte tmp.lz4 | $ZSTD -t > $INTOVOID && die "incomplete frame not detected !"
|
||||
rm tmp*
|
||||
else
|
||||
println "lz4 mode not supported"
|
||||
println "\nlz4 mode not supported"
|
||||
fi
|
||||
|
||||
|
||||
println "\n===> suffix list test"
|
||||
|
||||
! $ZSTD -d tmp.abc 2> tmplg
|
||||
|
@ -912,6 +994,7 @@ if [ $LZ4MODE -ne 1 ]; then
|
|||
grep ".lz4" tmplg > $INTOVOID && die "Unsupported suffix listed"
|
||||
fi
|
||||
|
||||
|
||||
println "\n===> tar extension tests "
|
||||
|
||||
rm -f tmp tmp.tar tmp.tzst tmp.tgz tmp.txz tmp.tlz4
|
||||
|
@ -950,7 +1033,6 @@ touch tmp.t tmp.tz tmp.tzs
|
|||
! $ZSTD -d tmp.tz
|
||||
! $ZSTD -d tmp.tzs
|
||||
|
||||
exit
|
||||
|
||||
println "\n===> zstd round-trip tests "
|
||||
|
||||
|
@ -1205,18 +1287,4 @@ $ZSTD --train-cover "$TESTDIR"/*.c "$PRGDIR"/*.c
|
|||
test -f dictionary
|
||||
rm -f tmp* dictionary
|
||||
|
||||
if [ "$isWindows" = false ] ; then
|
||||
|
||||
println "\n===> zstd fifo named pipe test "
|
||||
dd bs=1 count=10 if=/dev/zero of=tmp_original
|
||||
mkfifo named_pipe
|
||||
dd bs=1 count=10 if=/dev/zero of=named_pipe &
|
||||
$ZSTD named_pipe -o tmp_compressed
|
||||
$ZSTD -d -o tmp_decompressed tmp_compressed
|
||||
$DIFF -s tmp_original tmp_decompressed
|
||||
rm -rf tmp*
|
||||
rm -rf named_pipe
|
||||
|
||||
fi
|
||||
|
||||
rm -f tmp*
|
||||
|
|
|
@ -173,21 +173,10 @@ void data_buffer_free(data_buffer_t buffer) {
|
|||
* data filenames helpers.
|
||||
*/
|
||||
|
||||
data_filenames_t data_filenames_get(data_t const* data) {
|
||||
data_filenames_t filenames = {.buffer = NULL, .size = 0};
|
||||
char const* path = data->data.path;
|
||||
|
||||
filenames.filenames = UTIL_createFileList(
|
||||
&path,
|
||||
1,
|
||||
&filenames.buffer,
|
||||
&filenames.size,
|
||||
/* followLinks */ 0);
|
||||
return filenames;
|
||||
}
|
||||
|
||||
void data_filenames_free(data_filenames_t filenames) {
|
||||
UTIL_freeFileList(filenames.filenames, filenames.buffer);
|
||||
FileNamesTable* data_filenames_get(data_t const* data)
|
||||
{
|
||||
char const* const path = data->data.path;
|
||||
return UTIL_createExpandedFNT(&path, 1, 0 /* followLinks */ );
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -196,26 +185,33 @@ void data_filenames_free(data_filenames_t filenames) {
|
|||
|
||||
data_buffers_t data_buffers_get(data_t const* data) {
|
||||
data_buffers_t buffers = {.size = 0};
|
||||
data_filenames_t filenames = data_filenames_get(data);
|
||||
if (filenames.size == 0)
|
||||
FileNamesTable* const filenames = data_filenames_get(data);
|
||||
if (filenames == NULL) return buffers;
|
||||
if (filenames->tableSize == 0) {
|
||||
UTIL_freeFileNamesTable(filenames);
|
||||
return buffers;
|
||||
}
|
||||
|
||||
data_buffer_t* buffersPtr =
|
||||
(data_buffer_t*)malloc(filenames.size * sizeof(data_buffer_t));
|
||||
if (buffersPtr == NULL)
|
||||
(data_buffer_t*)malloc(filenames->tableSize * sizeof(*buffersPtr));
|
||||
if (buffersPtr == NULL) {
|
||||
UTIL_freeFileNamesTable(filenames);
|
||||
return buffers;
|
||||
}
|
||||
buffers.buffers = (data_buffer_t const*)buffersPtr;
|
||||
buffers.size = filenames.size;
|
||||
buffers.size = filenames->tableSize;
|
||||
|
||||
for (size_t i = 0; i < filenames.size; ++i) {
|
||||
buffersPtr[i] = data_buffer_read(filenames.filenames[i]);
|
||||
for (size_t i = 0; i < filenames->tableSize; ++i) {
|
||||
buffersPtr[i] = data_buffer_read(filenames->fileNames[i]);
|
||||
if (buffersPtr[i].data == NULL) {
|
||||
data_buffers_t const kEmptyBuffer = {};
|
||||
data_buffers_free(buffers);
|
||||
UTIL_freeFileNamesTable(filenames);
|
||||
return kEmptyBuffer;
|
||||
}
|
||||
}
|
||||
|
||||
UTIL_freeFileNamesTable(filenames);
|
||||
return buffers;
|
||||
}
|
||||
|
||||
|
|
|
@ -102,25 +102,6 @@ int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2);
|
|||
*/
|
||||
void data_buffer_free(data_buffer_t buffer);
|
||||
|
||||
typedef struct {
|
||||
char* buffer;
|
||||
char const** filenames;
|
||||
unsigned size;
|
||||
} data_filenames_t;
|
||||
|
||||
/**
|
||||
* Get a recursive list of filenames in the data object. If it is a file, it
|
||||
* will only contain one entry. If it is a directory, it will recursively walk
|
||||
* the directory.
|
||||
*
|
||||
* @returns The list of filenames, which has size 0 and NULL pointers on error.
|
||||
*/
|
||||
data_filenames_t data_filenames_get(data_t const* data);
|
||||
|
||||
/**
|
||||
* Frees the filenames table.
|
||||
*/
|
||||
void data_filenames_free(data_filenames_t filenames);
|
||||
|
||||
typedef struct {
|
||||
data_buffer_t const* buffers;
|
||||
|
|
|
@ -74,7 +74,7 @@ static U32 g_compressibilityDefault = 50;
|
|||
#define DEFAULT_DISPLAY_LEVEL 2
|
||||
#define DISPLAY(...) fprintf(displayOut, __VA_ARGS__)
|
||||
#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
|
||||
static int g_displayLevel = DEFAULT_DISPLAY_LEVEL; /* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + progression; 4 : + information */
|
||||
static unsigned g_displayLevel = DEFAULT_DISPLAY_LEVEL; /* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + progression; 4 : + information */
|
||||
static FILE* displayOut;
|
||||
|
||||
#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
|
||||
|
@ -848,7 +848,7 @@ static unsigned readU32FromChar(const char** stringPtr)
|
|||
{
|
||||
unsigned result = 0;
|
||||
while ((**stringPtr >='0') && (**stringPtr <='9'))
|
||||
result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
|
||||
result *= 10, result += (unsigned)(**stringPtr - '0'), (*stringPtr)++ ;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -865,24 +865,18 @@ int main(int argCount, char** argv)
|
|||
int cLevel = ZSTDCLI_CLEVEL_DEFAULT;
|
||||
int cLevelLast = 1;
|
||||
unsigned recursive = 0;
|
||||
const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*)); /* argCount >= 1 */
|
||||
unsigned filenameIdx = 0;
|
||||
FileNamesTable* filenames = UTIL_allocateFileNamesTable((size_t)argCount);
|
||||
const char* programName = argv[0];
|
||||
const char* dictFileName = NULL;
|
||||
char* dynNameSpace = NULL;
|
||||
#ifdef UTIL_HAS_CREATEFILELIST
|
||||
const char** fileNamesTable = NULL;
|
||||
char* fileNamesBuf = NULL;
|
||||
unsigned fileNamesNb;
|
||||
#endif
|
||||
|
||||
/* init */
|
||||
if (filenameTable==NULL) { DISPLAY("zstd: %s \n", strerror(errno)); exit(1); }
|
||||
if (filenames==NULL) { DISPLAY("zstd: %s \n", strerror(errno)); exit(1); }
|
||||
displayOut = stderr;
|
||||
|
||||
/* Pick out program name from path. Don't rely on stdlib because of conflicting behavior */
|
||||
{ size_t pos;
|
||||
for (pos = (int)strlen(programName); pos > 0; pos--) { if (programName[pos] == '/') { pos++; break; } }
|
||||
for (pos = strlen(programName); pos > 0; pos--) { if (programName[pos] == '/') { pos++; break; } }
|
||||
programName += pos;
|
||||
}
|
||||
|
||||
|
@ -930,14 +924,14 @@ int main(int argCount, char** argv)
|
|||
case 'b':
|
||||
/* first compression Level */
|
||||
argument++;
|
||||
cLevel = readU32FromChar(&argument);
|
||||
cLevel = (int)readU32FromChar(&argument);
|
||||
break;
|
||||
|
||||
/* range bench (benchmark only) */
|
||||
case 'e':
|
||||
/* last compression Level */
|
||||
argument++;
|
||||
cLevelLast = readU32FromChar(&argument);
|
||||
cLevelLast = (int)readU32FromChar(&argument);
|
||||
break;
|
||||
|
||||
/* Modify Nb Iterations (benchmark only) */
|
||||
|
@ -964,7 +958,7 @@ int main(int argCount, char** argv)
|
|||
/* Pause at the end (-p) or set an additional param (-p#) (hidden option) */
|
||||
case 'p': argument++;
|
||||
if ((*argument>='0') && (*argument<='9')) {
|
||||
BMK_setAdditionalParam(readU32FromChar(&argument));
|
||||
BMK_setAdditionalParam((int)readU32FromChar(&argument));
|
||||
} else
|
||||
main_pause=1;
|
||||
break;
|
||||
|
@ -984,7 +978,7 @@ int main(int argCount, char** argv)
|
|||
}
|
||||
|
||||
/* add filename to list */
|
||||
filenameTable[filenameIdx++] = argument;
|
||||
UTIL_refFilename(filenames, argument);
|
||||
}
|
||||
|
||||
/* Welcome message (if verbose) */
|
||||
|
@ -992,28 +986,16 @@ int main(int argCount, char** argv)
|
|||
|
||||
#ifdef UTIL_HAS_CREATEFILELIST
|
||||
if (recursive) {
|
||||
fileNamesTable = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf, &fileNamesNb, 1);
|
||||
if (fileNamesTable) {
|
||||
unsigned u;
|
||||
for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, fileNamesTable[u]);
|
||||
free((void*)filenameTable);
|
||||
filenameTable = fileNamesTable;
|
||||
filenameIdx = fileNamesNb;
|
||||
}
|
||||
UTIL_expandFNT(&filenames, 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
BMK_setNotificationLevel(g_displayLevel);
|
||||
BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast);
|
||||
BMK_benchFiles(filenames->fileNames, (unsigned)filenames->tableSize, dictFileName, cLevel, cLevelLast);
|
||||
|
||||
_end:
|
||||
if (main_pause) waitEnter();
|
||||
free(dynNameSpace);
|
||||
#ifdef UTIL_HAS_CREATEFILELIST
|
||||
if (fileNamesTable)
|
||||
UTIL_freeFileList(fileNamesTable, fileNamesBuf);
|
||||
else
|
||||
#endif
|
||||
free((void*)filenameTable);
|
||||
UTIL_freeFileNamesTable(filenames);
|
||||
return operationResult;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue