From 246982e782849d8646b2d5df6648319935669228 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Thu, 20 Jan 2022 22:41:47 -0800 Subject: [PATCH 1/8] [dibio] Fix assertion triggered by no inputs Passing 0 inputs to `DiB_shuffle()` caused an assertion failure where it should just return. A test is added in a later commit, with the initial introduction of the new testing framework. Fixes #3007. --- programs/dibio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/programs/dibio.c b/programs/dibio.c index d19f9544..147d1e7b 100644 --- a/programs/dibio.c +++ b/programs/dibio.c @@ -27,9 +27,9 @@ #include /* memset */ #include /* fprintf, fopen, ftello64 */ #include /* errno */ -#include #include "timefn.h" /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */ +#include "../lib/common/debug.h" /* assert */ #include "../lib/common/mem.h" /* read */ #include "dibio.h" @@ -193,7 +193,8 @@ static U32 DiB_rand(U32* src) static void DiB_shuffle(const char** fileNamesTable, unsigned nbFiles) { U32 seed = 0xFD2FB528; unsigned i; - assert(nbFiles >= 1); + if (nbFiles == 0) + return; for (i = nbFiles - 1; i > 0; --i) { unsigned const j = DiB_rand(&seed) % (i + 1); const char* const tmp = fileNamesTable[j]; From 495dcb839ab6ab40c4156b99be16b010389f2214 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Thu, 20 Jan 2022 22:45:03 -0800 Subject: [PATCH 2/8] [zstdcli] Fix option detection for --auto-threads The option `--auto-threads` should still be accepted and parsed, even if `ZSTD_MULTITHREAD` is not defined. It doesn't mean anything, but we should still accept the option. Since we want scripts to be able to work generically. This bug was caught by tests I added to the new testing framework. 
--- programs/zstdcli.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/programs/zstdcli.c b/programs/zstdcli.c index fd563e1c..53b47f87 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -802,9 +802,7 @@ int main(int argCount, const char* argv[]) separateFiles = 0, setRealTimePrio = 0, singleThread = 0, -#ifdef ZSTD_MULTITHREAD defaultLogicalCores = 0, -#endif showDefaultCParams = 0, ultra=0, contentSize=1; @@ -996,7 +994,6 @@ int main(int argCount, const char* argv[]) if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readSizeTFromChar(&argument); continue; } if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readSizeTFromChar(&argument); continue; } if (longCommandWArg(&argument, "--output-dir-flat")) { NEXT_FIELD(outDirName); continue; } -#ifdef ZSTD_MULTITHREAD if (longCommandWArg(&argument, "--auto-threads")) { const char* threadDefault = NULL; NEXT_FIELD(threadDefault); @@ -1004,7 +1001,6 @@ int main(int argCount, const char* argv[]) defaultLogicalCores = 1; continue; } -#endif #ifdef UTIL_HAS_MIRRORFILELIST if (longCommandWArg(&argument, "--output-dir-mirror")) { NEXT_FIELD(outMirroredDirName); continue; } #endif @@ -1220,7 +1216,7 @@ int main(int argCount, const char* argv[]) } } #else - (void)singleThread; (void)nbWorkers; + (void)singleThread; (void)nbWorkers; (void)defaultLogicalCores; #endif g_utilDisplayLevel = g_displayLevel; From f088c430e35d8b97d11aa38c5c78a72931ec7bad Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Thu, 20 Jan 2022 22:46:45 -0800 Subject: [PATCH 3/8] [datagen] Remove extra newline printed `datagen` was printing a `\n` even when it had no other output. Raise the output level for the final `\n` to the minimum output level used. This minor bug was caught by the new testing framework. 
--- tests/datagencli.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/datagencli.c b/tests/datagencli.c index ecc05f95..7300fdb7 100644 --- a/tests/datagencli.c +++ b/tests/datagencli.c @@ -124,7 +124,7 @@ int main(int argc, const char** argv) DISPLAYLEVEL(3, "Seed = %u \n", (unsigned)seed); RDG_genStdout(size, (double)probaU32/100, litProba, seed); - DISPLAYLEVEL(1, "\n"); + DISPLAYLEVEL(3, "\n"); return 0; } From f3096ff6d1fcf87eeec876da13c06a97343ed6cf Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 18 Jan 2022 13:31:27 -0800 Subject: [PATCH 4/8] [test] Add new CLI testing platform Adds the new CLI testing platform that I'm proposing. See the added `README.md` for details. --- tests/cli-tests/.gitignore | 4 + tests/cli-tests/README.md | 248 +++++++ tests/cli-tests/basic/help.sh | 7 + tests/cli-tests/basic/help.sh.stdout.glob | 34 + tests/cli-tests/basic/version.sh | 3 + tests/cli-tests/basic/version.sh.stdout.glob | 2 + tests/cli-tests/bin/cmp_size | 46 ++ tests/cli-tests/bin/datagen | 3 + tests/cli-tests/bin/die | 4 + tests/cli-tests/bin/println | 2 + tests/cli-tests/bin/unzstd | 1 + tests/cli-tests/bin/zstd | 7 + tests/cli-tests/bin/zstdcat | 1 + tests/cli-tests/bin/zstdgrep | 2 + tests/cli-tests/common/format.sh | 19 + tests/cli-tests/common/mtime.sh | 13 + tests/cli-tests/common/permissions.sh | 18 + tests/cli-tests/common/platform.sh | 37 + tests/cli-tests/compression/adapt.sh | 6 + tests/cli-tests/compression/basic.sh | 28 + .../compression/compress-literals.sh | 10 + tests/cli-tests/compression/format.sh | 16 + tests/cli-tests/compression/levels.sh | 64 ++ .../compression/levels.sh.stderr.exact | 75 ++ .../compression/long-distance-matcher.sh | 7 + tests/cli-tests/compression/multi-threaded.sh | 11 + .../cli-tests/compression/row-match-finder.sh | 7 + tests/cli-tests/compression/setup | 7 + tests/cli-tests/compression/stream-size.sh | 7 + tests/cli-tests/dict-builder/no-inputs | 3 + 
tests/cli-tests/dict-builder/no-inputs.exit | 1 + .../dict-builder/no-inputs.stderr.exact | 5 + .../dictionaries/dictionary-mismatch.sh | 29 + .../dictionary-mismatch.sh.stderr.exact | 8 + tests/cli-tests/dictionaries/setup | 6 + tests/cli-tests/dictionaries/setup_once | 24 + tests/cli-tests/run.py | 687 ++++++++++++++++++ 37 files changed, 1452 insertions(+) create mode 100644 tests/cli-tests/.gitignore create mode 100644 tests/cli-tests/README.md create mode 100755 tests/cli-tests/basic/help.sh create mode 100644 tests/cli-tests/basic/help.sh.stdout.glob create mode 100755 tests/cli-tests/basic/version.sh create mode 100644 tests/cli-tests/basic/version.sh.stdout.glob create mode 100755 tests/cli-tests/bin/cmp_size create mode 100755 tests/cli-tests/bin/datagen create mode 100755 tests/cli-tests/bin/die create mode 100755 tests/cli-tests/bin/println create mode 120000 tests/cli-tests/bin/unzstd create mode 100755 tests/cli-tests/bin/zstd create mode 120000 tests/cli-tests/bin/zstdcat create mode 100755 tests/cli-tests/bin/zstdgrep create mode 100644 tests/cli-tests/common/format.sh create mode 100644 tests/cli-tests/common/mtime.sh create mode 100644 tests/cli-tests/common/permissions.sh create mode 100644 tests/cli-tests/common/platform.sh create mode 100755 tests/cli-tests/compression/adapt.sh create mode 100755 tests/cli-tests/compression/basic.sh create mode 100755 tests/cli-tests/compression/compress-literals.sh create mode 100755 tests/cli-tests/compression/format.sh create mode 100755 tests/cli-tests/compression/levels.sh create mode 100644 tests/cli-tests/compression/levels.sh.stderr.exact create mode 100755 tests/cli-tests/compression/long-distance-matcher.sh create mode 100755 tests/cli-tests/compression/multi-threaded.sh create mode 100755 tests/cli-tests/compression/row-match-finder.sh create mode 100755 tests/cli-tests/compression/setup create mode 100755 tests/cli-tests/compression/stream-size.sh create mode 100755 
tests/cli-tests/dict-builder/no-inputs create mode 100644 tests/cli-tests/dict-builder/no-inputs.exit create mode 100644 tests/cli-tests/dict-builder/no-inputs.stderr.exact create mode 100755 tests/cli-tests/dictionaries/dictionary-mismatch.sh create mode 100644 tests/cli-tests/dictionaries/dictionary-mismatch.sh.stderr.exact create mode 100755 tests/cli-tests/dictionaries/setup create mode 100755 tests/cli-tests/dictionaries/setup_once create mode 100755 tests/cli-tests/run.py diff --git a/tests/cli-tests/.gitignore b/tests/cli-tests/.gitignore new file mode 100644 index 00000000..4bb425b6 --- /dev/null +++ b/tests/cli-tests/.gitignore @@ -0,0 +1,4 @@ +scratch/ +!bin/ +!datagen +!zstdcat diff --git a/tests/cli-tests/README.md b/tests/cli-tests/README.md new file mode 100644 index 00000000..3098f466 --- /dev/null +++ b/tests/cli-tests/README.md @@ -0,0 +1,248 @@ +# CLI tests + +The CLI tests are focused on testing the zstd CLI. +They are intended to be simple tests that the CLI and arguments work as advertised. +They are not intended to test the library, only the code in `programs/`. +The library will get incidental coverage, but if you find yourself trying to trigger a specific condition in the library, this is the wrong tool. + +## Test runner usage + +The test runner `run.py` will run tests against the in-tree build of `zstd` and `datagen` by default. Which means that `zstd` and `datagen` must be built. + +The `zstd` binary used can be passed with `--zstd /path/to/zstd`. +Additionally, to run `zstd` through a tool like `valgrind` or `qemu`, set the `--exec-prefix 'valgrind -q'` flag. + +Similarly, the `--datagen`, and `--zstdgrep` flags can be set to specify +the paths to their respective binaries. However, these tools do not use +the `EXEC_PREFIX`. + +Each test executes in its own scratch directory under `scratch/test/name`. E.g. `scratch/basic/help.sh/`. Normally these directories are removed after the test executes. 
However, the `--preserve` flag will preserve these directories after execution, and save the test's exit code, stdout, and stderr in the scratch directory to `exit`, `stdout`, and `stderr` respectively. This can be useful for debugging/editing a test and updating the expected output. + +### Running all the tests + +By default the test runner `run.py` will run all the tests, and report the results. + +Examples: + +``` +./run.py +./run.py --preserve +./run.py --zstd ../../build/programs/zstd --datagen ../../build/tests/datagen +``` + +### Running specific tests + +A set of test names can be passed to the test runner `run.py` to only execute those tests. +This can be useful for writing or debugging a test, especially with `--preserve`. + +The test name can either be the path to the test file, or the test name, which is the path relative to the test directory. + +Examples: + +``` +./run.py basic/help.sh +./run.py --preserve basic/help.sh basic/version.sh +./run.py --preserve --verbose basic/help.sh +``` + +## Writing a test + +Test cases are arbitrary executables, and can be written in any language, but are generally shell scripts. +After the script executes, the exit code, stderr, and stdout are compared against the expectations. + +Each test is run in a clean directory that the test can use for intermediate files. This directory will be cleaned up at the end of the test, unless `--preserve` is passed to the test runner. Additionally, the `setup` script can prepare the directory before the test runs. + +### Calling zstd, utilities, and environment variables + +The `$PATH` for tests is prepended with the `bin/` sub-directory, which contains helper scripts for ease of testing. +The `zstd` binary will call the zstd binary specified by `run.py` with the correct `$EXEC_PREFIX`. +Similarly, `datagen`, `unzstd`, `zstdgrep`, `zstdcat`, etc, are provided. + +Helper utilities like `cmp_size`, `println`, and `die` are provided here too. See their scripts for details. 
+ +Common shell script libraries are provided under `common/`, with helper variables and functions. They can be sourced with `source "$COMMON/library.sh"`. + +Lastly, environment variables are provided for testing, which can be listed when calling `run.py` with `--verbose`. +They are generally used by the helper scripts in `bin/` to coordinate everything. + +### Basic test case + +When executing your `$TEST` executable, by default the exit code is expected to be `0`. However, you can provide an alternate expected exit code in a `$TEST.exit` file. + +When executing your `$TEST` executable, by default the expected stderr and stdout are empty. However, you can override the default by providing one of three files: + +* `$TEST.{stdout,stderr}.exact` +* `$TEST.{stdout,stderr}.glob` +* `$TEST.{stdout,stderr}.ignore` + +If you provide a `.exact` file, the output is expected to exactly match, byte-for-byte. + +If you provide a `.glob` file, the output is expected to match the expected file, where each line is interpreted as a glob pattern. Additionally, a line containing only `...` matches all lines until the next expected line matches. + +If you provide a `.ignore` file, the output is ignored. + +#### Passing examples + +All these examples pass. + +Exit 1, and change the expectation to be 1. + +``` +exit-1.sh +--- +#!/bin/sh +exit 1 +--- + +exit-1.sh.exit +--- +1 +--- +``` + +Check the stdout output exactly matches. + +``` +echo.sh +--- +#!/bin/sh +echo "hello world" +--- + +echo.sh.stdout.exact +--- +hello world +--- +``` + +Check the stderr output using a glob. + +``` +random.sh +--- +#!/bin/sh +head -c 10 < /dev/urandom | xxd >&2 +--- + +random.sh.stderr.glob +--- +00000000: * * * * * * +``` + +Multiple lines can be matched with ... + +``` +random-num-lines.sh +--- +#!/bin/sh +echo hello +seq 0 $RANDOM +echo world +--- + +random-num-lines.sh.stdout.glob +--- +hello +0 +... +world +--- +``` + +#### Failing examples + +Exit code is expected to be 0, but is 1. 
+ +``` +exit-1.sh +--- +#!/bin/sh +exit 1 +--- +``` + +Stdout is expected to be empty, but isn't. + +``` +echo.sh +--- +#!/bin/sh +echo hello world +``` + +Stderr is expected to be hello but is world. + +``` +hello.sh +--- +#!/bin/sh +echo world >&2 +--- + +hello.sh.stderr.exact +--- +hello +--- +``` + +### Setup & teardown scripts + +Finally, test writing can be eased with setup and teardown scripts. +Each directory in the test directory is a test-suite consisting of all tests within that directory (but not sub-directories). +This test suite can come with 4 scripts to help test writing: + +* `setup_once` +* `teardown_once` +* `setup` +* `teardown` + +The `setup_once` and `teardown_once` are run once before and after all the tests in the suite respectively. +They operate in the scratch directory for the test suite, which is the parent directory of each scratch directory for each test case. +They can do work that is shared between tests to improve test efficiency. +For example, the `dictionaries/setup_once` script builds several dictionaries, for use in the `dictionaries` tests. + +The `setup` and `teardown` scripts run before and after each test case respectively, in the test case's scratch directory. +These scripts can do work that is shared between test cases to make tests more succinct. +For example, the `dictionaries/setup` script copies the dictionaries built by the `dictionaries/setup_once` script into the test's scratch directory, to make them easier to use, and make sure they aren't accidentally modified. 
+ +#### Examples + +``` +basic/setup +--- +#!/bin/sh +# Create some files for testing with +datagen > file +datagen > file0 +datagen > file1 +--- + +basic/test.sh +--- +#!/bin/sh +zstd file file0 file1 +--- + +dictionaries/setup_once +--- +#!/bin/sh +set -e + +mkdir files/ dicts/ +for i in $(seq 10); do + datagen -g1000 > files/$i +done + +zstd --train -r files/ -o dicts/0 +--- + +dictionaries/setup +--- +#!/bin/sh + +# Runs in the test case's scratch directory. +# The test suite's scratch directory that +# `setup_once` operates in is the parent directory. +cp -r ../files ../dicts . +--- +``` diff --git a/tests/cli-tests/basic/help.sh b/tests/cli-tests/basic/help.sh new file mode 100755 index 00000000..c683b6a6 --- /dev/null +++ b/tests/cli-tests/basic/help.sh @@ -0,0 +1,7 @@ +#!/bin/sh -e +println "+ zstd -h" +zstd -h +println "+ zstd -H" +zstd -H +println "+ zstd --help" +zstd --help diff --git a/tests/cli-tests/basic/help.sh.stdout.glob b/tests/cli-tests/basic/help.sh.stdout.glob new file mode 100644 index 00000000..5b2f8e45 --- /dev/null +++ b/tests/cli-tests/basic/help.sh.stdout.glob @@ -0,0 +1,34 @@ ++ zstd -h +*** zstd command line interface *-bits v1.*.*, by Yann Collet *** +Usage : + zstd *args* *FILE(s)* *-o file* + +FILE : a filename + with no FILE, or when FILE is - , read standard input +Arguments : + -# : # compression level* + -d : decompression + -D DICT: use DICT as Dictionary for compression or decompression + -o file: result stored into `file` (only 1 output file) + -f : disable input and output checks. Allows overwriting existing files, + input from console, output to stdout, operating on links, + block devices, etc. +--rm : remove source file(s) after successful de/compression + -k : preserve source file(s) (default) + -h/-H : display help/long help and exit + +Advanced arguments : + -V : display Version number and exit +... ++ zstd -H +... +Arguments : +... +Advanced arguments : +... ++ zstd --help +... +Arguments : +... 
+Advanced arguments : +... diff --git a/tests/cli-tests/basic/version.sh b/tests/cli-tests/basic/version.sh new file mode 100755 index 00000000..d50de0f3 --- /dev/null +++ b/tests/cli-tests/basic/version.sh @@ -0,0 +1,3 @@ +#!/bin/sh -e +zstd -V +zstd --version diff --git a/tests/cli-tests/basic/version.sh.stdout.glob b/tests/cli-tests/basic/version.sh.stdout.glob new file mode 100644 index 00000000..54968fa4 --- /dev/null +++ b/tests/cli-tests/basic/version.sh.stdout.glob @@ -0,0 +1,2 @@ +*** zstd command line interface *-bits v1.*.*, by Yann Collet *** +*** zstd command line interface *-bits v1.*.*, by Yann Collet *** diff --git a/tests/cli-tests/bin/cmp_size b/tests/cli-tests/bin/cmp_size new file mode 100755 index 00000000..5afa1c59 --- /dev/null +++ b/tests/cli-tests/bin/cmp_size @@ -0,0 +1,46 @@ +#!/bin/sh + +# Small utility to compare file sizes without printing them with set -x. + +set -e + +usage() +{ + printf "USAGE:\n\t$0 [-eq|-ne|-lt|-le|-gt|-ge] FILE1 FILE2\n" +} + +help() +{ + printf "Small utility to compare file sizes without printing them with set -x.\n\n" + usage +} + +case "$1" in + -h) help; exit 0 ;; + --help) help; exit 0 ;; +esac + +if ! test -f "$2"; then + printf "FILE1='%b' is not a file\n\n" "$2" + usage + exit 1 +fi + +if ! 
test -f "$3"; then + printf "FILE2='%b' is not a file\n\n" "$3" + usage + exit 1 +fi + + +size1=$(wc -c < "$2") +size2=$(wc -c < "$3") + +case "$1" in + -eq) [ "$size1" -eq "$size2" ] ;; + -ne) [ "$size1" -ne "$size2" ] ;; + -lt) [ "$size1" -lt "$size2" ] ;; + -le) [ "$size1" -le "$size2" ] ;; + -gt) [ "$size1" -gt "$size2" ] ;; + -ge) [ "$size1" -ge "$size2" ] ;; +esac diff --git a/tests/cli-tests/bin/datagen b/tests/cli-tests/bin/datagen new file mode 100755 index 00000000..8c60cbcc --- /dev/null +++ b/tests/cli-tests/bin/datagen @@ -0,0 +1,3 @@ +#!/bin/sh + +"$DATAGEN_BIN" "$@" diff --git a/tests/cli-tests/bin/die b/tests/cli-tests/bin/die new file mode 100755 index 00000000..8633bc97 --- /dev/null +++ b/tests/cli-tests/bin/die @@ -0,0 +1,4 @@ +#!/bin/sh + +println "${*}" 1>&2 +exit 1 diff --git a/tests/cli-tests/bin/println b/tests/cli-tests/bin/println new file mode 100755 index 00000000..1da24604 --- /dev/null +++ b/tests/cli-tests/bin/println @@ -0,0 +1,2 @@ +#!/bin/sh +printf '%b\n' "${*}" diff --git a/tests/cli-tests/bin/unzstd b/tests/cli-tests/bin/unzstd new file mode 120000 index 00000000..613f917f --- /dev/null +++ b/tests/cli-tests/bin/unzstd @@ -0,0 +1 @@ +zstd \ No newline at end of file diff --git a/tests/cli-tests/bin/zstd b/tests/cli-tests/bin/zstd new file mode 100755 index 00000000..198fc6d2 --- /dev/null +++ b/tests/cli-tests/bin/zstd @@ -0,0 +1,7 @@ +#!/bin/sh + +if [ -z "$EXEC_PREFIX" ]; then + "$ZSTD_BIN" "$@" +else + $EXEC_PREFIX "$ZSTD_BIN" "$@" +fi diff --git a/tests/cli-tests/bin/zstdcat b/tests/cli-tests/bin/zstdcat new file mode 120000 index 00000000..613f917f --- /dev/null +++ b/tests/cli-tests/bin/zstdcat @@ -0,0 +1 @@ +zstd \ No newline at end of file diff --git a/tests/cli-tests/bin/zstdgrep b/tests/cli-tests/bin/zstdgrep new file mode 100755 index 00000000..8821ebb5 --- /dev/null +++ b/tests/cli-tests/bin/zstdgrep @@ -0,0 +1,2 @@ +#!/bin/sh +"$ZSTDGREP_BIN" "$@" diff --git a/tests/cli-tests/common/format.sh 
b/tests/cli-tests/common/format.sh new file mode 100644 index 00000000..20ff0f05 --- /dev/null +++ b/tests/cli-tests/common/format.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +source "$COMMON/platform.sh" + +zstd_supports_format() +{ + zstd -h | grep > $INTOVOID -- "--format=$1" +} + +format_extension() +{ + if [ "$1" = "zstd" ]; then + printf "zst" + elif [ "$1" = "gzip" ]; then + printf "gz" + else + printf "$1" + fi +} diff --git a/tests/cli-tests/common/mtime.sh b/tests/cli-tests/common/mtime.sh new file mode 100644 index 00000000..7ce931a9 --- /dev/null +++ b/tests/cli-tests/common/mtime.sh @@ -0,0 +1,13 @@ +source "$COMMON/platform.sh" + +MTIME="stat -c %Y" +case "$UNAME" in + Darwin | FreeBSD | OpenBSD | NetBSD) MTIME="stat -f %m" ;; +esac + +assertSameMTime() { + MT1=$($MTIME "$1") + MT2=$($MTIME "$2") + echo MTIME $MT1 $MT2 + [ "$MT1" = "$MT2" ] || die "mtime on $1 doesn't match mtime on $2 ($MT1 != $MT2)" +} diff --git a/tests/cli-tests/common/permissions.sh b/tests/cli-tests/common/permissions.sh new file mode 100644 index 00000000..b1f6ea3b --- /dev/null +++ b/tests/cli-tests/common/permissions.sh @@ -0,0 +1,18 @@ +source "$COMMON/platform.sh" + +GET_PERMS="stat -c %a" +case "$UNAME" in + Darwin | FreeBSD | OpenBSD | NetBSD) GET_PERMS="stat -f %Lp" ;; +esac + +assertFilePermissions() { + STAT1=$($GET_PERMS "$1") + STAT2=$2 + [ "$STAT1" = "$STAT2" ] || die "permissions on $1 don't match expected ($STAT1 != $STAT2)" +} + +assertSamePermissions() { + STAT1=$($GET_PERMS "$1") + STAT2=$($GET_PERMS "$2") + [ "$STAT1" = "$STAT2" ] || die "permissions on $1 don't match those on $2 ($STAT1 != $STAT2)" +} diff --git a/tests/cli-tests/common/platform.sh b/tests/cli-tests/common/platform.sh new file mode 100644 index 00000000..6eb45eab --- /dev/null +++ b/tests/cli-tests/common/platform.sh @@ -0,0 +1,37 @@ +#!/bin/sh + +UNAME=$(uname) + +isWindows=false +INTOVOID="/dev/null" +case "$UNAME" in + GNU) DEVDEVICE="/dev/random" ;; + *) DEVDEVICE="/dev/zero" ;; +esac +case "$OS" in 
+ Windows*) + isWindows=true + INTOVOID="NUL" + DEVDEVICE="NUL" + ;; +esac + +case "$UNAME" in + Darwin) MD5SUM="md5 -r" ;; + FreeBSD) MD5SUM="gmd5sum" ;; + NetBSD) MD5SUM="md5 -n" ;; + OpenBSD) MD5SUM="md5" ;; + *) MD5SUM="md5sum" ;; +esac + +DIFF="diff" +case "$UNAME" in + SunOS) DIFF="gdiff" ;; +esac + +if echo hello | zstd -v -T2 2>&1 > $INTOVOID | grep -q 'multi-threading is disabled' +then + hasMT="" +else + hasMT="true" +fi diff --git a/tests/cli-tests/compression/adapt.sh b/tests/cli-tests/compression/adapt.sh new file mode 100755 index 00000000..564e955b --- /dev/null +++ b/tests/cli-tests/compression/adapt.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +set -e + +# Test --adapt +zstd -f file --adapt -c | zstd -t diff --git a/tests/cli-tests/compression/basic.sh b/tests/cli-tests/compression/basic.sh new file mode 100755 index 00000000..6f0f8793 --- /dev/null +++ b/tests/cli-tests/compression/basic.sh @@ -0,0 +1,28 @@ +#!/bin/sh -e + +# Uncomment the set -x line for debugging +# set -x + +# Test compression flags and check that they work +zstd file ; zstd -t file.zst +zstd -f file ; zstd -t file.zst +zstd -f -z file ; zstd -t file.zst +zstd -f -k file ; zstd -t file.zst +zstd -f -C file ; zstd -t file.zst +zstd -f --check file ; zstd -t file.zst +zstd -f --no-check file ; zstd -t file.zst +zstd -f -- file ; zstd -t file.zst + +# Test output file compression +zstd -o file-out.zst ; zstd -t file-out.zst +zstd -fo file-out.zst; zstd -t file-out.zst + +# Test compression to stdout +zstd -c file | zstd -t +zstd --stdout file | zstd -t +println bob | zstd | zstd -t + +# Test --rm +cp file file-rm +zstd --rm file-rm; zstd -t file-rm.zst +test ! 
-f file-rm diff --git a/tests/cli-tests/compression/compress-literals.sh b/tests/cli-tests/compression/compress-literals.sh new file mode 100755 index 00000000..573481a3 --- /dev/null +++ b/tests/cli-tests/compression/compress-literals.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +set -e + +# Test --[no-]compress-literals +zstd file --no-compress-literals -1 -c | zstd -t +zstd file --no-compress-literals -19 -c | zstd -t +zstd file --no-compress-literals --fast=1 -c | zstd -t +zstd file --compress-literals -1 -c | zstd -t +zstd file --compress-literals --fast=1 -c | zstd -t diff --git a/tests/cli-tests/compression/format.sh b/tests/cli-tests/compression/format.sh new file mode 100755 index 00000000..86fb4008 --- /dev/null +++ b/tests/cli-tests/compression/format.sh @@ -0,0 +1,16 @@ +#!/bin/sh + +source "$COMMON/format.sh" + +set -e + +# Test --format +zstd --format=zstd file -f +zstd -t file.zst +for format in "gzip" "lz4" "xz" "lzma"; do + if zstd_supports_format $format; then + zstd --format=$format file + zstd -t file.$(format_extension $format) + zstd -c --format=$format file | zstd -t --format=$format + fi +done diff --git a/tests/cli-tests/compression/levels.sh b/tests/cli-tests/compression/levels.sh new file mode 100755 index 00000000..6bd0aca0 --- /dev/null +++ b/tests/cli-tests/compression/levels.sh @@ -0,0 +1,64 @@ +#!/bin/sh + +set -e +set -x + +datagen > file + +# Compress with various levels and ensure that their sizes are ordered +zstd --fast=10 file -o file-f10.zst +zstd --fast=1 file -o file-f1.zst +zstd -1 file -o file-1.zst +zstd -19 file -o file-19.zst +zstd -22 --ultra file -o file-22.zst + +zstd -t file-{f10,f1,1,19,22}.zst + +cmp_size -ne file-19.zst file-22.zst +cmp_size -lt file-19.zst file-1.zst +cmp_size -lt file-1.zst file-f1.zst +cmp_size -lt file-f1.zst file-f10.zst + +# Test default levels +zstd --fast file -f +cmp file.zst file-f1.zst || die "--fast is not level -1" + +zstd -0 file -o file-0.zst +zstd -f file +cmp file.zst file-0.zst || die 
"Level 0 is not the default level" + +# Test level clamping +zstd -99 file -o file-99.zst +cmp file-19.zst file-99.zst || die "Level 99 is clamped to 19" +zstd --fast=200000 file -c | zstd -t + +zstd -5000000000 -f file && die "Level too large, must fail" ||: +zstd --fast=5000000000 -f file && die "Level too large, must fail" ||: + +# Test setting a level through the environment variable +ZSTD_CLEVEL=-10 zstd file -o file-f10-env.zst +ZSTD_CLEVEL=1 zstd file -o file-1-env.zst +ZSTD_CLEVEL=+19 zstd file -o file-19-env.zst +ZSTD_CLEVEL=+99 zstd file -o file-99-env.zst + +cmp file-f10{,-env}.zst || die "Environment variable failed to set level" +cmp file-1{,-env}.zst || die "Environment variable failed to set level" +cmp file-19{,-env}.zst || die "Environment variable failed to set level" +cmp file-99{,-env}.zst || die "Environment variable failed to set level" + +# Test invalid environment clevel is the default level +zstd -f file +ZSTD_CLEVEL=- zstd -f file -o file-env.zst ; cmp file.zst file-env.zst +ZSTD_CLEVEL=+ zstd -f file -o file-env.zst ; cmp file.zst file-env.zst +ZSTD_CLEVEL=a zstd -f file -o file-env.zst ; cmp file.zst file-env.zst +ZSTD_CLEVEL=-a zstd -f file -o file-env.zst ; cmp file.zst file-env.zst +ZSTD_CLEVEL=+a zstd -f file -o file-env.zst ; cmp file.zst file-env.zst +ZSTD_CLEVEL=3a7 zstd -f file -o file-env.zst ; cmp file.zst file-env.zst +ZSTD_CLEVEL=5000000000 zstd -f file -o file-env.zst; cmp file.zst file-env.zst + +# Test environment clevel is overridden by command line +ZSTD_CLEVEL=10 zstd -f file -1 -o file-1-env.zst +ZSTD_CLEVEL=10 zstd -f file --fast=1 -o file-f1-env.zst + +cmp file-1{,-env}.zst || die "Environment variable not overridden" +cmp file-f1{,-env}.zst || die "Environment variable not overridden" diff --git a/tests/cli-tests/compression/levels.sh.stderr.exact b/tests/cli-tests/compression/levels.sh.stderr.exact new file mode 100644 index 00000000..c0b7066f --- /dev/null +++ b/tests/cli-tests/compression/levels.sh.stderr.exact 
@@ -0,0 +1,75 @@ ++ datagen ++ zstd --fast=10 file -o file-f10.zst ++ zstd --fast=1 file -o file-f1.zst ++ zstd -1 file -o file-1.zst ++ zstd -19 file -o file-19.zst ++ zstd -22 --ultra file -o file-22.zst ++ zstd -t file-f10.zst file-f1.zst file-1.zst file-19.zst file-22.zst ++ cmp_size -ne file-19.zst file-22.zst ++ cmp_size -lt file-19.zst file-1.zst ++ cmp_size -lt file-1.zst file-f1.zst ++ cmp_size -lt file-f1.zst file-f10.zst ++ zstd --fast file -f ++ cmp file.zst file-f1.zst ++ zstd -0 file -o file-0.zst ++ zstd -f file ++ cmp file.zst file-0.zst ++ zstd -99 file -o file-99.zst +Warning : compression level higher than max, reduced to 19 ++ cmp file-19.zst file-99.zst ++ zstd --fast=200000 file -c ++ zstd -t ++ zstd -5000000000 -f file +error: numeric value overflows 32-bit unsigned int ++ : ++ zstd --fast=5000000000 -f file +error: numeric value overflows 32-bit unsigned int ++ : ++ ZSTD_CLEVEL=-10 ++ zstd file -o file-f10-env.zst ++ ZSTD_CLEVEL=1 ++ zstd file -o file-1-env.zst ++ ZSTD_CLEVEL=+19 ++ zstd file -o file-19-env.zst ++ ZSTD_CLEVEL=+99 ++ zstd file -o file-99-env.zst +Warning : compression level higher than max, reduced to 19 ++ cmp file-f10.zst file-f10-env.zst ++ cmp file-1.zst file-1-env.zst ++ cmp file-19.zst file-19-env.zst ++ cmp file-99.zst file-99-env.zst ++ zstd -f file ++ ZSTD_CLEVEL=- ++ zstd -f file -o file-env.zst +Ignore environment variable setting ZSTD_CLEVEL=-: not a valid integer value ++ cmp file.zst file-env.zst ++ ZSTD_CLEVEL=+ ++ zstd -f file -o file-env.zst +Ignore environment variable setting ZSTD_CLEVEL=+: not a valid integer value ++ cmp file.zst file-env.zst ++ ZSTD_CLEVEL=a ++ zstd -f file -o file-env.zst +Ignore environment variable setting ZSTD_CLEVEL=a: not a valid integer value ++ cmp file.zst file-env.zst ++ ZSTD_CLEVEL=-a ++ zstd -f file -o file-env.zst +Ignore environment variable setting ZSTD_CLEVEL=-a: not a valid integer value ++ cmp file.zst file-env.zst ++ ZSTD_CLEVEL=+a ++ zstd -f file -o file-env.zst 
+Ignore environment variable setting ZSTD_CLEVEL=+a: not a valid integer value ++ cmp file.zst file-env.zst ++ ZSTD_CLEVEL=3a7 ++ zstd -f file -o file-env.zst +Ignore environment variable setting ZSTD_CLEVEL=3a7: not a valid integer value ++ cmp file.zst file-env.zst ++ ZSTD_CLEVEL=5000000000 ++ zstd -f file -o file-env.zst +Ignore environment variable setting ZSTD_CLEVEL=5000000000: numeric value too large ++ cmp file.zst file-env.zst ++ ZSTD_CLEVEL=10 ++ zstd -f file -1 -o file-1-env.zst ++ ZSTD_CLEVEL=10 ++ zstd -f file --fast=1 -o file-f1-env.zst ++ cmp file-1.zst file-1-env.zst ++ cmp file-f1.zst file-f1-env.zst diff --git a/tests/cli-tests/compression/long-distance-matcher.sh b/tests/cli-tests/compression/long-distance-matcher.sh new file mode 100755 index 00000000..8f2c61bf --- /dev/null +++ b/tests/cli-tests/compression/long-distance-matcher.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +set -e + +# Test --long +zstd -f file --long ; zstd -t file.zst +zstd -f file --long=20; zstd -t file.zst diff --git a/tests/cli-tests/compression/multi-threaded.sh b/tests/cli-tests/compression/multi-threaded.sh new file mode 100755 index 00000000..e3961330 --- /dev/null +++ b/tests/cli-tests/compression/multi-threaded.sh @@ -0,0 +1,11 @@ +#!/bin/sh + +set -e + +# Test multi-threaded flags +zstd --single-thread file -f ; zstd -t file.zst +zstd -T2 -f file ; zstd -t file.zst +zstd --rsyncable -f file ; zstd -t file.zst +zstd -T0 -f file ; zstd -t file.zst +zstd -T0 --auto-threads=logical -f file ; zstd -t file.zst +zstd -T0 --auto-threads=physical -f file; zstd -t file.zst diff --git a/tests/cli-tests/compression/row-match-finder.sh b/tests/cli-tests/compression/row-match-finder.sh new file mode 100755 index 00000000..5b36017a --- /dev/null +++ b/tests/cli-tests/compression/row-match-finder.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +set -e + +# Test --[no-]row-match-finder +zstd file -7f --row-match-finder +zstd file -7f --no-row-match-finder diff --git a/tests/cli-tests/compression/setup 
b/tests/cli-tests/compression/setup new file mode 100755 index 00000000..96e2309b --- /dev/null +++ b/tests/cli-tests/compression/setup @@ -0,0 +1,7 @@ +#!/bin/sh + +set -e + +datagen > file +datagen > file0 +datagen > file1 diff --git a/tests/cli-tests/compression/stream-size.sh b/tests/cli-tests/compression/stream-size.sh new file mode 100755 index 00000000..7344769a --- /dev/null +++ b/tests/cli-tests/compression/stream-size.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +set -e + +# Test stream size & hint +datagen -g7654 | zstd --stream-size=7654 | zstd -t +datagen -g7654 | zstd --size-hint=7000 | zstd -t diff --git a/tests/cli-tests/dict-builder/no-inputs b/tests/cli-tests/dict-builder/no-inputs new file mode 100755 index 00000000..d37fdce5 --- /dev/null +++ b/tests/cli-tests/dict-builder/no-inputs @@ -0,0 +1,3 @@ +#!/bin/sh +set -x +zstd --train diff --git a/tests/cli-tests/dict-builder/no-inputs.exit b/tests/cli-tests/dict-builder/no-inputs.exit new file mode 100644 index 00000000..8351c193 --- /dev/null +++ b/tests/cli-tests/dict-builder/no-inputs.exit @@ -0,0 +1 @@ +14 diff --git a/tests/cli-tests/dict-builder/no-inputs.stderr.exact b/tests/cli-tests/dict-builder/no-inputs.stderr.exact new file mode 100644 index 00000000..b3b5599d --- /dev/null +++ b/tests/cli-tests/dict-builder/no-inputs.stderr.exact @@ -0,0 +1,5 @@ ++ zstd --train +! Warning : nb of samples too low for proper processing ! +! Please provide _one file per sample_. +! Alternatively, split files into fixed-size blocks representative of samples, with -B# +Error 14 : nb of samples too low diff --git a/tests/cli-tests/dictionaries/dictionary-mismatch.sh b/tests/cli-tests/dictionaries/dictionary-mismatch.sh new file mode 100755 index 00000000..2b5d5a0d --- /dev/null +++ b/tests/cli-tests/dictionaries/dictionary-mismatch.sh @@ -0,0 +1,29 @@ +#!/bin/sh + +source "$COMMON/platform.sh" + +set -e + +if false; then # disabled on purpose ("[ false ]" tests a non-empty string and is always true) + for seed in $(seq 100); do + datagen -g1000 -s$seed > file$seed + done + + zstd --train -r . 
-o dict0 -qq + + for seed in $(seq 101 200); do + datagen -g1000 -s$seed > file$seed + done + + zstd --train -r . -o dict1 -qq + + [ "$($MD5SUM < dict0)" != "$($MD5SUM < dict1)" ] || die "dictionaries must not match" + + datagen -g1000 -s0 > file0 +fi + +set -x +zstd files/0 -D dicts/0 +zstd -t files/0.zst -D dicts/0 +zstd -t files/0.zst -D dicts/1 && die "Must fail" ||: +zstd -t files/0.zst && die "Must fail" ||: diff --git a/tests/cli-tests/dictionaries/dictionary-mismatch.sh.stderr.exact b/tests/cli-tests/dictionaries/dictionary-mismatch.sh.stderr.exact new file mode 100644 index 00000000..399a3207 --- /dev/null +++ b/tests/cli-tests/dictionaries/dictionary-mismatch.sh.stderr.exact @@ -0,0 +1,8 @@ ++ zstd files/0 -D dicts/0 ++ zstd -t files/0.zst -D dicts/0 ++ zstd -t files/0.zst -D dicts/1 +files/0.zst : Decoding error (36) : Dictionary mismatch ++ : ++ zstd -t files/0.zst +files/0.zst : Decoding error (36) : Dictionary mismatch ++ : diff --git a/tests/cli-tests/dictionaries/setup b/tests/cli-tests/dictionaries/setup new file mode 100755 index 00000000..616c73eb --- /dev/null +++ b/tests/cli-tests/dictionaries/setup @@ -0,0 +1,6 @@ +#!/bin/sh + +set -e + +cp -r ../files . +cp -r ../dicts . diff --git a/tests/cli-tests/dictionaries/setup_once b/tests/cli-tests/dictionaries/setup_once new file mode 100755 index 00000000..6316df16 --- /dev/null +++ b/tests/cli-tests/dictionaries/setup_once @@ -0,0 +1,24 @@ +#!/bin/sh + +set -e + +source "$COMMON/platform.sh" + + +mkdir files/ dicts/ + +for seed in $(seq 50); do + datagen -g1000 -s$seed > files/$seed +done + +zstd --train -r files -o dicts/0 -qq + +for seed in $(seq 51 100); do + datagen -g1000 -s$seed > files/$seed +done + +zstd --train -r files -o dicts/1 -qq + +cmp dicts/0 dicts/1 && die "dictionaries must not match!" 
+ +datagen -g1000 > files/0 diff --git a/tests/cli-tests/run.py b/tests/cli-tests/run.py new file mode 100755 index 00000000..6791918a --- /dev/null +++ b/tests/cli-tests/run.py @@ -0,0 +1,687 @@ +#!/usr/bin/env python3 +# ################################################################ +# Copyright (c) Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# You may select, at your option, one of the above-listed licenses. +# ########################################################################## + +import argparse +import contextlib +import copy +import fnmatch +import os +import shutil +import subprocess +import sys +import tempfile +import typing + + +EXCLUDED_DIRS = { + "bin", + "common", + "scratch", +} + + +EXCLUDED_BASENAMES = { + "setup", + "setup_once", + "teardown", + "teardown_once", + "README.md", + "run.py", + ".gitignore", +} + +EXCLUDED_SUFFIXES = [ + ".exact", + ".glob", + ".ignore", + ".exit", +] + + +def exclude_dir(dirname: str) -> bool: + """ + Should files under the directory :dirname: be excluded from the test runner? 
+ """ + if dirname in EXCLUDED_DIRS: + return True + return False + + +def exclude_file(filename: str) -> bool: + """Should the file :filename: be excluded from the test runner?""" + if filename in EXCLUDED_BASENAMES: + return True + for suffix in EXCLUDED_SUFFIXES: + if filename.endswith(suffix): + return True + return False + +def read_file(filename: str) -> bytes: + """Reads the file :filename: and returns the contents as bytes.""" + with open(filename, "rb") as f: + return f.read() + + +def diff(a: bytes, b: bytes) -> str: + """Returns a diff between two different byte-strings :a: and :b:.""" + assert a != b + with tempfile.NamedTemporaryFile("wb") as fa: + fa.write(a) + fa.flush() + with tempfile.NamedTemporaryFile("wb") as fb: + fb.write(b) + fb.flush() + + diff_bytes = subprocess.run(["diff", fa.name, fb.name], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL).stdout + return diff_bytes.decode("utf8") + + +def pop_line(data: bytes) -> typing.Tuple[typing.Optional[bytes], bytes]: + """ + Pop the first line from :data: and returns the first line and the remainder + of the data as a tuple. If :data: is empty, returns :(None, data):. Otherwise + the first line always ends in a :\n:, even if it is the last line and :data: + doesn't end in :\n:. + """ + NEWLINE = b"\n"[0] + + if data == b'': + return (None, data) + + newline_idx = data.find(b"\n") + if newline_idx == -1: + end_idx = len(data) + else: + end_idx = newline_idx + 1 + + line = data[:end_idx] + data = data[end_idx:] + + assert len(line) != 0 + if line[-1] != NEWLINE: + line += NEWLINE + + return (line, data) + + +def glob_line_matches(actual: bytes, expect: bytes) -> bool: + """ + Does the `actual` line match the expected glob line `expect`? + """ + return fnmatch.fnmatchcase(actual.strip(), expect.strip()) + + +def glob_diff(actual: bytes, expect: bytes) -> bytes: + """ + Returns None if the :actual: content matches the expected glob :expect:, + otherwise returns the diff bytes. 
+ """ + diff = b'' + actual_line, actual = pop_line(actual) + expect_line, expect = pop_line(expect) + while True: + # Handle end of file conditions - allow extra newlines + while expect_line is None and actual_line == b"\n": + actual_line, actual = pop_line(actual) + while actual_line is None and expect_line == b"\n": + expect_line, expect = pop_line(expect) + + if expect_line is None and actual_line is None: + if diff == b'': + return None + return diff + elif expect_line is None: + diff += b"---\n" + while actual_line != None: + diff += b"> " + diff += actual_line + actual_line, actual = pop_line(actual) + return diff + elif actual_line is None: + diff += b"---\n" + while expect_line != None: + diff += b"< " + diff += expect_line + expect_line, expect = pop_line(expect) + return diff + + assert expect_line is not None + assert actual_line is not None + + if expect_line == b'...\n': + next_expect_line, expect = pop_line(expect) + if next_expect_line is None: + if diff == b'': + return None + return diff + while not glob_line_matches(actual_line, next_expect_line): + actual_line, actual = pop_line(actual) + if actual_line is None: + diff += b"---\n" + diff += b"< " + diff += next_expect_line + return diff + expect_line = next_expect_line + continue + + if not glob_line_matches(actual_line, expect_line): + diff += b'---\n' + diff += b'< ' + expect_line + diff += b'> ' + actual_line + + actual_line, actual = pop_line(actual) + expect_line, expect = pop_line(expect) + + +class Options: + """Options configuring how to run a :TestCase:.""" + def __init__( + self, + env: typing.Dict[str, str], + timeout: typing.Optional[int], + verbose: bool, + preserve: bool, + scratch_dir: str, + test_dir: str, + ) -> None: + self.env = env + self.timeout = timeout + self.verbose = verbose + self.preserve = preserve + self.scratch_dir = scratch_dir + self.test_dir = test_dir + + +class TestCase: + """ + Logic and state related to running a single test case. + + 1. 
Initialize the test case. + 2. Launch the test case with :TestCase.launch():. + This will start the test execution in a subprocess, but + not wait for completion. So you could launch multiple test + cases in parallel. This will not print any test output. + 3. Analyze the results with :TestCase.analyze():. This will + join the test subprocess, check the results against the + expectations, and print the results to stdout. + + :TestCase.run(): is also provided which combines the launch & analyze + steps for single-threaded use-cases. + + All other methods, prefixed with _, are private helper functions. + """ + def __init__(self, test_filename: str, options: Options) -> None: + """ + Initialize the :TestCase: for the test located in :test_filename: + with the given :options:. + """ + self._opts = options + self._test_file = test_filename + self._test_name = os.path.normpath( + os.path.relpath(test_filename, start=self._opts.test_dir) + ) + self._success = {} + self._message = {} + self._test_stdin = None + self._scratch_dir = os.path.abspath(os.path.join(self._opts.scratch_dir, self._test_name)) + + @property + def name(self) -> str: + """Returns the unique name for the test.""" + return self._test_name + + def launch(self) -> None: + """ + Launch the test case as a subprocess, but do not block on completion. + This allows users to run multiple tests in parallel. Results aren't yet + printed out. + """ + self._launch_test() + + def analyze(self) -> bool: + """ + Must be called after :TestCase.launch():. Joins the test subprocess and + checks the results against expectations. Finally prints the results to + stdout and returns the success. 
+ """ + self._join_test() + self._check_exit() + self._check_stderr() + self._check_stdout() + self._analyze_results() + return self._succeeded + + def run(self) -> bool: + """Shorthand for combining both :TestCase.launch(): and :TestCase.analyze():.""" + self.launch() + return self.analyze() + + def _log(self, *args, **kwargs) -> None: + """Logs test output.""" + print(file=sys.stdout, *args, **kwargs) + + def _vlog(self, *args, **kwargs) -> None: + """Logs verbose test output.""" + if self._opts.verbose: + print(file=sys.stdout, *args, **kwargs) + + def _test_environment(self) -> typing.Dict[str, str]: + """ + Returns the environment to be used for the + test subprocess. + """ + env = copy.copy(os.environ) + for k, v in self._opts.env.items(): + self._vlog(f"${k}='{v}'") + env[k] = v + return env # without this Popen() gets env=None and inherits the parent environment unchanged + + def _launch_test(self) -> None: + """Launch the test subprocess, but do not join it.""" + args = [os.path.abspath(self._test_file)] + stdin_name = f"{self._test_file}.stdin" + if os.path.exists(stdin_name): + self._test_stdin = open(stdin_name, "rb") + stdin = self._test_stdin + else: + stdin = subprocess.DEVNULL + cwd = self._scratch_dir + env = self._test_environment() + self._test_process = subprocess.Popen( + args=args, + stdin=stdin, + cwd=cwd, + env=env, + stderr=subprocess.PIPE, + stdout=subprocess.PIPE + ) + + def _join_test(self) -> None: + """Join the test process and save stderr, stdout, and the exit code.""" + (stdout, stderr) = self._test_process.communicate(timeout=self._opts.timeout) + self._output = {} + self._output["stdout"] = stdout + self._output["stderr"] = stderr + self._exit_code = self._test_process.returncode + self._test_process = None + if self._test_stdin is not None: + self._test_stdin.close() + self._test_stdin = None + + def _check_output_exact(self, out_name: str, expected: bytes) -> None: + """ + Check the output named :out_name: for an exact match against the :expected: content. + Saves the success and message. 
+ """ + check_name = f"check_{out_name}" + actual = self._output[out_name] + if actual == expected: + self._success[check_name] = True + self._message[check_name] = f"{out_name} matches!" + else: + self._success[check_name] = False + self._message[check_name] = f"{out_name} does not match!\n> diff expected actual\n{diff(expected, actual)}" + + def _check_output_glob(self, out_name: str, expected: bytes) -> None: + """ + Check the output named :out_name: for a glob match against the :expected: glob. + Saves the success and message. + """ + check_name = f"check_{out_name}" + actual = self._output[out_name] + diff = glob_diff(actual, expected) + if diff is None: + self._success[check_name] = True + self._message[check_name] = f"{out_name} matches!" + else: + utf8_diff = diff.decode('utf8') + self._success[check_name] = False + self._message[check_name] = f"{out_name} does not match!\n> diff expected actual\n{utf8_diff}" + + def _check_output(self, out_name: str) -> None: + """ + Checks the output named :out_name: for a match against the expectation. + We check for a .exact, .glob, and a .ignore file. If none are found we + expect that the output should be empty. + + If :Options.preserve: was set then we save the scratch directory and + save the stderr, stdout, and exit code to the scratch directory for + debugging. 
+ """ + if self._opts.preserve: + # Save the output to the scratch directory + actual_name = os.path.join(self._scratch_dir, f"{out_name}") + with open(actual_name, "wb") as f: + f.write(self._output[out_name]) + + exact_name = f"{self._test_file}.{out_name}.exact" + glob_name = f"{self._test_file}.{out_name}.glob" + ignore_name = f"{self._test_file}.{out_name}.ignore" + + if os.path.exists(exact_name): + return self._check_output_exact(out_name, read_file(exact_name)) + elif os.path.exists(glob_name): + return self._check_output_glob(out_name, read_file(glob_name)) + elif os.path.exists(ignore_name): + check_name = f"check_{out_name}" + self._success[check_name] = True + self._message[check_name] = f"{out_name} ignored!" + else: + return self._check_output_exact(out_name, bytes()) + + def _check_stderr(self) -> None: + """Checks the stderr output against the expectation.""" + self._check_output("stderr") + + def _check_stdout(self) -> None: + """Checks the stdout output against the expectation.""" + self._check_output("stdout") + + def _check_exit(self) -> None: + """ + Checks the exit code against expectations. If a .exit file + exists, we expect that the exit code matches the contents. + Otherwise we expect the exit code to be zero. + + If :Options.preserve: is set we save the exit code to the + scratch directory under the filename "exit". + """ + if self._opts.preserve: + exit_name = os.path.join(self._scratch_dir, "exit") + with open(exit_name, "w") as f: + f.write(str(self._exit_code) + "\n") + exit_name = f"{self._test_file}.exit" + if os.path.exists(exit_name): + exit_code: int = int(read_file(exit_name)) + else: + exit_code: int = 0 + if exit_code == self._exit_code: + self._success["check_exit"] = True + self._message["check_exit"] = "Exit code matches!" + else: + self._success["check_exit"] = False + self._message["check_exit"] = f"Exit code mismatch! 
Expected {exit_code} but got {self._exit_code}" + + def _analyze_results(self) -> None: + """ + After all tests have been checked, collect all the successes + and messages, and print the results to stdout. + """ + STATUS = {True: "PASS", False: "FAIL"} + checks = sorted(self._success.keys()) + self._succeeded = all(self._success.values()) + self._log(f"{STATUS[self._succeeded]}: {self._test_name}") + + if not self._succeeded or self._opts.verbose: + for check in checks: + if self._opts.verbose or not self._success[check]: + self._log(f"{STATUS[self._success[check]]}: {self._test_name}.{check}") + self._log(self._message[check]) + + self._log("----------------------------------------") + + +class TestSuite: + """ + Setup & teardown test suite & cases. + This class is intended to be used as a context manager. + + TODO: Make setup/teardown failure emit messages, not throw exceptions. + """ + def __init__(self, test_directory: str, options: Options) -> None: + self._opts = options + self._test_dir = os.path.abspath(test_directory) + rel_test_dir = os.path.relpath(test_directory, start=self._opts.test_dir) + assert not rel_test_dir.startswith(os.path.sep) + self._scratch_dir = os.path.normpath(os.path.join(self._opts.scratch_dir, rel_test_dir)) + + def __enter__(self) -> 'TestSuite': + self._setup_once() + return self + + def __exit__(self, _exc_type, _exc_value, _traceback) -> None: + self._teardown_once() + + @contextlib.contextmanager + def test_case(self, test_basename: str) -> TestCase: + """ + Context manager for a test case in the test suite. + Pass the basename of the test relative to the :test_directory:. 
+ """ + assert os.path.dirname(test_basename) == "" + try: + self._setup(test_basename) + test_filename = os.path.join(self._test_dir, test_basename) + yield TestCase(test_filename, self._opts) + finally: + self._teardown(test_basename) + + def _remove_scratch_dir(self, dir: str) -> None: + """Helper to remove a scratch directory with sanity checks""" + assert "scratch" in dir + assert dir.startswith(self._scratch_dir) + assert os.path.exists(dir) + shutil.rmtree(dir) + + def _setup_once(self) -> None: + if os.path.exists(self._scratch_dir): + self._remove_scratch_dir(self._scratch_dir) + os.makedirs(self._scratch_dir) + setup_script = os.path.join(self._test_dir, "setup_once") + if os.path.exists(setup_script): + self._run_script(setup_script, cwd=self._scratch_dir) + + def _teardown_once(self) -> None: + assert os.path.exists(self._scratch_dir) + teardown_script = os.path.join(self._test_dir, "teardown_once") + if os.path.exists(teardown_script): + self._run_script(teardown_script, cwd=self._scratch_dir) + if not self._opts.preserve: + self._remove_scratch_dir(self._scratch_dir) + + def _setup(self, test_basename: str) -> None: + test_scratch_dir = os.path.join(self._scratch_dir, test_basename) + assert not os.path.exists(test_scratch_dir) + os.makedirs(test_scratch_dir) + setup_script = os.path.join(self._test_dir, "setup") + if os.path.exists(setup_script): + self._run_script(setup_script, cwd=test_scratch_dir) + + def _teardown(self, test_basename: str) -> None: + test_scratch_dir = os.path.join(self._scratch_dir, test_basename) + assert os.path.exists(test_scratch_dir) + teardown_script = os.path.join(self._test_dir, "teardown") + if os.path.exists(teardown_script): + self._run_script(teardown_script, cwd=test_scratch_dir) + if not self._opts.preserve: + self._remove_scratch_dir(test_scratch_dir) + + def _run_script(self, script: str, cwd: str) -> None: + env = copy.copy(os.environ) + for k, v in self._opts.env.items(): + env[k] = v + try: + subprocess.run( + 
args=[script], + stdin=subprocess.DEVNULL, + capture_output=True, + cwd=cwd, + env=env, + check=True, + ) + except subprocess.CalledProcessError as e: + print(f"{script} failed with exit code {e.returncode}!") + print(f"stderr:\n{e.stderr}") + print(f"stdout:\n{e.stdout}") + raise + +TestSuites = typing.Dict[str, typing.List[str]] + +def get_all_tests(options: Options) -> TestSuites: + """ + Find all the test in the test directory and return the test suites. + """ + test_suites = {} + for root, dirs, files in os.walk(options.test_dir, topdown=True): + dirs[:] = [d for d in dirs if not exclude_dir(d)] + test_cases = [] + for file in files: + if not exclude_file(file): + test_cases.append(file) + assert root == os.path.normpath(root) + test_suites[root] = test_cases + return test_suites + + +def resolve_listed_tests( + tests: typing.List[str], options: Options +) -> TestSuites: + """ + Resolve the list of tests passed on the command line into their + respective test suites. Tests can either be paths, or test names + relative to the test directory. + """ + test_suites = {} + for test in tests: + if not os.path.exists(test): + test = os.path.join(options.test_dir, test) + if not os.path.exists(test): + raise RuntimeError(f"Test {test} does not exist!") + + test = os.path.normpath(os.path.abspath(test)) + assert test.startswith(options.test_dir) + test_suite = os.path.dirname(test) + test_case = os.path.basename(test) + test_suites.setdefault(test_suite, []).append(test_case) + + return test_suites + +def run_tests(test_suites: TestSuites, options: Options) -> bool: + """ + Runs all the test in the :test_suites: with the given :options:. + Prints the results to stdout. 
+ """ + tests = {} + for test_dir, test_files in test_suites.items(): + with TestSuite(test_dir, options) as test_suite: + test_files = sorted(set(test_files)) + for test_file in test_files: + with test_suite.test_case(test_file) as test_case: + tests[test_case.name] = test_case.run() + + successes = 0 + for test, status in tests.items(): + if status: + successes += 1 + else: + print(f"FAIL: {test}") + if successes == len(tests): + print(f"PASSED all {len(tests)} tests!") + return True + else: + print(f"FAILED {len(tests) - successes} / {len(tests)} tests!") + return False + + +if __name__ == "__main__": + CLI_TEST_DIR = os.path.dirname(sys.argv[0]) + REPO_DIR = os.path.join(CLI_TEST_DIR, "..", "..") + PROGRAMS_DIR = os.path.join(REPO_DIR, "programs") + TESTS_DIR = os.path.join(REPO_DIR, "tests") + ZSTD_PATH = os.path.join(PROGRAMS_DIR, "zstd") + ZSTDGREP_PATH = os.path.join(PROGRAMS_DIR, "zstdgrep") + DATAGEN_PATH = os.path.join(TESTS_DIR, "datagen") + + parser = argparse.ArgumentParser( + ( + "Runs the zstd CLI tests. Exits nonzero on failure. Default arguments are\n" + "generally correct. Pass --preserve to preserve test output for debugging,\n" + "and --verbose to get verbose test output.\n" + ) + ) + parser.add_argument( + "--preserve", + action="store_true", + help="Preserve the scratch directory TEST_DIR/scratch/ for debugging purposes." + ) + parser.add_argument("--verbose", action="store_true", help="Verbose test output.") + parser.add_argument("--timeout", default=60, type=int, help="Test case timeout in seconds. Set to 0 to disable timeouts.") + parser.add_argument( + "--exec-prefix", + default=None, + help="Sets the EXEC_PREFIX environment variable. Prefix to invocations of the zstd CLI." + ) + parser.add_argument( + "--zstd", + default=ZSTD_PATH, + help="Sets the ZSTD_BIN environment variable. Path of the zstd CLI." + ) + parser.add_argument( + "--zstdgrep", + default=ZSTDGREP_PATH, + help="Sets the ZSTDGREP_BIN environment variable. 
Path of the zstdgrep CLI." + ) + parser.add_argument( + "--datagen", + default=DATAGEN_PATH, + help="Sets the DATAGEN_BIN environment variable. Path to the datagen CLI." + ) + parser.add_argument( + "--test-dir", + default=CLI_TEST_DIR, + help=( + "Runs the tests under this directory. " + "Adds TEST_DIR/bin/ to path. " + "Scratch directory located in TEST_DIR/scratch/." + ) + ) + parser.add_argument( + "tests", + nargs="*", + help="Run only these test cases. Can either be paths or test names relative to TEST_DIR/" + ) + args = parser.parse_args() + + if args.timeout <= 0: + args.timeout = None + + args.test_dir = os.path.normpath(os.path.abspath(args.test_dir)) + bin_dir = os.path.join(args.test_dir, "bin") + scratch_dir = os.path.join(args.test_dir, "scratch") + + env = {} + if args.exec_prefix is not None: + env["EXEC_PREFIX"] = args.exec_prefix + env["ZSTD_BIN"] = os.path.abspath(args.zstd) + env["DATAGEN_BIN"] = os.path.abspath(args.datagen) + env["ZSTDGREP_BIN"] = os.path.abspath(args.zstdgrep) + env["COMMON"] = os.path.abspath(os.path.join(args.test_dir, "common")) + env["PATH"] = os.path.abspath(bin_dir) + ":" + os.getenv("PATH", "") + + opts = Options( + env=env, + timeout=args.timeout, + verbose=args.verbose, + preserve=args.preserve, + test_dir=args.test_dir, + scratch_dir=scratch_dir, + ) + + if len(args.tests) == 0: + tests = get_all_tests(opts) + else: + tests = resolve_listed_tests(args.tests, opts) + + success = run_tests(tests, opts) + if success: + sys.exit(0) + else: + sys.exit(1) + From 1fc42de86a53320c056c9a3ca9847eae7ce1262b Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 24 Jan 2022 13:52:08 -0800 Subject: [PATCH 5/8] [CI] Hook cli-tests up to CI Add cli-tests to `make test`. This adds a `python3` dependency to `make test`, but not `make check`. We could make this dependency optional by skipping the tests if `python3` is not present. 
--- tests/Makefile | 8 +- tests/cli-tests/basic/help.sh | 5 +- tests/cli-tests/basic/version.sh | 5 +- tests/cli-tests/bin/cmp_size | 2 - tests/cli-tests/bin/println | 2 +- tests/cli-tests/common/format.sh | 2 +- tests/cli-tests/common/mtime.sh | 2 +- tests/cli-tests/common/permissions.sh | 2 +- tests/cli-tests/compression/basic.sh | 8 +- tests/cli-tests/compression/format.sh | 2 +- tests/cli-tests/compression/levels.sh | 16 +-- .../compression/levels.sh.stderr.exact | 124 +++++++++--------- .../dict-builder/{no-inputs => no-inputs.sh} | 2 +- .../{no-inputs.exit => no-inputs.sh.exit} | 0 ...stderr.exact => no-inputs.sh.stderr.exact} | 2 +- .../dictionaries/dictionary-mismatch.sh | 4 +- .../dictionary-mismatch.sh.stderr.exact | 10 +- tests/cli-tests/dictionaries/setup_once | 2 +- 18 files changed, 102 insertions(+), 96 deletions(-) rename tests/cli-tests/dict-builder/{no-inputs => no-inputs.sh} (76%) rename tests/cli-tests/dict-builder/{no-inputs.exit => no-inputs.sh.exit} (100%) rename tests/cli-tests/dict-builder/{no-inputs.stderr.exact => no-inputs.sh.stderr.exact} (93%) diff --git a/tests/Makefile b/tests/Makefile index 132fa7a0..cb77b016 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -297,7 +297,7 @@ check: shortest fuzztest: test-fuzzer test-zstream test-decodecorpus .PHONY: test -test: test-zstd test-fullbench test-fuzzer test-zstream test-invalidDictionaries test-legacy test-decodecorpus +test: test-zstd test-fullbench test-fuzzer test-zstream test-invalidDictionaries test-legacy test-decodecorpus test-cli-tests ifeq ($(QEMU_SYS),) test: test-pool endif @@ -322,6 +322,12 @@ test-zstd test-zstd32 test-zstd-nolegacy: datagen file $(ZSTD) EXE_PREFIX="$(QEMU_SYS)" ZSTD_BIN="$(ZSTD)" DATAGEN_BIN=./datagen ./playTests.sh $(ZSTDRTTEST) +test-cli-tests: ZSTD = $(PRGDIR)/zstd +test-cli-tests: zstd datagen + file $(ZSTD) + ./cli-tests/run.py --exec-prefix="$(QEMU_SYS)" --zstd="$(ZSTD)" --datagen=./datagen + + test-fullbench: fullbench datagen $(QEMU_SYS) 
./fullbench -i1 $(QEMU_SYS) ./fullbench -i1 -P0 diff --git a/tests/cli-tests/basic/help.sh b/tests/cli-tests/basic/help.sh index c683b6a6..927c3ffb 100755 --- a/tests/cli-tests/basic/help.sh +++ b/tests/cli-tests/basic/help.sh @@ -1,4 +1,7 @@ -#!/bin/sh -e +#!/bin/sh + +set -e + println "+ zstd -h" zstd -h println "+ zstd -H" diff --git a/tests/cli-tests/basic/version.sh b/tests/cli-tests/basic/version.sh index d50de0f3..f75eaa84 100755 --- a/tests/cli-tests/basic/version.sh +++ b/tests/cli-tests/basic/version.sh @@ -1,3 +1,6 @@ -#!/bin/sh -e +#!/bin/sh + +set -e + zstd -V zstd --version diff --git a/tests/cli-tests/bin/cmp_size b/tests/cli-tests/bin/cmp_size index 5afa1c59..8e4bef88 100755 --- a/tests/cli-tests/bin/cmp_size +++ b/tests/cli-tests/bin/cmp_size @@ -1,7 +1,5 @@ #!/bin/sh -# Small utility to - set -e usage() diff --git a/tests/cli-tests/bin/println b/tests/cli-tests/bin/println index 1da24604..494eb18c 100755 --- a/tests/cli-tests/bin/println +++ b/tests/cli-tests/bin/println @@ -1,2 +1,2 @@ -#!/bin/env sh +#!/bin/sh printf '%b\n' "${*}" diff --git a/tests/cli-tests/common/format.sh b/tests/cli-tests/common/format.sh index 20ff0f05..e574e973 100644 --- a/tests/cli-tests/common/format.sh +++ b/tests/cli-tests/common/format.sh @@ -1,6 +1,6 @@ #!/bin/sh -source "$COMMON/platform.sh" +. "$COMMON/platform.sh" zstd_supports_format() { diff --git a/tests/cli-tests/common/mtime.sh b/tests/cli-tests/common/mtime.sh index 7ce931a9..344074d3 100644 --- a/tests/cli-tests/common/mtime.sh +++ b/tests/cli-tests/common/mtime.sh @@ -1,4 +1,4 @@ -source "$COMMON/platform.sh" +. "$COMMON/platform.sh" MTIME="stat -c %Y" case "$UNAME" in diff --git a/tests/cli-tests/common/permissions.sh b/tests/cli-tests/common/permissions.sh index b1f6ea3b..6bce1f0b 100644 --- a/tests/cli-tests/common/permissions.sh +++ b/tests/cli-tests/common/permissions.sh @@ -1,4 +1,4 @@ -source "$COMMON/platform.sh" +. 
"$COMMON/platform.sh" GET_PERMS="stat -c %a" case "$UNAME" in diff --git a/tests/cli-tests/compression/basic.sh b/tests/cli-tests/compression/basic.sh index 6f0f8793..8b63e407 100755 --- a/tests/cli-tests/compression/basic.sh +++ b/tests/cli-tests/compression/basic.sh @@ -1,7 +1,9 @@ -#!/bin/sh -e +#!/bin/sh -# Uncomment the set -x line for debugging -# set -x +set -e + +# Uncomment the set -v line for debugging +# set -v # Test compression flags and check that they work zstd file ; zstd -t file.zst diff --git a/tests/cli-tests/compression/format.sh b/tests/cli-tests/compression/format.sh index 86fb4008..192fa2cf 100755 --- a/tests/cli-tests/compression/format.sh +++ b/tests/cli-tests/compression/format.sh @@ -1,6 +1,6 @@ #!/bin/sh -source "$COMMON/format.sh" +. "$COMMON/format.sh" set -e diff --git a/tests/cli-tests/compression/levels.sh b/tests/cli-tests/compression/levels.sh index 6bd0aca0..4837790c 100755 --- a/tests/cli-tests/compression/levels.sh +++ b/tests/cli-tests/compression/levels.sh @@ -1,7 +1,7 @@ #!/bin/sh set -e -set -x +set -v datagen > file @@ -12,7 +12,7 @@ zstd -1 file -o file-1.zst zstd -19 file -o file-19.zst zstd -22 --ultra file -o file-22.zst -zstd -t file-{f10,f1,1,19,22}.zst +zstd -t file-f10.zst file-f1.zst file-1.zst file-19.zst file-22.zst cmp_size -ne file-19.zst file-22.zst cmp_size -lt file-19.zst file-1.zst @@ -41,10 +41,10 @@ ZSTD_CLEVEL=1 zstd file -o file-1-env.zst ZSTD_CLEVEL=+19 zstd file -o file-19-env.zst ZSTD_CLEVEL=+99 zstd file -o file-99-env.zst -cmp file-f10{,-env}.zst || die "Environment variable failed to set level" -cmp file-1{,-env}.zst || die "Environment variable failed to set level" -cmp file-19{,-env}.zst || die "Environment variable failed to set level" -cmp file-99{,-env}.zst || die "Environment variable failed to set level" +cmp file-f10.zst file-f10-env.zst || die "Environment variable failed to set level" +cmp file-1.zst file-1-env.zst || die "Environment variable failed to set level" +cmp file-19.zst 
file-19-env.zst || die "Environment variable failed to set level" +cmp file-99.zst file-99-env.zst || die "Environment variable failed to set level" # Test invalid environment clevel is the default level zstd -f file @@ -60,5 +60,5 @@ ZSTD_CLEVEL=5000000000 zstd -f file -o file-env.zst; cmp file.zst file-env.zst ZSTD_CLEVEL=10 zstd -f file -1 -o file-1-env.zst ZSTD_CLEVEL=10 zstd -f file --fast=1 -o file-f1-env.zst -cmp file-1{,-env}.zst || die "Environment variable not overridden" -cmp file-f1{,-env}.zst || die "Environment variable not overridden" +cmp file-1.zst file-1-env.zst || die "Environment variable not overridden" +cmp file-f1.zst file-f1-env.zst || die "Environment variable not overridden" diff --git a/tests/cli-tests/compression/levels.sh.stderr.exact b/tests/cli-tests/compression/levels.sh.stderr.exact index c0b7066f..cb00433e 100644 --- a/tests/cli-tests/compression/levels.sh.stderr.exact +++ b/tests/cli-tests/compression/levels.sh.stderr.exact @@ -1,75 +1,71 @@ -+ datagen -+ zstd --fast=10 file -o file-f10.zst -+ zstd --fast=1 file -o file-f1.zst -+ zstd -1 file -o file-1.zst -+ zstd -19 file -o file-19.zst -+ zstd -22 --ultra file -o file-22.zst -+ zstd -t file-f10.zst file-f1.zst file-1.zst file-19.zst file-22.zst -+ cmp_size -ne file-19.zst file-22.zst -+ cmp_size -lt file-19.zst file-1.zst -+ cmp_size -lt file-1.zst file-f1.zst -+ cmp_size -lt file-f1.zst file-f10.zst -+ zstd --fast file -f -+ cmp file.zst file-f1.zst -+ zstd -0 file -o file-0.zst -+ zstd -f file -+ cmp file.zst file-0.zst -+ zstd -99 file -o file-99.zst + +datagen > file + +# Compress with various levels and ensure that their sizes are ordered +zstd --fast=10 file -o file-f10.zst +zstd --fast=1 file -o file-f1.zst +zstd -1 file -o file-1.zst +zstd -19 file -o file-19.zst +zstd -22 --ultra file -o file-22.zst + +zstd -t file-f10.zst file-f1.zst file-1.zst file-19.zst file-22.zst + +cmp_size -ne file-19.zst file-22.zst +cmp_size -lt file-19.zst file-1.zst +cmp_size -lt file-1.zst 
file-f1.zst +cmp_size -lt file-f1.zst file-f10.zst + +# Test default levels +zstd --fast file -f +cmp file.zst file-f1.zst || die "--fast is not level -1" + +zstd -0 file -o file-0.zst +zstd -f file +cmp file.zst file-0.zst || die "Level 0 is not the default level" + +# Test level clamping +zstd -99 file -o file-99.zst Warning : compression level higher than max, reduced to 19 -+ cmp file-19.zst file-99.zst -+ zstd --fast=200000 file -c -+ zstd -t -+ zstd -5000000000 -f file +cmp file-19.zst file-99.zst || die "Level 99 is clamped to 19" +zstd --fast=200000 file -c | zstd -t + +zstd -5000000000 -f file && die "Level too large, must fail" ||: error: numeric value overflows 32-bit unsigned int -+ : -+ zstd --fast=5000000000 -f file +zstd --fast=5000000000 -f file && die "Level too large, must fail" ||: error: numeric value overflows 32-bit unsigned int -+ : -+ ZSTD_CLEVEL=-10 -+ zstd file -o file-f10-env.zst -+ ZSTD_CLEVEL=1 -+ zstd file -o file-1-env.zst -+ ZSTD_CLEVEL=+19 -+ zstd file -o file-19-env.zst -+ ZSTD_CLEVEL=+99 -+ zstd file -o file-99-env.zst + +# Test setting a level through the environment variable +ZSTD_CLEVEL=-10 zstd file -o file-f10-env.zst +ZSTD_CLEVEL=1 zstd file -o file-1-env.zst +ZSTD_CLEVEL=+19 zstd file -o file-19-env.zst +ZSTD_CLEVEL=+99 zstd file -o file-99-env.zst Warning : compression level higher than max, reduced to 19 -+ cmp file-f10.zst file-f10-env.zst -+ cmp file-1.zst file-1-env.zst -+ cmp file-19.zst file-19-env.zst -+ cmp file-99.zst file-99-env.zst -+ zstd -f file -+ ZSTD_CLEVEL=- -+ zstd -f file -o file-env.zst + +cmp file-f10.zst file-f10-env.zst || die "Environment variable failed to set level" +cmp file-1.zst file-1-env.zst || die "Environment variable failed to set level" +cmp file-19.zst file-19-env.zst || die "Environment variable failed to set level" +cmp file-99.zst file-99-env.zst || die "Environment variable failed to set level" + +# Test invalid environment clevel is the default level +zstd -f file +ZSTD_CLEVEL=- 
zstd -f file -o file-env.zst ; cmp file.zst file-env.zst Ignore environment variable setting ZSTD_CLEVEL=-: not a valid integer value -+ cmp file.zst file-env.zst -+ ZSTD_CLEVEL=+ -+ zstd -f file -o file-env.zst +ZSTD_CLEVEL=+ zstd -f file -o file-env.zst ; cmp file.zst file-env.zst Ignore environment variable setting ZSTD_CLEVEL=+: not a valid integer value -+ cmp file.zst file-env.zst -+ ZSTD_CLEVEL=a -+ zstd -f file -o file-env.zst +ZSTD_CLEVEL=a zstd -f file -o file-env.zst ; cmp file.zst file-env.zst Ignore environment variable setting ZSTD_CLEVEL=a: not a valid integer value -+ cmp file.zst file-env.zst -+ ZSTD_CLEVEL=-a -+ zstd -f file -o file-env.zst +ZSTD_CLEVEL=-a zstd -f file -o file-env.zst ; cmp file.zst file-env.zst Ignore environment variable setting ZSTD_CLEVEL=-a: not a valid integer value -+ cmp file.zst file-env.zst -+ ZSTD_CLEVEL=+a -+ zstd -f file -o file-env.zst +ZSTD_CLEVEL=+a zstd -f file -o file-env.zst ; cmp file.zst file-env.zst Ignore environment variable setting ZSTD_CLEVEL=+a: not a valid integer value -+ cmp file.zst file-env.zst -+ ZSTD_CLEVEL=3a7 -+ zstd -f file -o file-env.zst +ZSTD_CLEVEL=3a7 zstd -f file -o file-env.zst ; cmp file.zst file-env.zst Ignore environment variable setting ZSTD_CLEVEL=3a7: not a valid integer value -+ cmp file.zst file-env.zst -+ ZSTD_CLEVEL=5000000000 -+ zstd -f file -o file-env.zst +ZSTD_CLEVEL=5000000000 zstd -f file -o file-env.zst; cmp file.zst file-env.zst Ignore environment variable setting ZSTD_CLEVEL=5000000000: numeric value too large -+ cmp file.zst file-env.zst -+ ZSTD_CLEVEL=10 -+ zstd -f file -1 -o file-1-env.zst -+ ZSTD_CLEVEL=10 -+ zstd -f file --fast=1 -o file-f1-env.zst -+ cmp file-1.zst file-1-env.zst -+ cmp file-f1.zst file-f1-env.zst + +# Test environment clevel is overridden by command line +ZSTD_CLEVEL=10 zstd -f file -1 -o file-1-env.zst +ZSTD_CLEVEL=10 zstd -f file --fast=1 -o file-f1-env.zst + +cmp file-1.zst file-1-env.zst || die "Environment variable not overridden" +cmp 
file-f1.zst file-f1-env.zst || die "Environment variable not overridden" diff --git a/tests/cli-tests/dict-builder/no-inputs b/tests/cli-tests/dict-builder/no-inputs.sh similarity index 76% rename from tests/cli-tests/dict-builder/no-inputs rename to tests/cli-tests/dict-builder/no-inputs.sh index d37fdce5..416b8374 100755 --- a/tests/cli-tests/dict-builder/no-inputs +++ b/tests/cli-tests/dict-builder/no-inputs.sh @@ -1,3 +1,3 @@ #!/bin/sh -set -x +set -v zstd --train diff --git a/tests/cli-tests/dict-builder/no-inputs.exit b/tests/cli-tests/dict-builder/no-inputs.sh.exit similarity index 100% rename from tests/cli-tests/dict-builder/no-inputs.exit rename to tests/cli-tests/dict-builder/no-inputs.sh.exit diff --git a/tests/cli-tests/dict-builder/no-inputs.stderr.exact b/tests/cli-tests/dict-builder/no-inputs.sh.stderr.exact similarity index 93% rename from tests/cli-tests/dict-builder/no-inputs.stderr.exact rename to tests/cli-tests/dict-builder/no-inputs.sh.stderr.exact index b3b5599d..d7b3ea02 100644 --- a/tests/cli-tests/dict-builder/no-inputs.stderr.exact +++ b/tests/cli-tests/dict-builder/no-inputs.sh.stderr.exact @@ -1,4 +1,4 @@ -+ zstd --train +zstd --train ! Warning : nb of samples too low for proper processing ! ! Please provide _one file per sample_. ! Alternatively, split files into fixed-size blocks representative of samples, with -B# diff --git a/tests/cli-tests/dictionaries/dictionary-mismatch.sh b/tests/cli-tests/dictionaries/dictionary-mismatch.sh index 2b5d5a0d..8264ccca 100755 --- a/tests/cli-tests/dictionaries/dictionary-mismatch.sh +++ b/tests/cli-tests/dictionaries/dictionary-mismatch.sh @@ -1,6 +1,6 @@ #!/bin/sh -source "$COMMON/platform.sh" +. 
"$COMMON/platform.sh" set -e @@ -22,7 +22,7 @@ if [ false ]; then datagen -g1000 -s0 > file0 fi -set -x +set -v zstd files/0 -D dicts/0 zstd -t files/0.zst -D dicts/0 zstd -t files/0.zst -D dicts/1 && die "Must fail" ||: diff --git a/tests/cli-tests/dictionaries/dictionary-mismatch.sh.stderr.exact b/tests/cli-tests/dictionaries/dictionary-mismatch.sh.stderr.exact index 399a3207..0afea722 100644 --- a/tests/cli-tests/dictionaries/dictionary-mismatch.sh.stderr.exact +++ b/tests/cli-tests/dictionaries/dictionary-mismatch.sh.stderr.exact @@ -1,8 +1,6 @@ -+ zstd files/0 -D dicts/0 -+ zstd -t files/0.zst -D dicts/0 -+ zstd -t files/0.zst -D dicts/1 +zstd files/0 -D dicts/0 +zstd -t files/0.zst -D dicts/0 +zstd -t files/0.zst -D dicts/1 && die "Must fail" ||: files/0.zst : Decoding error (36) : Dictionary mismatch -+ : -+ zstd -t files/0.zst +zstd -t files/0.zst && die "Must fail" ||: files/0.zst : Decoding error (36) : Dictionary mismatch -+ : diff --git a/tests/cli-tests/dictionaries/setup_once b/tests/cli-tests/dictionaries/setup_once index 6316df16..1241c578 100755 --- a/tests/cli-tests/dictionaries/setup_once +++ b/tests/cli-tests/dictionaries/setup_once @@ -2,7 +2,7 @@ set -e -source "$COMMON/platform.sh" +. "$COMMON/platform.sh" mkdir files/ dicts/ From 8d65f87416740444da4a713d2778b78c11c6b38b Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Sun, 30 Jan 2022 12:16:16 -0800 Subject: [PATCH 6/8] Fix static analysis false-positives * It couldn't detect that the `fastCoverParams` can't be null, since it was just an assertion. * It thought we were accessing `wksp->dtable` beyond the bounds because we were using it to set the `workSpace` value. Instead, compute the workspace size used in a different way. 
--- lib/common/fse.h | 2 +- lib/common/fse_decompress.c | 3 ++- programs/dibio.c | 8 +++++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/lib/common/fse.h b/lib/common/fse.h index 714bfd3e..bd29e9ac 100644 --- a/lib/common/fse.h +++ b/lib/common/fse.h @@ -353,7 +353,7 @@ size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits); size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); /**< build a fake FSE_DTable, designed to always generate the same symbolValue */ -#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1) +#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1) #define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned)) size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize); /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */ diff --git a/lib/common/fse_decompress.c b/lib/common/fse_decompress.c index a5a35801..bc0c1be2 100644 --- a/lib/common/fse_decompress.c +++ b/lib/common/fse_decompress.c @@ -342,7 +342,8 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body( } if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge); - workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog); + assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize); + workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); CHECK_F( FSE_buildDTable_internal(wksp->dtable, 
wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) ); diff --git a/programs/dibio.c b/programs/dibio.c index 147d1e7b..fddbc9e5 100644 --- a/programs/dibio.c +++ b/programs/dibio.c @@ -31,6 +31,7 @@ #include "timefn.h" /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */ #include "../lib/common/debug.h" /* assert */ #include "../lib/common/mem.h" /* read */ +#include "../lib/zstd_errors.h" #include "dibio.h" @@ -380,7 +381,7 @@ int DiB_trainFromFiles(const char* dictFileName, size_t maxDictSize, srcBuffer, &loadedSize, sampleSizes, fs.nbSamples, fileNamesTable, nbFiles, chunkSize, displayLevel); - { size_t dictSize; + { size_t dictSize = ZSTD_error_GENERIC; if (params) { DiB_fillNoise((char*)srcBuffer + loadedSize, NOISELENGTH); /* guard band, for end of buffer condition */ dictSize = ZDICT_trainFromBuffer_legacy(dictBuffer, maxDictSize, @@ -400,8 +401,7 @@ int DiB_trainFromFiles(const char* dictFileName, size_t maxDictSize, dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, srcBuffer, sampleSizes, nbSamplesLoaded, *coverParams); } - } else { - assert(fastCoverParams != NULL); + } else if (fastCoverParams != NULL) { if (optimize) { dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, srcBuffer, sampleSizes, nbSamplesLoaded, @@ -416,6 +416,8 @@ int DiB_trainFromFiles(const char* dictFileName, size_t maxDictSize, dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, srcBuffer, sampleSizes, nbSamplesLoaded, *fastCoverParams); } + } else { + assert(0 /* Impossible */); } if (ZDICT_isError(dictSize)) { DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */ From cc0657f27d81da8a7db3aa199d24a566b95c4dfe Mon Sep 17 00:00:00 2001 From: Yonatan Komornik <11005061+yoniko@users.noreply.github.com> Date: Mon, 31 Jan 2022 15:43:41 -0800 Subject: [PATCH 7/8] AsyncIO compression part 2 - added async read and asyncio to compression code (#3022) * Compression asyncio: - 
Added asyncio functionality for compression flow - Added ReadPool for async reads, implemented in both comp and decomp flows --- programs/README.md | 1 + programs/fileio.c | 423 ++++++++++++++++++-------------------- programs/fileio_asyncio.c | 353 ++++++++++++++++++++++++++----- programs/fileio_asyncio.h | 85 ++++++-- programs/fileio_common.h | 2 +- programs/fileio_types.h | 4 +- programs/zstdcli.c | 8 +- tests/playTests.sh | 15 +- 8 files changed, 590 insertions(+), 301 deletions(-) diff --git a/programs/README.md b/programs/README.md index 5570f90c..b88cf78d 100644 --- a/programs/README.md +++ b/programs/README.md @@ -164,6 +164,7 @@ Advanced arguments : --filelist FILE : read list of files to operate upon from FILE --output-dir-flat DIR : processed files are stored into DIR --output-dir-mirror DIR : processed files are stored into DIR respecting original directory structure +--[no-]asyncio : use asynchronous IO (default: enabled) --[no-]check : during compression, add XXH64 integrity checksum to frame (default: enabled). If specified with -d, decompressor will ignore/validate checksums in compressed frame (default: validate). 
-- : All arguments after "--" are treated as files diff --git a/programs/fileio.c b/programs/fileio.c index 64909b96..502f69c1 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -289,7 +289,7 @@ FIO_prefs_t* FIO_createPreferences(void) ret->literalCompressionMode = ZSTD_ps_auto; ret->excludeCompressedFiles = 0; ret->allowBlockDevices = 0; - ret->asyncIO = 0; + ret->asyncIO = AIO_supported(); return ret; } @@ -848,16 +848,12 @@ static int FIO_removeMultiFilesWarning(FIO_ctx_t* const fCtx, const FIO_prefs_t* * Compression ************************************************************************/ typedef struct { - FILE* srcFile; - FILE* dstFile; - void* srcBuffer; - size_t srcBufferSize; - void* dstBuffer; - size_t dstBufferSize; void* dictBuffer; size_t dictBufferSize; const char* dictFileName; ZSTD_CStream* cctx; + WritePoolCtx_t *writeCtx; + ReadPoolCtx_t *readCtx; } cRess_t; /** ZSTD_cycleLog() : @@ -906,9 +902,6 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, if (ress.cctx == NULL) EXM_THROW(30, "allocation error (%s): can't create ZSTD_CCtx", strerror(errno)); - ress.srcBufferSize = ZSTD_CStreamInSize(); - ress.srcBuffer = malloc(ress.srcBufferSize); - ress.dstBufferSize = ZSTD_CStreamOutSize(); /* need to update memLimit before calling createDictBuffer * because of memLimit check inside it */ @@ -916,10 +909,10 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, unsigned long long const ssSize = (unsigned long long)prefs->streamSrcSize; FIO_adjustParamsForPatchFromMode(prefs, &comprParams, UTIL_getFileSize(dictFileName), ssSize > 0 ? 
ssSize : maxSrcFileSize, cLevel); } - ress.dstBuffer = malloc(ress.dstBufferSize); ress.dictBufferSize = FIO_createDictBuffer(&ress.dictBuffer, dictFileName, prefs); /* works with dictFileName==NULL */ - if (!ress.srcBuffer || !ress.dstBuffer) - EXM_THROW(31, "allocation error : not enough memory"); + + ress.writeCtx = AIO_WritePool_create(prefs, ZSTD_CStreamOutSize()); + ress.readCtx = AIO_ReadPool_create(prefs, ZSTD_CStreamInSize()); /* Advanced parameters, including dictionary */ if (dictFileName && (ress.dictBuffer==NULL)) @@ -982,9 +975,9 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, static void FIO_freeCResources(const cRess_t* const ress) { - free(ress->srcBuffer); - free(ress->dstBuffer); free(ress->dictBuffer); + AIO_WritePool_free(ress->writeCtx); + AIO_ReadPool_free(ress->readCtx); ZSTD_freeCStream(ress->cctx); /* never fails */ } @@ -997,6 +990,7 @@ FIO_compressGzFrame(const cRess_t* ress, /* buffers & handlers are used, but no { unsigned long long inFileSize = 0, outFileSize = 0; z_stream strm; + IOJob_t *writeJob = NULL; if (compressionLevel > Z_BEST_COMPRESSION) compressionLevel = Z_BEST_COMPRESSION; @@ -1012,51 +1006,58 @@ FIO_compressGzFrame(const cRess_t* ress, /* buffers & handlers are used, but no EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret); } } + writeJob = AIO_WritePool_acquireJob(ress->writeCtx); strm.next_in = 0; strm.avail_in = 0; - strm.next_out = (Bytef*)ress->dstBuffer; - strm.avail_out = (uInt)ress->dstBufferSize; + strm.next_out = (Bytef*)writeJob->buffer; + strm.avail_out = (uInt)writeJob->bufferSize; while (1) { int ret; if (strm.avail_in == 0) { - size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile); - if (inSize == 0) break; - inFileSize += inSize; - strm.next_in = (z_const unsigned char*)ress->srcBuffer; - strm.avail_in = (uInt)inSize; + AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_CStreamInSize()); + if (ress->readCtx->srcBufferLoaded == 0) break; + 
inFileSize += ress->readCtx->srcBufferLoaded; + strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer; + strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded; } - ret = deflate(&strm, Z_NO_FLUSH); + + { + size_t const availBefore = strm.avail_in; + ret = deflate(&strm, Z_NO_FLUSH); + AIO_ReadPool_consumeBytes(ress->readCtx, availBefore - strm.avail_in); + } + if (ret != Z_OK) EXM_THROW(72, "zstd: %s: deflate error %d \n", srcFileName, ret); - { size_t const cSize = ress->dstBufferSize - strm.avail_out; + { size_t const cSize = writeJob->bufferSize - strm.avail_out; if (cSize) { - if (fwrite(ress->dstBuffer, 1, cSize, ress->dstFile) != cSize) - EXM_THROW(73, "Write error : cannot write to output file : %s ", strerror(errno)); + writeJob->usedBufferSize = cSize; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); outFileSize += cSize; - strm.next_out = (Bytef*)ress->dstBuffer; - strm.avail_out = (uInt)ress->dstBufferSize; - } } + strm.next_out = (Bytef*)writeJob->buffer; + strm.avail_out = (uInt)writeJob->bufferSize; + } } if (srcFileSize == UTIL_FILESIZE_UNKNOWN) { DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ", - (unsigned)(inFileSize>>20), - (double)outFileSize/inFileSize*100) + (unsigned)(inFileSize>>20), + (double)outFileSize/inFileSize*100) } else { DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%% ", - (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), - (double)outFileSize/inFileSize*100); - } } + (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), + (double)outFileSize/inFileSize*100); + } } while (1) { int const ret = deflate(&strm, Z_FINISH); - { size_t const cSize = ress->dstBufferSize - strm.avail_out; + { size_t const cSize = writeJob->bufferSize - strm.avail_out; if (cSize) { - if (fwrite(ress->dstBuffer, 1, cSize, ress->dstFile) != cSize) - EXM_THROW(75, "Write error : %s ", strerror(errno)); + writeJob->usedBufferSize = cSize; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); outFileSize += cSize; - strm.next_out = 
(Bytef*)ress->dstBuffer; - strm.avail_out = (uInt)ress->dstBufferSize; - } } + strm.next_out = (Bytef*)writeJob->buffer; + strm.avail_out = (uInt)writeJob->bufferSize; + } } if (ret == Z_STREAM_END) break; if (ret != Z_BUF_ERROR) EXM_THROW(77, "zstd: %s: deflate error %d \n", srcFileName, ret); @@ -1067,6 +1068,8 @@ FIO_compressGzFrame(const cRess_t* ress, /* buffers & handlers are used, but no EXM_THROW(79, "zstd: %s: deflateEnd error %d \n", srcFileName, ret); } } *readsize = inFileSize; + AIO_WritePool_releaseIoJob(writeJob); + AIO_WritePool_sparseWriteEnd(ress->writeCtx); return outFileSize; } #endif @@ -1082,6 +1085,7 @@ FIO_compressLzmaFrame(cRess_t* ress, lzma_stream strm = LZMA_STREAM_INIT; lzma_action action = LZMA_RUN; lzma_ret ret; + IOJob_t *writeJob = NULL; if (compressionLevel < 0) compressionLevel = 0; if (compressionLevel > 9) compressionLevel = 9; @@ -1099,31 +1103,37 @@ FIO_compressLzmaFrame(cRess_t* ress, EXM_THROW(83, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret); } + writeJob =AIO_WritePool_acquireJob(ress->writeCtx); + strm.next_out = (Bytef*)writeJob->buffer; + strm.avail_out = (uInt)writeJob->bufferSize; strm.next_in = 0; strm.avail_in = 0; - strm.next_out = (BYTE*)ress->dstBuffer; - strm.avail_out = ress->dstBufferSize; while (1) { if (strm.avail_in == 0) { - size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile); - if (inSize == 0) action = LZMA_FINISH; + size_t const inSize = AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_CStreamInSize()); + if (ress->readCtx->srcBufferLoaded == 0) action = LZMA_FINISH; inFileSize += inSize; - strm.next_in = (BYTE const*)ress->srcBuffer; - strm.avail_in = inSize; + strm.next_in = (BYTE const*)ress->readCtx->srcBuffer; + strm.avail_in = ress->readCtx->srcBufferLoaded; + } + + { + size_t const availBefore = strm.avail_in; + ret = lzma_code(&strm, action); + AIO_ReadPool_consumeBytes(ress->readCtx, availBefore - strm.avail_in); } - ret = lzma_code(&strm, action); if (ret 
!= LZMA_OK && ret != LZMA_STREAM_END) EXM_THROW(84, "zstd: %s: lzma_code encoding error %d", srcFileName, ret); - { size_t const compBytes = ress->dstBufferSize - strm.avail_out; + { size_t const compBytes = writeJob->bufferSize - strm.avail_out; if (compBytes) { - if (fwrite(ress->dstBuffer, 1, compBytes, ress->dstFile) != compBytes) - EXM_THROW(85, "Write error : %s", strerror(errno)); + writeJob->usedBufferSize = compBytes; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); outFileSize += compBytes; - strm.next_out = (BYTE*)ress->dstBuffer; - strm.avail_out = ress->dstBufferSize; + strm.next_out = (Bytef*)writeJob->buffer; + strm.avail_out = writeJob->bufferSize; } } if (srcFileSize == UTIL_FILESIZE_UNKNOWN) DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", @@ -1139,6 +1149,9 @@ FIO_compressLzmaFrame(cRess_t* ress, lzma_end(&strm); *readsize = inFileSize; + AIO_WritePool_releaseIoJob(writeJob); + AIO_WritePool_sparseWriteEnd(ress->writeCtx); + return outFileSize; } #endif @@ -1164,15 +1177,18 @@ FIO_compressLz4Frame(cRess_t* ress, LZ4F_preferences_t prefs; LZ4F_compressionContext_t ctx; + IOJob_t* writeJob = AIO_WritePool_acquireJob(ress->writeCtx); + LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION); if (LZ4F_isError(errorCode)) EXM_THROW(31, "zstd: failed to create lz4 compression context"); memset(&prefs, 0, sizeof(prefs)); - assert(blockSize <= ress->srcBufferSize); + assert(blockSize <= ress->readCtx->base.jobBufferSize); - prefs.autoFlush = 1; + /* autoflush off to mitigate a bug in lz4<=1.9.3 for compression level 12 */ + prefs.autoFlush = 0; prefs.compressionLevel = compressionLevel; prefs.frameInfo.blockMode = LZ4F_blockLinked; prefs.frameInfo.blockSizeID = LZ4F_max64KB; @@ -1180,27 +1196,25 @@ FIO_compressLz4Frame(cRess_t* ress, #if LZ4_VERSION_NUMBER >= 10600 prefs.frameInfo.contentSize = (srcFileSize==UTIL_FILESIZE_UNKNOWN) ? 
0 : srcFileSize; #endif - assert(LZ4F_compressBound(blockSize, &prefs) <= ress->dstBufferSize); + assert(LZ4F_compressBound(blockSize, &prefs) <= writeJob->bufferSize); { - size_t readSize; - size_t headerSize = LZ4F_compressBegin(ctx, ress->dstBuffer, ress->dstBufferSize, &prefs); + size_t headerSize = LZ4F_compressBegin(ctx, writeJob->buffer, writeJob->bufferSize, &prefs); if (LZ4F_isError(headerSize)) EXM_THROW(33, "File header generation failed : %s", LZ4F_getErrorName(headerSize)); - if (fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile) != headerSize) - EXM_THROW(34, "Write error : %s (cannot write header)", strerror(errno)); + writeJob->usedBufferSize = headerSize; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); outFileSize += headerSize; /* Read first block */ - readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile); - inFileSize += readSize; + inFileSize += AIO_ReadPool_fillBuffer(ress->readCtx, blockSize); /* Main Loop */ - while (readSize>0) { - size_t const outSize = LZ4F_compressUpdate(ctx, - ress->dstBuffer, ress->dstBufferSize, - ress->srcBuffer, readSize, NULL); + while (ress->readCtx->srcBufferLoaded) { + size_t inSize = MIN(blockSize, ress->readCtx->srcBufferLoaded); + size_t const outSize = LZ4F_compressUpdate(ctx, writeJob->buffer, writeJob->bufferSize, + ress->readCtx->srcBuffer, inSize, NULL); if (LZ4F_isError(outSize)) EXM_THROW(35, "zstd: %s: lz4 compression failed : %s", srcFileName, LZ4F_getErrorName(outSize)); @@ -1216,33 +1230,29 @@ FIO_compressLz4Frame(cRess_t* ress, } /* Write Block */ - { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, outSize, ress->dstFile); - if (sizeCheck != outSize) - EXM_THROW(36, "Write error : %s", strerror(errno)); - } + writeJob->usedBufferSize = outSize; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); /* Read next block */ - readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile); - inFileSize += readSize; + 
AIO_ReadPool_consumeBytes(ress->readCtx, inSize); + inFileSize += AIO_ReadPool_fillBuffer(ress->readCtx, blockSize); } - if (ferror(ress->srcFile)) EXM_THROW(37, "Error reading %s ", srcFileName); /* End of Stream mark */ - headerSize = LZ4F_compressEnd(ctx, ress->dstBuffer, ress->dstBufferSize, NULL); + headerSize = LZ4F_compressEnd(ctx, writeJob->buffer, writeJob->bufferSize, NULL); if (LZ4F_isError(headerSize)) EXM_THROW(38, "zstd: %s: lz4 end of file generation failed : %s", srcFileName, LZ4F_getErrorName(headerSize)); - { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile); - if (sizeCheck != headerSize) - EXM_THROW(39, "Write error : %s (cannot write end of stream)", - strerror(errno)); - } + writeJob->usedBufferSize = headerSize; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); outFileSize += headerSize; } *readsize = inFileSize; LZ4F_freeCompressionContext(ctx); + AIO_WritePool_releaseIoJob(writeJob); + AIO_WritePool_sparseWriteEnd(ress->writeCtx); return outFileSize; } @@ -1257,8 +1267,8 @@ FIO_compressZstdFrame(FIO_ctx_t* const fCtx, int compressionLevel, U64* readsize) { cRess_t const ress = *ressPtr; - FILE* const srcFile = ress.srcFile; - FILE* const dstFile = ress.dstFile; + IOJob_t *writeJob = AIO_WritePool_acquireJob(ressPtr->writeCtx); + U64 compressedfilesize = 0; ZSTD_EndDirective directive = ZSTD_e_continue; U64 pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; @@ -1303,12 +1313,12 @@ FIO_compressZstdFrame(FIO_ctx_t* const fCtx, do { size_t stillToFlush; /* Fill input Buffer */ - size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile); - ZSTD_inBuffer inBuff = { ress.srcBuffer, inSize, 0 }; + size_t const inSize = AIO_ReadPool_fillBuffer(ress.readCtx, ZSTD_CStreamInSize()); + ZSTD_inBuffer inBuff = { ress.readCtx->srcBuffer, ress.readCtx->srcBufferLoaded, 0 }; DISPLAYLEVEL(6, "fread %u bytes from source \n", (unsigned)inSize); *readsize += inSize; - if ((inSize == 0) || (*readsize == 
fileSize)) + if ((ress.readCtx->srcBufferLoaded == 0) || (*readsize == fileSize)) directive = ZSTD_e_end; stillToFlush = 1; @@ -1316,9 +1326,10 @@ FIO_compressZstdFrame(FIO_ctx_t* const fCtx, || (directive == ZSTD_e_end && stillToFlush != 0) ) { size_t const oldIPos = inBuff.pos; - ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 }; + ZSTD_outBuffer outBuff= { writeJob->buffer, writeJob->bufferSize, 0 }; size_t const toFlushNow = ZSTD_toFlushNow(ress.cctx); CHECK_V(stillToFlush, ZSTD_compressStream2(ress.cctx, &outBuff, &inBuff, directive)); + AIO_ReadPool_consumeBytes(ress.readCtx, inBuff.pos - oldIPos); /* count stats */ inputPresented++; @@ -1327,12 +1338,10 @@ FIO_compressZstdFrame(FIO_ctx_t* const fCtx, /* Write compressed stream */ DISPLAYLEVEL(6, "ZSTD_compress_generic(end:%u) => input pos(%u)<=(%u)size ; output generated %u bytes \n", - (unsigned)directive, (unsigned)inBuff.pos, (unsigned)inBuff.size, (unsigned)outBuff.pos); + (unsigned)directive, (unsigned)inBuff.pos, (unsigned)inBuff.size, (unsigned)outBuff.pos); if (outBuff.pos) { - size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile); - if (sizeCheck != outBuff.pos) - EXM_THROW(25, "Write error : %s (cannot write compressed block)", - strerror(errno)); + writeJob->usedBufferSize = outBuff.pos; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); compressedfilesize += outBuff.pos; } @@ -1464,14 +1473,14 @@ FIO_compressZstdFrame(FIO_ctx_t* const fCtx, } /* while ((inBuff.pos != inBuff.size) */ } while (directive != ZSTD_e_end); - if (ferror(srcFile)) { - EXM_THROW(26, "Read error : I/O error"); - } if (fileSize != UTIL_FILESIZE_UNKNOWN && *readsize != fileSize) { EXM_THROW(27, "Read error : Incomplete read : %llu / %llu B", (unsigned long long)*readsize, (unsigned long long)fileSize); } + AIO_WritePool_releaseIoJob(writeJob); + AIO_WritePool_sparseWriteEnd(ressPtr->writeCtx); + return compressedfilesize; } @@ -1572,7 +1581,7 @@ 
FIO_compressFilename_internal(FIO_ctx_t* const fCtx, /*! FIO_compressFilename_dstFile() : - * open dstFileName, or pass-through if ress.dstFile != NULL, + * open dstFileName, or pass-through if ress.file != NULL, * then start compression with FIO_compressFilename_internal(). * Manages source removal (--rm) and file permissions transfer. * note : ress.srcFile must be != NULL, @@ -1591,8 +1600,9 @@ static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx, int result; stat_t statbuf; int transferMTime = 0; - assert(ress.srcFile != NULL); - if (ress.dstFile == NULL) { + FILE *dstFile; + assert(AIO_ReadPool_getFile(ress.readCtx) != NULL); + if (AIO_WritePool_getFile(ress.writeCtx) == NULL) { int dstFilePermissions = DEFAULT_FILE_PERMISSIONS; if ( strcmp (srcFileName, stdinmark) && strcmp (dstFileName, stdoutmark) @@ -1604,8 +1614,9 @@ static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx, closeDstFile = 1; DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s \n", dstFileName); - ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions); - if (ress.dstFile==NULL) return 1; /* could not open dstFileName */ + dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions); + if (dstFile==NULL) return 1; /* could not open dstFileName */ + AIO_WritePool_setFile(ress.writeCtx, dstFile); /* Must only be added after FIO_openDstFile() succeeds. * Otherwise we may delete the destination file if it already exists, * and the user presses Ctrl-C when asked if they wish to overwrite. 
@@ -1616,13 +1627,10 @@ static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx, result = FIO_compressFilename_internal(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel); if (closeDstFile) { - FILE* const dstFile = ress.dstFile; - ress.dstFile = NULL; - clearHandler(); DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: closing dst: %s \n", dstFileName); - if (fclose(dstFile)) { /* error closing dstFile */ + if (AIO_WritePool_closeFile(ress.writeCtx)) { /* error closing file */ DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno)); result=1; } @@ -1668,6 +1676,7 @@ FIO_compressFilename_srcFile(FIO_ctx_t* const fCtx, int compressionLevel) { int result; + FILE* srcFile; DISPLAYLEVEL(6, "FIO_compressFilename_srcFile: %s \n", srcFileName); /* ensure src is not a directory */ @@ -1691,13 +1700,13 @@ FIO_compressFilename_srcFile(FIO_ctx_t* const fCtx, return 0; } - ress.srcFile = FIO_openSrcFile(prefs, srcFileName); - if (ress.srcFile == NULL) return 1; /* srcFile could not be opened */ + srcFile = FIO_openSrcFile(prefs, srcFileName); + if (srcFile == NULL) return 1; /* srcFile could not be opened */ + AIO_ReadPool_setFile(ress.readCtx, srcFile); result = FIO_compressFilename_dstFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel); + AIO_ReadPool_closeFile(ress.readCtx); - fclose(ress.srcFile); - ress.srcFile = NULL; if ( prefs->removeSrcFile /* --rm */ && result == 0 /* success */ && strcmp(srcFileName, stdinmark) /* exception : don't erase stdin */ @@ -1844,23 +1853,24 @@ int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx, /* init */ assert(outFileName != NULL || suffix != NULL); if (outFileName != NULL) { /* output into a single destination (stdout typically) */ + FILE *dstFile; if (FIO_removeMultiFilesWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) { FIO_freeCResources(&ress); return 1; } - ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS); - if (ress.dstFile == NULL) { 
/* could not open outFileName */ + dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS); + if (dstFile == NULL) { /* could not open outFileName */ error = 1; } else { + AIO_WritePool_setFile(ress.writeCtx, dstFile); for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) { status = FIO_compressFilename_srcFile(fCtx, prefs, ress, outFileName, inFileNamesTable[fCtx->currFileIdx], compressionLevel); if (!status) fCtx->nbFilesProcessed++; error |= status; } - if (fclose(ress.dstFile)) + if (AIO_WritePool_closeFile(ress.writeCtx)) EXM_THROW(29, "Write error (%s) : cannot properly close %s", strerror(errno), outFileName); - ress.dstFile = NULL; } } else { if (outMirroredRootDirName) @@ -1916,13 +1926,10 @@ int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx, /* ************************************************************************** * Decompression ***************************************************************************/ - typedef struct { - void* srcBuffer; - size_t srcBufferSize; - size_t srcBufferLoaded; ZSTD_DStream* dctx; WritePoolCtx_t *writeCtx; + ReadPoolCtx_t *readCtx; } dRess_t; static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFileName) @@ -1940,11 +1947,6 @@ static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFi CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) ); CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, !prefs->checksumFlag)); - ress.srcBufferSize = ZSTD_DStreamInSize(); - ress.srcBuffer = malloc(ress.srcBufferSize); - if (!ress.srcBuffer) - EXM_THROW(61, "Allocation error : not enough memory"); - /* dictionary */ { void* dictBuffer; size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName, prefs); @@ -1953,6 +1955,7 @@ static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFi } ress.writeCtx = AIO_WritePool_create(prefs, ZSTD_DStreamOutSize()); + ress.readCtx = 
AIO_ReadPool_create(prefs, ZSTD_DStreamInSize()); return ress; } @@ -1960,47 +1963,31 @@ static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFi static void FIO_freeDResources(dRess_t ress) { CHECK( ZSTD_freeDStream(ress.dctx) ); - free(ress.srcBuffer); AIO_WritePool_free(ress.writeCtx); -} - -/* FIO_consumeDSrcBuffer: - * Consumes len bytes from srcBuffer's start and moves the remaining data and srcBufferLoaded accordingly. */ -static void FIO_consumeDSrcBuffer(dRess_t *ress, size_t len) { - assert(ress->srcBufferLoaded >= len); - ress->srcBufferLoaded -= len; - memmove(ress->srcBuffer, (char *)ress->srcBuffer + len, ress->srcBufferLoaded); + AIO_ReadPool_free(ress.readCtx); } /** FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode @return : 0 (no error) */ -static int FIO_passThrough(const FIO_prefs_t* const prefs, - FILE* foutput, FILE* finput, - void* buffer, size_t bufferSize, - size_t alreadyLoaded) +static int FIO_passThrough(dRess_t *ress) { - size_t const blockSize = MIN(64 KB, bufferSize); - size_t readFromInput; - unsigned storedSkips = 0; + size_t const blockSize = MIN(MIN(64 KB, ZSTD_DStreamInSize()), ZSTD_DStreamOutSize()); + IOJob_t *writeJob = AIO_WritePool_acquireJob(ress->writeCtx); + AIO_ReadPool_fillBuffer(ress->readCtx, blockSize); - /* assumption : ress->srcBufferLoaded bytes already loaded and stored within buffer */ - { size_t const sizeCheck = fwrite(buffer, 1, alreadyLoaded, foutput); - if (sizeCheck != alreadyLoaded) { - DISPLAYLEVEL(1, "Pass-through write error : %s\n", strerror(errno)); - return 1; - } } - - do { - readFromInput = fread(buffer, 1, blockSize, finput); - storedSkips = AIO_fwriteSparse(foutput, buffer, readFromInput, prefs, storedSkips); - } while (readFromInput == blockSize); - if (ferror(finput)) { - DISPLAYLEVEL(1, "Pass-through read error : %s\n", strerror(errno)); - return 1; + while(ress->readCtx->srcBufferLoaded) { + size_t writeSize; + writeSize = 
MIN(blockSize, ress->readCtx->srcBufferLoaded); + assert(writeSize <= writeJob->bufferSize); + memcpy(writeJob->buffer, ress->readCtx->srcBuffer, writeSize); + writeJob->usedBufferSize = writeSize; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); + AIO_ReadPool_consumeBytes(ress->readCtx, writeSize); + AIO_ReadPool_fillBuffer(ress->readCtx, blockSize); } - assert(feof(finput)); - - AIO_fwriteSparseEnd(prefs, foutput, storedSkips); + assert(ress->readCtx->reachedEof); + AIO_WritePool_releaseIoJob(writeJob); + AIO_WritePool_sparseWriteEnd(ress->writeCtx); return 0; } @@ -2018,7 +2005,7 @@ FIO_zstdErrorHelp(const FIO_prefs_t* const prefs, return; /* Try to decode the frame header */ - err = ZSTD_getFrameHeader(&header, ress->srcBuffer, ress->srcBufferLoaded); + err = ZSTD_getFrameHeader(&header, ress->readCtx->srcBuffer, ress->readCtx->srcBufferLoaded); if (err == 0) { unsigned long long const windowSize = header.windowSize; unsigned const windowLog = FIO_highbit64(windowSize) + ((windowSize & (windowSize - 1)) != 0); @@ -2041,7 +2028,7 @@ FIO_zstdErrorHelp(const FIO_prefs_t* const prefs, */ #define FIO_ERROR_FRAME_DECODING ((unsigned long long)(-2)) static unsigned long long -FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, FILE* finput, +FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, const FIO_prefs_t* const prefs, const char* srcFileName, U64 alreadyDecoded) /* for multi-frames streams */ @@ -2057,16 +2044,11 @@ FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, FILE* finput, ZSTD_DCtx_reset(ress->dctx, ZSTD_reset_session_only); /* Header loading : ensures ZSTD_getFrameHeader() will succeed */ - { size_t const toDecode = ZSTD_FRAMEHEADERSIZE_MAX; - if (ress->srcBufferLoaded < toDecode) { - size_t const toRead = toDecode - ress->srcBufferLoaded; - void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded; - ress->srcBufferLoaded += fread(startPosition, 1, toRead, finput); - } } + 
AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_FRAMEHEADERSIZE_MAX); /* Main decompression Loop */ while (1) { - ZSTD_inBuffer inBuff = { ress->srcBuffer, ress->srcBufferLoaded, 0 }; + ZSTD_inBuffer inBuff = { ress->readCtx->srcBuffer, ress->readCtx->srcBufferLoaded, 0 }; ZSTD_outBuffer outBuff= { writeJob->buffer, writeJob->bufferSize, 0 }; size_t const readSizeHint = ZSTD_decompressStream(ress->dctx, &outBuff, &inBuff); const int displayLevel = (g_display_prefs.progressSetting == FIO_ps_always) ? 1 : 2; @@ -2088,7 +2070,7 @@ FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, FILE* finput, if (srcFileNameSize > 18) { const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15; DISPLAYUPDATE(displayLevel, "\rDecompress: %2u/%2u files. Current: ...%s : %.*f%s... ", - fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, hrs.precision, hrs.value, hrs.suffix); + fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, hrs.precision, hrs.value, hrs.suffix); } else { DISPLAYUPDATE(displayLevel, "\rDecompress: %2u/%2u files. Current: %s : %.*f%s... 
", fCtx->currFileIdx+1, fCtx->nbFilesTotal, srcFileName, hrs.precision, hrs.value, hrs.suffix); @@ -2098,23 +2080,21 @@ FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, FILE* finput, srcFileName, hrs.precision, hrs.value, hrs.suffix); } - FIO_consumeDSrcBuffer(ress, inBuff.pos); + AIO_ReadPool_consumeBytes(ress->readCtx, inBuff.pos); if (readSizeHint == 0) break; /* end of frame */ /* Fill input buffer */ - { size_t const toDecode = MIN(readSizeHint, ress->srcBufferSize); /* support large skippable frames */ - if (ress->srcBufferLoaded < toDecode) { - size_t const toRead = toDecode - ress->srcBufferLoaded; /* > 0 */ - void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded; - size_t const readSize = fread(startPosition, 1, toRead, finput); + { size_t const toDecode = MIN(readSizeHint, ZSTD_DStreamInSize()); /* support large skippable frames */ + if (ress->readCtx->srcBufferLoaded < toDecode) { + size_t const readSize = AIO_ReadPool_fillBuffer(ress->readCtx, toDecode); if (readSize==0) { DISPLAYLEVEL(1, "%s : Read error (39) : premature end \n", - srcFileName); + srcFileName); + AIO_WritePool_releaseIoJob(writeJob); return FIO_ERROR_FRAME_DECODING; } - ress->srcBufferLoaded += readSize; - } } } + } } } AIO_WritePool_releaseIoJob(writeJob); AIO_WritePool_sparseWriteEnd(ress->writeCtx); @@ -2125,7 +2105,7 @@ FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, FILE* finput, #ifdef ZSTD_GZDECOMPRESS static unsigned long long -FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, const char* srcFileName) +FIO_decompressGzFrame(dRess_t* ress, const char* srcFileName) { unsigned long long outFileSize = 0; z_stream strm; @@ -2145,16 +2125,16 @@ FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, const char* srcFileName) writeJob = AIO_WritePool_acquireJob(ress->writeCtx); strm.next_out = (Bytef*)writeJob->buffer; strm.avail_out = (uInt)writeJob->bufferSize; - strm.avail_in = (uInt)ress->srcBufferLoaded; - strm.next_in = (z_const 
unsigned char*)ress->srcBuffer; + strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded; + strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer; for ( ; ; ) { int ret; if (strm.avail_in == 0) { - ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile); - if (ress->srcBufferLoaded == 0) flush = Z_FINISH; - strm.next_in = (z_const unsigned char*)ress->srcBuffer; - strm.avail_in = (uInt)ress->srcBufferLoaded; + AIO_ReadPool_consumeAndRefill(ress->readCtx); + if (ress->readCtx->srcBufferLoaded == 0) flush = Z_FINISH; + strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer; + strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded; } ret = inflate(&strm, flush); if (ret == Z_BUF_ERROR) { @@ -2177,7 +2157,7 @@ FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, const char* srcFileName) if (ret == Z_STREAM_END) break; } - FIO_consumeDSrcBuffer(ress, ress->srcBufferLoaded - strm.avail_in); + AIO_ReadPool_consumeBytes(ress->readCtx, ress->readCtx->srcBufferLoaded - strm.avail_in); if ( (inflateEnd(&strm) != Z_OK) /* release resources ; error detected */ && (decodingError==0) ) { @@ -2192,7 +2172,7 @@ FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, const char* srcFileName) #ifdef ZSTD_LZMADECOMPRESS static unsigned long long -FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile, +FIO_decompressLzmaFrame(dRess_t* ress, const char* srcFileName, int plain_lzma) { unsigned long long outFileSize = 0; @@ -2220,16 +2200,16 @@ FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile, writeJob = AIO_WritePool_acquireJob(ress->writeCtx); strm.next_out = (Bytef*)writeJob->buffer; strm.avail_out = (uInt)writeJob->bufferSize; - strm.next_in = (BYTE const*)ress->srcBuffer; - strm.avail_in = ress->srcBufferLoaded; + strm.next_in = (BYTE const*)ress->readCtx->srcBuffer; + strm.avail_in = ress->readCtx->srcBufferLoaded; for ( ; ; ) { lzma_ret ret; if (strm.avail_in == 0) { - ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, 
srcFile); - if (ress->srcBufferLoaded == 0) action = LZMA_FINISH; - strm.next_in = (BYTE const*)ress->srcBuffer; - strm.avail_in = ress->srcBufferLoaded; + AIO_ReadPool_consumeAndRefill(ress->readCtx); + if (ress->readCtx->srcBufferLoaded == 0) action = LZMA_FINISH; + strm.next_in = (BYTE const*)ress->readCtx->srcBuffer; + strm.avail_in = ress->readCtx->srcBufferLoaded; } ret = lzma_code(&strm, action); @@ -2253,7 +2233,7 @@ FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile, if (ret == LZMA_STREAM_END) break; } - FIO_consumeDSrcBuffer(ress, ress->srcBufferLoaded - strm.avail_in); + AIO_ReadPool_consumeBytes(ress->readCtx, ress->readCtx->srcBufferLoaded - strm.avail_in); lzma_end(&strm); AIO_WritePool_releaseIoJob(writeJob); AIO_WritePool_sparseWriteEnd(ress->writeCtx); @@ -2263,8 +2243,7 @@ FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile, #ifdef ZSTD_LZ4DECOMPRESS static unsigned long long -FIO_decompressLz4Frame(dRess_t* ress, FILE* srcFile, - const char* srcFileName) +FIO_decompressLz4Frame(dRess_t* ress, const char* srcFileName) { unsigned long long filesize = 0; LZ4F_errorCode_t nextToLoad = 4; @@ -2282,34 +2261,27 @@ FIO_decompressLz4Frame(dRess_t* ress, FILE* srcFile, /* Main Loop */ for (;nextToLoad;) { - size_t readSize; size_t pos = 0; size_t decodedBytes = writeJob->bufferSize; int fullBufferDecoded = 0; /* Read input */ - nextToLoad = MIN(nextToLoad, ress->srcBufferSize-ress->srcBufferLoaded); - readSize = fread((char *)ress->srcBuffer + ress->srcBufferLoaded, 1, nextToLoad, srcFile); - if(!readSize && ferror(srcFile)) { - DISPLAYLEVEL(1, "zstd: %s: read error \n", srcFileName); - decodingError=1; - break; - } - if(!readSize && !ress->srcBufferLoaded) break; /* reached end of file */ - ress->srcBufferLoaded += readSize; + AIO_ReadPool_fillBuffer(ress->readCtx, nextToLoad); + if(!ress->readCtx->srcBufferLoaded) break; /* reached end of file */ - while ((pos < ress->srcBufferLoaded) || fullBufferDecoded) { /* still to read, or still to flush */ + 
while ((pos < ress->readCtx->srcBufferLoaded) || fullBufferDecoded) { /* still to read, or still to flush */ /* Decode Input (at least partially) */ - size_t remaining = ress->srcBufferLoaded - pos; + size_t remaining = ress->readCtx->srcBufferLoaded - pos; decodedBytes = writeJob->bufferSize; - nextToLoad = LZ4F_decompress(dCtx, writeJob->buffer, &decodedBytes, (char*)(ress->srcBuffer)+pos, &remaining, NULL); + nextToLoad = LZ4F_decompress(dCtx, writeJob->buffer, &decodedBytes, (char*)(ress->readCtx->srcBuffer)+pos, + &remaining, NULL); if (LZ4F_isError(nextToLoad)) { DISPLAYLEVEL(1, "zstd: %s: lz4 decompression error : %s \n", srcFileName, LZ4F_getErrorName(nextToLoad)); decodingError = 1; nextToLoad = 0; break; } pos += remaining; - assert(pos <= ress->srcBufferLoaded); + assert(pos <= ress->readCtx->srcBufferLoaded); fullBufferDecoded = decodedBytes == writeJob->bufferSize; /* Write Block */ @@ -2324,7 +2296,7 @@ FIO_decompressLz4Frame(dRess_t* ress, FILE* srcFile, if (!nextToLoad) break; } - FIO_consumeDSrcBuffer(ress, pos); + AIO_ReadPool_consumeBytes(ress->readCtx, pos); } if (nextToLoad!=0) { DISPLAYLEVEL(1, "zstd: %s: unfinished lz4 stream \n", srcFileName); @@ -2348,23 +2320,20 @@ FIO_decompressLz4Frame(dRess_t* ress, FILE* srcFile, * 1 : error */ static int FIO_decompressFrames(FIO_ctx_t* const fCtx, - dRess_t ress, FILE* srcFile, - const FIO_prefs_t* const prefs, - const char* dstFileName, const char* srcFileName) + dRess_t ress, const FIO_prefs_t* const prefs, + const char* dstFileName, const char* srcFileName) { unsigned readSomething = 0; unsigned long long filesize = 0; - assert(srcFile != NULL); /* for each frame */ for ( ; ; ) { /* check magic number -> version */ size_t const toRead = 4; - const BYTE* const buf = (const BYTE*)ress.srcBuffer; - if (ress.srcBufferLoaded < toRead) /* load up to 4 bytes for header */ - ress.srcBufferLoaded += fread((char*)ress.srcBuffer + ress.srcBufferLoaded, - (size_t)1, toRead - ress.srcBufferLoaded, srcFile); - 
if (ress.srcBufferLoaded==0) { + const BYTE* buf; + AIO_ReadPool_fillBuffer(ress.readCtx, toRead); + buf = (const BYTE*)ress.readCtx->srcBuffer; + if (ress.readCtx->srcBufferLoaded==0) { if (readSomething==0) { /* srcFile is empty (which is invalid) */ DISPLAYLEVEL(1, "zstd: %s: unexpected end of file \n", srcFileName); return 1; @@ -2372,17 +2341,17 @@ static int FIO_decompressFrames(FIO_ctx_t* const fCtx, break; /* no more input */ } readSomething = 1; /* there is at least 1 byte in srcFile */ - if (ress.srcBufferLoaded < toRead) { + if (ress.readCtx->srcBufferLoaded < toRead) { DISPLAYLEVEL(1, "zstd: %s: unknown header \n", srcFileName); return 1; } - if (ZSTD_isFrame(buf, ress.srcBufferLoaded)) { - unsigned long long const frameSize = FIO_decompressZstdFrame(fCtx, &ress, srcFile, prefs, srcFileName, filesize); + if (ZSTD_isFrame(buf, ress.readCtx->srcBufferLoaded)) { + unsigned long long const frameSize = FIO_decompressZstdFrame(fCtx, &ress, prefs, srcFileName, filesize); if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; filesize += frameSize; } else if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */ #ifdef ZSTD_GZDECOMPRESS - unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFile, srcFileName); + unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFileName); if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; filesize += frameSize; #else @@ -2392,7 +2361,7 @@ static int FIO_decompressFrames(FIO_ctx_t* const fCtx, } else if ((buf[0] == 0xFD && buf[1] == 0x37) /* xz magic number */ || (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */ #ifdef ZSTD_LZMADECOMPRESS - unsigned long long const frameSize = FIO_decompressLzmaFrame(&ress, srcFile, srcFileName, buf[0] != 0xFD); + unsigned long long const frameSize = FIO_decompressLzmaFrame(&ress, srcFileName, buf[0] != 0xFD); if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; filesize += frameSize; #else @@ -2401,7 +2370,7 @@ static int 
FIO_decompressFrames(FIO_ctx_t* const fCtx, #endif } else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) { #ifdef ZSTD_LZ4DECOMPRESS - unsigned long long const frameSize = FIO_decompressLz4Frame(&ress, srcFile, srcFileName); + unsigned long long const frameSize = FIO_decompressLz4Frame(&ress, srcFileName); if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; filesize += frameSize; #else @@ -2409,10 +2378,7 @@ static int FIO_decompressFrames(FIO_ctx_t* const fCtx, return 1; #endif } else if ((prefs->overwrite) && !strcmp (dstFileName, stdoutmark)) { /* pass-through mode */ - return FIO_passThrough(prefs, - AIO_WritePool_getFile(ress.writeCtx), srcFile, - ress.srcBuffer, ress.srcBufferSize, - ress.srcBufferLoaded); + return FIO_passThrough(&ress); } else { DISPLAYLEVEL(1, "zstd: %s: unsupported format \n", srcFileName); return 1; @@ -2432,15 +2398,14 @@ static int FIO_decompressFrames(FIO_ctx_t* const fCtx, } /** FIO_decompressDstFile() : - open `dstFileName`, - or path-through if ress.dstFile is already != 0, + open `dstFileName`, or pass-through if writeCtx's file is already != 0, then start decompression process (FIO_decompressFrames()). 
@return : 0 : OK 1 : operation aborted */ static int FIO_decompressDstFile(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, - dRess_t ress, FILE* srcFile, + dRess_t ress, const char* dstFileName, const char* srcFileName) { int result; @@ -2472,7 +2437,7 @@ static int FIO_decompressDstFile(FIO_ctx_t* const fCtx, addHandler(dstFileName); } - result = FIO_decompressFrames(fCtx, ress, srcFile, prefs, dstFileName, srcFileName); + result = FIO_decompressFrames(fCtx, ress, prefs, dstFileName, srcFileName); if (releaseDstFile) { clearHandler(); @@ -2513,9 +2478,11 @@ static int FIO_decompressSrcFile(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs srcFile = FIO_openSrcFile(prefs, srcFileName); if (srcFile==NULL) return 1; - ress.srcBufferLoaded = 0; + AIO_ReadPool_setFile(ress.readCtx, srcFile); - result = FIO_decompressDstFile(fCtx, prefs, ress, srcFile, dstFileName, srcFileName); + result = FIO_decompressDstFile(fCtx, prefs, ress, dstFileName, srcFileName); + + AIO_ReadPool_setFile(ress.readCtx, NULL); /* Close file */ if (fclose(srcFile)) { diff --git a/programs/fileio_asyncio.c b/programs/fileio_asyncio.c index 868720a1..332292bb 100644 --- a/programs/fileio_asyncio.c +++ b/programs/fileio_asyncio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -29,7 +29,8 @@ /** AIO_fwriteSparse() : * @return : storedSkips, * argument for next call to AIO_fwriteSparse() or AIO_fwriteSparseEnd() */ -unsigned AIO_fwriteSparse(FILE* file, +static unsigned +AIO_fwriteSparse(FILE* file, const void* buffer, size_t bufferSize, const FIO_prefs_t* const prefs, unsigned storedSkips) @@ -45,7 +46,7 @@ unsigned AIO_fwriteSparse(FILE* file, if (!prefs->sparseFileSupport) { /* normal write */ size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file); if (sizeCheck != bufferSize) - EXM_THROW(70, "Write error : cannot write decoded block : %s", + EXM_THROW(70, "Write error : cannot write block : %s", strerror(errno)); return 0; } @@ -77,7 +78,7 @@ unsigned AIO_fwriteSparse(FILE* file, storedSkips = 0; /* write the rest */ if (fwrite(ptrT + nb0T, sizeof(size_t), nbNon0ST, file) != nbNon0ST) - EXM_THROW(93, "Write error : cannot write decoded block : %s", + EXM_THROW(93, "Write error : cannot write block : %s", strerror(errno)); } ptrT += seg0SizeT; @@ -106,7 +107,8 @@ unsigned AIO_fwriteSparse(FILE* file, return storedSkips; } -void AIO_fwriteSparseEnd(const FIO_prefs_t* const prefs, FILE* file, unsigned storedSkips) +static void +AIO_fwriteSparseEnd(const FIO_prefs_t* const prefs, FILE* file, unsigned storedSkips) { if (prefs->testMode) assert(storedSkips == 0); if (storedSkips>0) { @@ -127,17 +129,25 @@ void AIO_fwriteSparseEnd(const FIO_prefs_t* const prefs, FILE* file, unsigned st * AsyncIO functionality ************************************************************************/ +/* AIO_supported: + * Returns 1 if AsyncIO is supported on the system, 0 otherwise. 
*/ +int AIO_supported(void) { +#ifdef ZSTD_MULTITHREAD + return 1; +#else + return 0; +#endif +} + /* *********************************** * General IoPool implementation *************************************/ static IOJob_t *AIO_IOPool_createIoJob(IOPoolCtx_t *ctx, size_t bufferSize) { - void *buffer; - IOJob_t *job; - job = (IOJob_t*) malloc(sizeof(IOJob_t)); - buffer = malloc(bufferSize); + IOJob_t* const job = (IOJob_t*) malloc(sizeof(IOJob_t)); + void* const buffer = malloc(bufferSize); if(!job || !buffer) - EXM_THROW(101, "Allocation error : not enough memory"); + EXM_THROW(101, "Allocation error : not enough memory"); job->buffer = buffer; job->bufferSize = bufferSize; job->usedBufferSize = 0; @@ -151,49 +161,47 @@ static IOJob_t *AIO_IOPool_createIoJob(IOPoolCtx_t *ctx, size_t bufferSize) { /* AIO_IOPool_createThreadPool: * Creates a thread pool and a mutex for threaded IO pool. * Displays warning if asyncio is requested but MT isn't available. */ -static void AIO_IOPool_createThreadPool(IOPoolCtx_t *ctx, const FIO_prefs_t *prefs) { +static void AIO_IOPool_createThreadPool(IOPoolCtx_t* ctx, const FIO_prefs_t* prefs) { ctx->threadPool = NULL; if(prefs->asyncIO) { if (ZSTD_pthread_mutex_init(&ctx->ioJobsMutex, NULL)) - EXM_THROW(102,"Failed creating write availableJobs mutex"); + EXM_THROW(102,"Failed creating write availableJobs mutex"); /* We want MAX_IO_JOBS-2 queue items because we need to always have 1 free buffer to * decompress into and 1 buffer that's actively written to disk and owned by the writing thread. */ assert(MAX_IO_JOBS >= 2); ctx->threadPool = POOL_create(1, MAX_IO_JOBS - 2); if (!ctx->threadPool) - EXM_THROW(104, "Failed creating writer thread pool"); + EXM_THROW(104, "Failed creating writer thread pool"); } } /* AIO_IOPool_init: * Allocates and sets and a new write pool including its included availableJobs. 
*/ -static void AIO_IOPool_init(IOPoolCtx_t *ctx, FIO_prefs_t* const prefs, POOL_function poolFunction, size_t bufferSize) { +static void AIO_IOPool_init(IOPoolCtx_t* ctx, const FIO_prefs_t* prefs, POOL_function poolFunction, size_t bufferSize) { int i; AIO_IOPool_createThreadPool(ctx, prefs); ctx->prefs = prefs; ctx->poolFunction = poolFunction; - ctx->totalIoJobs = ctx->threadPool ? MAX_IO_JOBS : 1; + ctx->totalIoJobs = ctx->threadPool ? MAX_IO_JOBS : 2; ctx->availableJobsCount = ctx->totalIoJobs; for(i=0; i < ctx->availableJobsCount; i++) { ctx->availableJobs[i] = AIO_IOPool_createIoJob(ctx, bufferSize); } + ctx->jobBufferSize = bufferSize; ctx->file = NULL; } /* AIO_IOPool_releaseIoJob: * Releases an acquired job back to the pool. Doesn't execute the job. */ -static void AIO_IOPool_releaseIoJob(IOJob_t *job) { - IOPoolCtx_t *ctx = (IOPoolCtx_t *) job->ctx; - if(ctx->threadPool) { +static void AIO_IOPool_releaseIoJob(IOJob_t* job) { + IOPoolCtx_t* const ctx = (IOPoolCtx_t *) job->ctx; + if(ctx->threadPool) ZSTD_pthread_mutex_lock(&ctx->ioJobsMutex); - assert(ctx->availableJobsCount < MAX_IO_JOBS); - ctx->availableJobs[ctx->availableJobsCount++] = job; + assert(ctx->availableJobsCount < ctx->totalIoJobs); + ctx->availableJobs[ctx->availableJobsCount++] = job; + if(ctx->threadPool) ZSTD_pthread_mutex_unlock(&ctx->ioJobsMutex); - } else { - assert(ctx->availableJobsCount == 0); - ctx->availableJobsCount++; - } } /* AIO_IOPool_join: @@ -225,19 +233,15 @@ static void AIO_IOPool_destroy(IOPoolCtx_t* ctx) { /* AIO_IOPool_acquireJob: * Returns an available io job to be used for a future io. 
*/ -static IOJob_t* AIO_IOPool_acquireJob(IOPoolCtx_t *ctx) { +static IOJob_t* AIO_IOPool_acquireJob(IOPoolCtx_t* ctx) { IOJob_t *job; assert(ctx->file != NULL || ctx->prefs->testMode); - if(ctx->threadPool) { + if(ctx->threadPool) ZSTD_pthread_mutex_lock(&ctx->ioJobsMutex); - assert(ctx->availableJobsCount > 0); - job = (IOJob_t*) ctx->availableJobs[--ctx->availableJobsCount]; + assert(ctx->availableJobsCount > 0); + job = (IOJob_t*) ctx->availableJobs[--ctx->availableJobsCount]; + if(ctx->threadPool) ZSTD_pthread_mutex_unlock(&ctx->ioJobsMutex); - } else { - assert(ctx->availableJobsCount == 1); - ctx->availableJobsCount--; - job = (IOJob_t*)ctx->availableJobs[0]; - } job->usedBufferSize = 0; job->file = ctx->file; job->offset = 0; @@ -249,22 +253,22 @@ static IOJob_t* AIO_IOPool_acquireJob(IOPoolCtx_t *ctx) { * Sets the destination file for future files in the pool. * Requires completion of all queues write jobs and release of all otherwise acquired jobs. * Also requires ending of sparse write if a previous file was used in sparse mode. */ -static void AIO_IOPool_setFile(IOPoolCtx_t *ctx, FILE* file) { +static void AIO_IOPool_setFile(IOPoolCtx_t* ctx, FILE* file) { assert(ctx!=NULL); AIO_IOPool_join(ctx); assert(ctx->availableJobsCount == ctx->totalIoJobs); ctx->file = file; } -static FILE* AIO_IOPool_getFile(IOPoolCtx_t *ctx) { +static FILE* AIO_IOPool_getFile(const IOPoolCtx_t* ctx) { return ctx->file; } /* AIO_IOPool_enqueueJob: * Enqueues an io job for execution. * The queued job shouldn't be used directly after queueing it. */ -static void AIO_IOPool_enqueueJob(IOJob_t *job) { - IOPoolCtx_t* ctx = (IOPoolCtx_t *)job->ctx; +static void AIO_IOPool_enqueueJob(IOJob_t* job) { + IOPoolCtx_t* const ctx = (IOPoolCtx_t *)job->ctx; if(ctx->threadPool) POOL_add(ctx->threadPool, ctx->poolFunction, job); else @@ -277,7 +281,7 @@ static void AIO_IOPool_enqueueJob(IOJob_t *job) { /* AIO_WritePool_acquireJob: * Returns an available write job to be used for a future write. 
*/ -IOJob_t* AIO_WritePool_acquireJob(WritePoolCtx_t *ctx) { +IOJob_t* AIO_WritePool_acquireJob(WritePoolCtx_t* ctx) { return AIO_IOPool_acquireJob(&ctx->base); } @@ -294,7 +298,7 @@ void AIO_WritePool_enqueueAndReacquireWriteJob(IOJob_t **job) { /* AIO_WritePool_sparseWriteEnd: * Ends sparse writes to the current file. * Blocks on completion of all current write jobs before executing. */ -void AIO_WritePool_sparseWriteEnd(WritePoolCtx_t *ctx) { +void AIO_WritePool_sparseWriteEnd(WritePoolCtx_t* ctx) { assert(ctx != NULL); if(ctx->base.threadPool) POOL_joinJobs(ctx->base.threadPool); @@ -306,28 +310,28 @@ void AIO_WritePool_sparseWriteEnd(WritePoolCtx_t *ctx) { * Sets the destination file for future writes in the pool. * Requires completion of all queues write jobs and release of all otherwise acquired jobs. * Also requires ending of sparse write if a previous file was used in sparse mode. */ -void AIO_WritePool_setFile(WritePoolCtx_t *ctx, FILE* file) { +void AIO_WritePool_setFile(WritePoolCtx_t* ctx, FILE* file) { AIO_IOPool_setFile(&ctx->base, file); assert(ctx->storedSkips == 0); } /* AIO_WritePool_getFile: * Returns the file the writePool is currently set to write to. */ -FILE* AIO_WritePool_getFile(WritePoolCtx_t *ctx) { +FILE* AIO_WritePool_getFile(const WritePoolCtx_t* ctx) { return AIO_IOPool_getFile(&ctx->base); } /* AIO_WritePool_releaseIoJob: * Releases an acquired job back to the pool. Doesn't execute the job. */ -void AIO_WritePool_releaseIoJob(IOJob_t *job) { +void AIO_WritePool_releaseIoJob(IOJob_t* job) { AIO_IOPool_releaseIoJob(job); } /* AIO_WritePool_closeFile: * Ends sparse write and closes the writePool's current file and sets the file to NULL. * Requires completion of all queues write jobs and release of all otherwise acquired jobs. 
*/ -int AIO_WritePool_closeFile(WritePoolCtx_t *ctx) { - FILE *dstFile = ctx->base.file; +int AIO_WritePool_closeFile(WritePoolCtx_t* ctx) { + FILE* const dstFile = ctx->base.file; assert(dstFile!=NULL || ctx->base.prefs->testMode!=0); AIO_WritePool_sparseWriteEnd(ctx); AIO_IOPool_setFile(&ctx->base, NULL); @@ -337,16 +341,16 @@ int AIO_WritePool_closeFile(WritePoolCtx_t *ctx) { /* AIO_WritePool_executeWriteJob: * Executes a write job synchronously. Can be used as a function for a thread pool. */ static void AIO_WritePool_executeWriteJob(void* opaque){ - IOJob_t* job = (IOJob_t*) opaque; - WritePoolCtx_t* ctx = (WritePoolCtx_t*) job->ctx; + IOJob_t* const job = (IOJob_t*) opaque; + WritePoolCtx_t* const ctx = (WritePoolCtx_t*) job->ctx; ctx->storedSkips = AIO_fwriteSparse(job->file, job->buffer, job->usedBufferSize, ctx->base.prefs, ctx->storedSkips); AIO_IOPool_releaseIoJob(job); } /* AIO_WritePool_create: * Allocates and sets and a new write pool including its included jobs. */ -WritePoolCtx_t* AIO_WritePool_create(FIO_prefs_t* const prefs, size_t bufferSize) { - WritePoolCtx_t* ctx = (WritePoolCtx_t*) malloc(sizeof(WritePoolCtx_t)); +WritePoolCtx_t* AIO_WritePool_create(const FIO_prefs_t* prefs, size_t bufferSize) { + WritePoolCtx_t* const ctx = (WritePoolCtx_t*) malloc(sizeof(WritePoolCtx_t)); if(!ctx) EXM_THROW(100, "Allocation error : not enough memory"); AIO_IOPool_init(&ctx->base, prefs, AIO_WritePool_executeWriteJob, bufferSize); ctx->storedSkips = 0; @@ -363,3 +367,256 @@ void AIO_WritePool_free(WritePoolCtx_t* ctx) { assert(ctx->storedSkips==0); free(ctx); } + + +/* *********************************** + * ReadPool implementation + *************************************/ +static void AIO_ReadPool_releaseAllCompletedJobs(ReadPoolCtx_t* ctx) { + int i; + for(i=0; i < ctx->completedJobsCount; i++) { + IOJob_t* job = (IOJob_t*) ctx->completedJobs[i]; + AIO_IOPool_releaseIoJob(job); + } + ctx->completedJobsCount = 0; +} + +static void 
AIO_ReadPool_addJobToCompleted(IOJob_t* job) { + ReadPoolCtx_t* const ctx = (ReadPoolCtx_t *)job->ctx; + if(ctx->base.threadPool) + ZSTD_pthread_mutex_lock(&ctx->base.ioJobsMutex); + assert(ctx->completedJobsCount < MAX_IO_JOBS); + ctx->completedJobs[ctx->completedJobsCount++] = job; + if(ctx->base.threadPool) { + ZSTD_pthread_cond_signal(&ctx->jobCompletedCond); + ZSTD_pthread_mutex_unlock(&ctx->base.ioJobsMutex); + } +} + +/* AIO_ReadPool_findNextWaitingOffsetCompletedJob_locked: + * Looks through the completed jobs for a job matching the waitingOnOffset and returns it, + * if job wasn't found returns NULL. + * IMPORTANT: assumes ioJobsMutex is locked. */ +static IOJob_t* AIO_ReadPool_findNextWaitingOffsetCompletedJob_locked(ReadPoolCtx_t* ctx) { + IOJob_t *job = NULL; + int i; + /* This implementation goes through all completed jobs and looks for the one matching the next offset. + * While not strictly needed for a single threaded reader implementation (as in such a case we could expect + * reads to be completed in order) this implementation was chosen as it better fits other asyncio + * interfaces (such as io_uring) that do not provide promises regarding order of completion. */ + for (i=0; i<ctx->completedJobsCount; i++) { + job = (IOJob_t *) ctx->completedJobs[i]; + if (job->offset == ctx->waitingOnOffset) { + ctx->completedJobs[i] = ctx->completedJobs[--ctx->completedJobsCount]; + return job; + } + } + return NULL; +} + +/* AIO_ReadPool_numReadsInFlight: + * Returns the number of IO read jobs currrently in flight. */ +static size_t AIO_ReadPool_numReadsInFlight(ReadPoolCtx_t* ctx) { + const size_t jobsHeld = (ctx->currentJobHeld==NULL ? 0 : 1); + return ctx->base.totalIoJobs - (ctx->base.availableJobsCount + ctx->completedJobsCount + jobsHeld); +} + +/* AIO_ReadPool_getNextCompletedJob: + * Returns a completed IOJob_t for the next read in line based on waitingOnOffset and advances waitingOnOffset. + * Would block.
*/ +static IOJob_t* AIO_ReadPool_getNextCompletedJob(ReadPoolCtx_t* ctx) { + IOJob_t *job = NULL; + if (ctx->base.threadPool) + ZSTD_pthread_mutex_lock(&ctx->base.ioJobsMutex); + + job = AIO_ReadPool_findNextWaitingOffsetCompletedJob_locked(ctx); + + /* As long as we didn't find the job matching the next read, and we have some reads in flight continue waiting */ + while (!job && (AIO_ReadPool_numReadsInFlight(ctx) > 0)) { + assert(ctx->base.threadPool != NULL); /* we shouldn't be here if we work in sync mode */ + ZSTD_pthread_cond_wait(&ctx->jobCompletedCond, &ctx->base.ioJobsMutex); + job = AIO_ReadPool_findNextWaitingOffsetCompletedJob_locked(ctx); + } + + if(job) { + assert(job->offset == ctx->waitingOnOffset); + ctx->waitingOnOffset += job->usedBufferSize; + } + + if (ctx->base.threadPool) + ZSTD_pthread_mutex_unlock(&ctx->base.ioJobsMutex); + return job; +} + + +/* AIO_ReadPool_executeReadJob: + * Executes a read job synchronously. Can be used as a function for a thread pool. */ +static void AIO_ReadPool_executeReadJob(void* opaque){ + IOJob_t* const job = (IOJob_t*) opaque; + ReadPoolCtx_t* const ctx = (ReadPoolCtx_t *)job->ctx; + if(ctx->reachedEof) { + job->usedBufferSize = 0; + AIO_ReadPool_addJobToCompleted(job); + return; + } + job->usedBufferSize = fread(job->buffer, 1, job->bufferSize, job->file); + if(job->usedBufferSize < job->bufferSize) { + if(ferror(job->file)) { + EXM_THROW(37, "Read error"); + } else if(feof(job->file)) { + ctx->reachedEof = 1; + } else { + EXM_THROW(37, "Unexpected short read"); + } + } + AIO_ReadPool_addJobToCompleted(job); +} + +static void AIO_ReadPool_enqueueRead(ReadPoolCtx_t* ctx) { + IOJob_t* const job = AIO_IOPool_acquireJob(&ctx->base); + job->offset = ctx->nextReadOffset; + ctx->nextReadOffset += job->bufferSize; + AIO_IOPool_enqueueJob(job); +} + +static void AIO_ReadPool_startReading(ReadPoolCtx_t* ctx) { + int i; + for (i = 0; i < ctx->base.availableJobsCount; i++) { + AIO_ReadPool_enqueueRead(ctx); + } +} + +/* 
AIO_ReadPool_setFile: + * Sets the source file for future read in the pool. Initiates reading immediately if file is not NULL. + * Waits for all current enqueued tasks to complete if a previous file was set. */ +void AIO_ReadPool_setFile(ReadPoolCtx_t* ctx, FILE* file) { + assert(ctx!=NULL); + AIO_IOPool_join(&ctx->base); + AIO_ReadPool_releaseAllCompletedJobs(ctx); + if (ctx->currentJobHeld) { + AIO_IOPool_releaseIoJob((IOJob_t *)ctx->currentJobHeld); + ctx->currentJobHeld = NULL; + } + AIO_IOPool_setFile(&ctx->base, file); + ctx->nextReadOffset = 0; + ctx->waitingOnOffset = 0; + ctx->srcBuffer = ctx->coalesceBuffer; + ctx->srcBufferLoaded = 0; + ctx->reachedEof = 0; + if(file != NULL) + AIO_ReadPool_startReading(ctx); +} + +/* AIO_ReadPool_create: + * Allocates and sets and a new readPool including its included jobs. + * bufferSize should be set to the maximal buffer we want to read at a time, will also be used + * as our basic read size. */ +ReadPoolCtx_t* AIO_ReadPool_create(const FIO_prefs_t* prefs, size_t bufferSize) { + ReadPoolCtx_t* const ctx = (ReadPoolCtx_t*) malloc(sizeof(ReadPoolCtx_t)); + if(!ctx) EXM_THROW(100, "Allocation error : not enough memory"); + AIO_IOPool_init(&ctx->base, prefs, AIO_ReadPool_executeReadJob, bufferSize); + + ctx->coalesceBuffer = (U8*) malloc(bufferSize * 2); + ctx->srcBuffer = ctx->coalesceBuffer; + ctx->srcBufferLoaded = 0; + ctx->completedJobsCount = 0; + ctx->currentJobHeld = NULL; + + if(ctx->base.threadPool) + if (ZSTD_pthread_cond_init(&ctx->jobCompletedCond, NULL)) + EXM_THROW(103,"Failed creating write jobCompletedCond mutex"); + + return ctx; +} + +/* AIO_ReadPool_free: + * Frees and releases a readPool and its resources. Closes source file. 
*/ +void AIO_ReadPool_free(ReadPoolCtx_t* ctx) { + if(AIO_ReadPool_getFile(ctx)) + AIO_ReadPool_closeFile(ctx); + if(ctx->base.threadPool) + ZSTD_pthread_cond_destroy(&ctx->jobCompletedCond); + AIO_IOPool_destroy(&ctx->base); + free(ctx->coalesceBuffer); + free(ctx); +} + +/* AIO_ReadPool_consumeBytes: + * Consumes byes from srcBuffer's beginning and updates srcBufferLoaded accordingly. */ +void AIO_ReadPool_consumeBytes(ReadPoolCtx_t* ctx, size_t n) { + assert(n <= ctx->srcBufferLoaded); + ctx->srcBufferLoaded -= n; + ctx->srcBuffer += n; +} + +/* AIO_ReadPool_releaseCurrentlyHeldAndGetNext: + * Release the current held job and get the next one, returns NULL if no next job available. */ +static IOJob_t* AIO_ReadPool_releaseCurrentHeldAndGetNext(ReadPoolCtx_t* ctx) { + if (ctx->currentJobHeld) { + AIO_IOPool_releaseIoJob((IOJob_t *)ctx->currentJobHeld); + ctx->currentJobHeld = NULL; + AIO_ReadPool_enqueueRead(ctx); + } + ctx->currentJobHeld = AIO_ReadPool_getNextCompletedJob(ctx); + return (IOJob_t*) ctx->currentJobHeld; +} + +/* AIO_ReadPool_fillBuffer: + * Tries to fill the buffer with at least n or jobBufferSize bytes (whichever is smaller). + * Returns if srcBuffer has at least the expected number of bytes loaded or if we've reached the end of the file. + * Return value is the number of bytes added to the buffer. + * Note that srcBuffer might have up to 2 times jobBufferSize bytes. */ +size_t AIO_ReadPool_fillBuffer(ReadPoolCtx_t* ctx, size_t n) { + IOJob_t *job; + int useCoalesce = 0; + if(n > ctx->base.jobBufferSize) + n = ctx->base.jobBufferSize; + + /* We are good, don't read anything */ + if (ctx->srcBufferLoaded >= n) + return 0; + + /* We still have bytes loaded, but not enough to satisfy caller. 
We need to get the next job + * and coalesce the remaining bytes with the next job's buffer */ + if (ctx->srcBufferLoaded > 0) { + useCoalesce = 1; + memcpy(ctx->coalesceBuffer, ctx->srcBuffer, ctx->srcBufferLoaded); + ctx->srcBuffer = ctx->coalesceBuffer; + } + + /* Read the next chunk */ + job = AIO_ReadPool_releaseCurrentHeldAndGetNext(ctx); + if(!job) + return 0; + if(useCoalesce) { + assert(ctx->srcBufferLoaded + job->usedBufferSize <= 2*ctx->base.jobBufferSize); + memcpy(ctx->coalesceBuffer + ctx->srcBufferLoaded, job->buffer, job->usedBufferSize); + ctx->srcBufferLoaded += job->usedBufferSize; + } + else { + ctx->srcBuffer = (U8 *) job->buffer; + ctx->srcBufferLoaded = job->usedBufferSize; + } + return job->usedBufferSize; +} + +/* AIO_ReadPool_consumeAndRefill: + * Consumes the current buffer and refills it with bufferSize bytes. */ +size_t AIO_ReadPool_consumeAndRefill(ReadPoolCtx_t* ctx) { + AIO_ReadPool_consumeBytes(ctx, ctx->srcBufferLoaded); + return AIO_ReadPool_fillBuffer(ctx, ctx->base.jobBufferSize); +} + +/* AIO_ReadPool_getFile: + * Returns the current file set for the read pool. */ +FILE* AIO_ReadPool_getFile(const ReadPoolCtx_t* ctx) { + return AIO_IOPool_getFile(&ctx->base); +} + +/* AIO_ReadPool_closeFile: + * Closes the current set file. Waits for all current enqueued tasks to complete and resets state. */ +int AIO_ReadPool_closeFile(ReadPoolCtx_t* ctx) { + FILE* const file = AIO_ReadPool_getFile(ctx); + AIO_ReadPool_setFile(ctx, NULL); + return fclose(file); +} diff --git a/programs/fileio_asyncio.h b/programs/fileio_asyncio.h index 3e91164c..bf07f859 100644 --- a/programs/fileio_asyncio.h +++ b/programs/fileio_asyncio.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -28,7 +28,7 @@ typedef struct { /* These struct fields should be set only on creation and not changed afterwards */ POOL_ctx* threadPool; int totalIoJobs; - FIO_prefs_t* prefs; + const FIO_prefs_t* prefs; POOL_function poolFunction; /* Controls the file we currently write to, make changes only by using provided utility functions */ @@ -39,8 +39,36 @@ typedef struct { ZSTD_pthread_mutex_t ioJobsMutex; void* availableJobs[MAX_IO_JOBS]; int availableJobsCount; + size_t jobBufferSize; } IOPoolCtx_t; +typedef struct { + IOPoolCtx_t base; + + /* State regarding the currently read file */ + int reachedEof; + U64 nextReadOffset; + U64 waitingOnOffset; + + /* We may hold an IOJob object as needed if we actively expose its buffer. */ + void *currentJobHeld; + + /* Coalesce buffer is used to join two buffers in case where we need to read more bytes than left in + * the first of them. Shouldn't be accessed from outside ot utility functions. */ + U8 *coalesceBuffer; + + /* Read buffer can be used by consumer code, take care when copying this pointer aside as it might + * change when consuming / refilling buffer. */ + U8 *srcBuffer; + size_t srcBufferLoaded; + + /* We need to know what tasks completed so we can use their buffers when their time comes. + * Should only be accessed after locking base.ioJobsMutex . */ + void* completedJobs[MAX_IO_JOBS]; + int completedJobsCount; + ZSTD_pthread_cond_t jobCompletedCond; +} ReadPoolCtx_t; + typedef struct { IOPoolCtx_t base; unsigned storedSkips; @@ -59,15 +87,10 @@ typedef struct { U64 offset; } IOJob_t; -/** AIO_fwriteSparse() : -* @return : storedSkips, -* argument for next call to AIO_fwriteSparse() or AIO_fwriteSparseEnd() */ -unsigned AIO_fwriteSparse(FILE* file, - const void* buffer, size_t bufferSize, - const FIO_prefs_t* const prefs, - unsigned storedSkips); +/* AIO_supported: + * Returns 1 if AsyncIO is supported on the system, 0 otherwise. 
*/ +int AIO_supported(void); -void AIO_fwriteSparseEnd(const FIO_prefs_t* const prefs, FILE* file, unsigned storedSkips); /* AIO_WritePool_releaseIoJob: * Releases an acquired job back to the pool. Doesn't execute the job. */ @@ -97,7 +120,7 @@ void AIO_WritePool_setFile(WritePoolCtx_t *ctx, FILE* file); /* AIO_WritePool_getFile: * Returns the file the writePool is currently set to write to. */ -FILE* AIO_WritePool_getFile(WritePoolCtx_t *ctx); +FILE* AIO_WritePool_getFile(const WritePoolCtx_t* ctx); /* AIO_WritePool_closeFile: * Ends sparse write and closes the writePool's current file and sets the file to NULL. @@ -107,12 +130,50 @@ int AIO_WritePool_closeFile(WritePoolCtx_t *ctx); /* AIO_WritePool_create: * Allocates and sets and a new write pool including its included jobs. * bufferSize should be set to the maximal buffer we want to write to at a time. */ -WritePoolCtx_t* AIO_WritePool_create(FIO_prefs_t* const prefs, size_t bufferSize); +WritePoolCtx_t* AIO_WritePool_create(const FIO_prefs_t* prefs, size_t bufferSize); /* AIO_WritePool_free: * Frees and releases a writePool and its resources. Closes destination file. */ void AIO_WritePool_free(WritePoolCtx_t* ctx); +/* AIO_ReadPool_create: + * Allocates and sets and a new readPool including its included jobs. + * bufferSize should be set to the maximal buffer we want to read at a time, will also be used + * as our basic read size. */ +ReadPoolCtx_t* AIO_ReadPool_create(const FIO_prefs_t* prefs, size_t bufferSize); + +/* AIO_ReadPool_free: + * Frees and releases a readPool and its resources. Closes source file. */ +void AIO_ReadPool_free(ReadPoolCtx_t* ctx); + +/* AIO_ReadPool_consumeBytes: + * Consumes byes from srcBuffer's beginning and updates srcBufferLoaded accordingly. */ +void AIO_ReadPool_consumeBytes(ReadPoolCtx_t *ctx, size_t n); + +/* AIO_ReadPool_fillBuffer: + * Makes sure buffer has at least n bytes loaded (as long as n is not bigger than the initalized bufferSize). 
+ * Returns if srcBuffer has at least n bytes loaded or if we've reached the end of the file. + * Return value is the number of bytes added to the buffer. + * Note that srcBuffer might have up to 2 times bufferSize bytes. */ +size_t AIO_ReadPool_fillBuffer(ReadPoolCtx_t *ctx, size_t n); + +/* AIO_ReadPool_consumeAndRefill: + * Consumes the current buffer and refills it with bufferSize bytes. */ +size_t AIO_ReadPool_consumeAndRefill(ReadPoolCtx_t *ctx); + +/* AIO_ReadPool_setFile: + * Sets the source file for future read in the pool. Initiates reading immediately if file is not NULL. + * Waits for all current enqueued tasks to complete if a previous file was set. */ +void AIO_ReadPool_setFile(ReadPoolCtx_t *ctx, FILE* file); + +/* AIO_ReadPool_getFile: + * Returns the current file set for the read pool. */ +FILE* AIO_ReadPool_getFile(const ReadPoolCtx_t *ctx); + +/* AIO_ReadPool_closeFile: + * Closes the current set file. Waits for all current enqueued tasks to complete and resets state. */ +int AIO_ReadPool_closeFile(ReadPoolCtx_t *ctx); + #if defined (__cplusplus) } #endif diff --git a/programs/fileio_common.h b/programs/fileio_common.h index d33c19d7..282c2f13 100644 --- a/programs/fileio_common.h +++ b/programs/fileio_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/programs/fileio_types.h b/programs/fileio_types.h index 1909ab1a..cf566aa2 100644 --- a/programs/fileio_types.h +++ b/programs/fileio_types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -70,4 +70,4 @@ typedef struct FIO_prefs_s { int allowBlockDevices; } FIO_prefs_t; -#endif /* FILEIO_TYPES_HEADER */ \ No newline at end of file +#endif /* FILEIO_TYPES_HEADER */ diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 2e3f4ddb..29da261d 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -46,6 +46,7 @@ # include "zstdcli_trace.h" #endif #include "../lib/zstd.h" /* ZSTD_VERSION_STRING, ZSTD_minCLevel, ZSTD_maxCLevel */ +#include "fileio_asyncio.h" /*-************************************ @@ -179,7 +180,8 @@ static void usage_advanced(const char* programName) #ifdef UTIL_HAS_MIRRORFILELIST DISPLAYOUT( "--output-dir-mirror DIR : processed files are stored into DIR respecting original directory structure \n"); #endif - + if (AIO_supported()) + DISPLAYOUT( "--[no-]asyncio : use asynchronous IO (default: enabled) \n"); #ifndef ZSTD_NOCOMPRESS DISPLAYOUT( "--[no-]check : during compression, add XXH64 integrity checksum to frame (default: enabled)"); @@ -242,9 +244,6 @@ static void usage_advanced(const char* programName) DISPLAYOUT( " -l : print information about zstd compressed files \n"); DISPLAYOUT( "--test : test compressed file integrity \n"); DISPLAYOUT( " -M# : Set a memory usage limit for decompression \n"); -#ifdef ZSTD_MULTITHREAD - DISPLAYOUT( "--[no-]asyncio : use threaded asynchronous IO for output (default: disabled) \n"); -#endif # if ZSTD_SPARSE_DEFAULT DISPLAYOUT( "--[no-]sparse : sparse mode (default: enabled on file, disabled on stdout) \n"); # else @@ -1459,6 +1458,7 @@ int main(int argCount, const char* argv[]) FIO_setTargetCBlockSize(prefs, targetCBlockSize); FIO_setSrcSizeHint(prefs, srcSizeHint); FIO_setLiteralCompressionMode(prefs, literalCompressionMode); + FIO_setSparseWrite(prefs, 0); if (adaptMin > cLevel) cLevel = adaptMin; if (adaptMax < cLevel) cLevel = adaptMax; diff --git a/tests/playTests.sh b/tests/playTests.sh index 
f04f1da7..f97e96e3 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -260,10 +260,13 @@ zstd -dc - < tmp.zst > $INTOVOID zstd -d < tmp.zst > $INTOVOID # implicit stdout when stdin is used zstd -d - < tmp.zst > $INTOVOID println "test : impose memory limitation (must fail)" -zstd -d -f tmp.zst -M2K -c > $INTOVOID && die "decompression needs more memory than allowed" -zstd -d -f tmp.zst --memlimit=2K -c > $INTOVOID && die "decompression needs more memory than allowed" # long command -zstd -d -f tmp.zst --memory=2K -c > $INTOVOID && die "decompression needs more memory than allowed" # long command -zstd -d -f tmp.zst --memlimit-decompress=2K -c > $INTOVOID && die "decompression needs more memory than allowed" # long command +datagen -g500K > tmplimit +zstd -f tmplimit +zstd -d -f tmplimit.zst -M2K -c > $INTOVOID && die "decompression needs more memory than allowed" +zstd -d -f tmplimit.zst --memlimit=2K -c > $INTOVOID && die "decompression needs more memory than allowed" # long command +zstd -d -f tmplimit.zst --memory=2K -c > $INTOVOID && die "decompression needs more memory than allowed" # long command +zstd -d -f tmplimit.zst --memlimit-decompress=2K -c > $INTOVOID && die "decompression needs more memory than allowed" # long command +rm -f tmplimit tmplimit.zst println "test : overwrite protection" zstd -q tmp && die "overwrite check failed!" 
println "test : force overwrite" @@ -1596,11 +1599,11 @@ elif [ "$longCSize19wlog23" -gt "$optCSize19wlog23" ]; then exit 1 fi -println "\n===> zstd asyncio decompression tests " +println "\n===> zstd asyncio tests " addFrame() { datagen -g2M -s$2 >> tmp_uncompressed - datagen -g2M -s$2 | zstd --format=$1 >> tmp_compressed.zst + datagen -g2M -s$2 | zstd -1 --format=$1 >> tmp_compressed.zst } addTwoFrames() { From 4b24ebdcf33cc5c2819272278e2d742b9d0f72fe Mon Sep 17 00:00:00 2001 From: Yonatan Komornik <11005061+yoniko@users.noreply.github.com> Date: Mon, 31 Jan 2022 16:49:49 -0800 Subject: [PATCH 8/8] Travis CI: fix by installing pip compatible with python 3.6 (#3041) Pip install script no longer supports python3.6 by default, switched to a script that does. --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 6a1295b4..c49f3ee3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -102,7 +102,7 @@ matrix: travis_retry curl -o ~/ninja.zip -L 'https://github.com/ninja-build/ninja/releases/download/v1.9.0/ninja-linux.zip' && unzip ~/ninja.zip -d ~/.local/bin - | - travis_retry curl -o ~/get-pip.py -L 'https://bootstrap.pypa.io/get-pip.py' && + travis_retry curl -o ~/get-pip.py -L 'https://bootstrap.pypa.io/pip/3.6/get-pip.py' && python3 ~/get-pip.py --user && pip3 install --user meson script: