From 246982e782849d8646b2d5df6648319935669228 Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Thu, 20 Jan 2022 22:41:47 -0800
Subject: [PATCH 1/8] [dibio] Fix assertion triggered by no inputs

Passing 0 inputs to `DiB_shuffle()` caused an assertion failure where
it should just return.

A test is added in a later commit, with the initial introduction of the
new testing framework.

Fixes #3007.
---
 programs/dibio.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/programs/dibio.c b/programs/dibio.c
index d19f9544..147d1e7b 100644
--- a/programs/dibio.c
+++ b/programs/dibio.c
@@ -27,9 +27,9 @@
 #include <string.h>         /* memset */
 #include <stdio.h>          /* fprintf, fopen, ftello64 */
 #include <errno.h>          /* errno */
-#include <assert.h>
 
 #include "timefn.h"         /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */
+#include "../lib/common/debug.h" /* assert */
 #include "../lib/common/mem.h"  /* read */
 #include "dibio.h"
 
@@ -193,7 +193,8 @@ static U32 DiB_rand(U32* src)
 static void DiB_shuffle(const char** fileNamesTable, unsigned nbFiles) {
     U32 seed = 0xFD2FB528;
     unsigned i;
-    assert(nbFiles >= 1);
+    if (nbFiles == 0)
+        return;
     for (i = nbFiles - 1; i > 0; --i) {
         unsigned const j = DiB_rand(&seed) % (i + 1);
         const char* const tmp = fileNamesTable[j];

From 495dcb839ab6ab40c4156b99be16b010389f2214 Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Thu, 20 Jan 2022 22:45:03 -0800
Subject: [PATCH 2/8] [zstdcli] Fix option detection for --auto-threads

The option `--auto-threads` should still be accepted and parsed, even if
`ZSTD_MULTITHREAD` is not defined. It doesn't mean anything, but we
should still accept the option. Since we want scripts to be able to work
generically.

This bug was caught by tests I added to the new testing framework.
---
 programs/zstdcli.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index fd563e1c..53b47f87 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -802,9 +802,7 @@ int main(int argCount, const char* argv[])
         separateFiles = 0,
         setRealTimePrio = 0,
         singleThread = 0,
-#ifdef ZSTD_MULTITHREAD
         defaultLogicalCores = 0,
-#endif
         showDefaultCParams = 0,
         ultra=0,
         contentSize=1;
@@ -996,7 +994,6 @@ int main(int argCount, const char* argv[])
                 if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readSizeTFromChar(&argument); continue; }
                 if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readSizeTFromChar(&argument); continue; }
                 if (longCommandWArg(&argument, "--output-dir-flat")) { NEXT_FIELD(outDirName); continue; }
-#ifdef ZSTD_MULTITHREAD
                 if (longCommandWArg(&argument, "--auto-threads")) {
                     const char* threadDefault = NULL;
                     NEXT_FIELD(threadDefault);
@@ -1004,7 +1001,6 @@ int main(int argCount, const char* argv[])
                         defaultLogicalCores = 1;
                     continue;
                 }
-#endif
 #ifdef UTIL_HAS_MIRRORFILELIST
                 if (longCommandWArg(&argument, "--output-dir-mirror")) { NEXT_FIELD(outMirroredDirName); continue; }
 #endif
@@ -1220,7 +1216,7 @@ int main(int argCount, const char* argv[])
         }
     }
 #else
-    (void)singleThread; (void)nbWorkers;
+    (void)singleThread; (void)nbWorkers; (void)defaultLogicalCores;
 #endif
 
     g_utilDisplayLevel = g_displayLevel;

From f088c430e35d8b97d11aa38c5c78a72931ec7bad Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Thu, 20 Jan 2022 22:46:45 -0800
Subject: [PATCH 3/8] [datagen] Remove extra newline printed

`datagen` was printing a `\n` even when it had no other output. Raise
the output level for the final `\n` to the minimum output level used.

This minor bug was caught by the new testing framework.
---
 tests/datagencli.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/datagencli.c b/tests/datagencli.c
index ecc05f95..7300fdb7 100644
--- a/tests/datagencli.c
+++ b/tests/datagencli.c
@@ -124,7 +124,7 @@ int main(int argc, const char** argv)
     DISPLAYLEVEL(3, "Seed = %u \n", (unsigned)seed);
 
     RDG_genStdout(size, (double)probaU32/100, litProba, seed);
-    DISPLAYLEVEL(1, "\n");
+    DISPLAYLEVEL(3, "\n");
 
     return 0;
 }

From f3096ff6d1fcf87eeec876da13c06a97343ed6cf Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Tue, 18 Jan 2022 13:31:27 -0800
Subject: [PATCH 4/8] [test] Add new CLI testing platform

Adds the new CLI testing platform that I'm proposing.
See the added `README.md` for details.
---
 tests/cli-tests/.gitignore                    |   4 +
 tests/cli-tests/README.md                     | 248 +++++++
 tests/cli-tests/basic/help.sh                 |   7 +
 tests/cli-tests/basic/help.sh.stdout.glob     |  34 +
 tests/cli-tests/basic/version.sh              |   3 +
 tests/cli-tests/basic/version.sh.stdout.glob  |   2 +
 tests/cli-tests/bin/cmp_size                  |  46 ++
 tests/cli-tests/bin/datagen                   |   3 +
 tests/cli-tests/bin/die                       |   4 +
 tests/cli-tests/bin/println                   |   2 +
 tests/cli-tests/bin/unzstd                    |   1 +
 tests/cli-tests/bin/zstd                      |   7 +
 tests/cli-tests/bin/zstdcat                   |   1 +
 tests/cli-tests/bin/zstdgrep                  |   2 +
 tests/cli-tests/common/format.sh              |  19 +
 tests/cli-tests/common/mtime.sh               |  13 +
 tests/cli-tests/common/permissions.sh         |  18 +
 tests/cli-tests/common/platform.sh            |  37 +
 tests/cli-tests/compression/adapt.sh          |   6 +
 tests/cli-tests/compression/basic.sh          |  28 +
 .../compression/compress-literals.sh          |  10 +
 tests/cli-tests/compression/format.sh         |  16 +
 tests/cli-tests/compression/levels.sh         |  64 ++
 .../compression/levels.sh.stderr.exact        |  75 ++
 .../compression/long-distance-matcher.sh      |   7 +
 tests/cli-tests/compression/multi-threaded.sh |  11 +
 .../cli-tests/compression/row-match-finder.sh |   7 +
 tests/cli-tests/compression/setup             |   7 +
 tests/cli-tests/compression/stream-size.sh    |   7 +
 tests/cli-tests/dict-builder/no-inputs        |   3 +
 tests/cli-tests/dict-builder/no-inputs.exit   |   1 +
 .../dict-builder/no-inputs.stderr.exact       |   5 +
 .../dictionaries/dictionary-mismatch.sh       |  29 +
 .../dictionary-mismatch.sh.stderr.exact       |   8 +
 tests/cli-tests/dictionaries/setup            |   6 +
 tests/cli-tests/dictionaries/setup_once       |  24 +
 tests/cli-tests/run.py                        | 687 ++++++++++++++++++
 37 files changed, 1452 insertions(+)
 create mode 100644 tests/cli-tests/.gitignore
 create mode 100644 tests/cli-tests/README.md
 create mode 100755 tests/cli-tests/basic/help.sh
 create mode 100644 tests/cli-tests/basic/help.sh.stdout.glob
 create mode 100755 tests/cli-tests/basic/version.sh
 create mode 100644 tests/cli-tests/basic/version.sh.stdout.glob
 create mode 100755 tests/cli-tests/bin/cmp_size
 create mode 100755 tests/cli-tests/bin/datagen
 create mode 100755 tests/cli-tests/bin/die
 create mode 100755 tests/cli-tests/bin/println
 create mode 120000 tests/cli-tests/bin/unzstd
 create mode 100755 tests/cli-tests/bin/zstd
 create mode 120000 tests/cli-tests/bin/zstdcat
 create mode 100755 tests/cli-tests/bin/zstdgrep
 create mode 100644 tests/cli-tests/common/format.sh
 create mode 100644 tests/cli-tests/common/mtime.sh
 create mode 100644 tests/cli-tests/common/permissions.sh
 create mode 100644 tests/cli-tests/common/platform.sh
 create mode 100755 tests/cli-tests/compression/adapt.sh
 create mode 100755 tests/cli-tests/compression/basic.sh
 create mode 100755 tests/cli-tests/compression/compress-literals.sh
 create mode 100755 tests/cli-tests/compression/format.sh
 create mode 100755 tests/cli-tests/compression/levels.sh
 create mode 100644 tests/cli-tests/compression/levels.sh.stderr.exact
 create mode 100755 tests/cli-tests/compression/long-distance-matcher.sh
 create mode 100755 tests/cli-tests/compression/multi-threaded.sh
 create mode 100755 tests/cli-tests/compression/row-match-finder.sh
 create mode 100755 tests/cli-tests/compression/setup
 create mode 100755 tests/cli-tests/compression/stream-size.sh
 create mode 100755 tests/cli-tests/dict-builder/no-inputs
 create mode 100644 tests/cli-tests/dict-builder/no-inputs.exit
 create mode 100644 tests/cli-tests/dict-builder/no-inputs.stderr.exact
 create mode 100755 tests/cli-tests/dictionaries/dictionary-mismatch.sh
 create mode 100644 tests/cli-tests/dictionaries/dictionary-mismatch.sh.stderr.exact
 create mode 100755 tests/cli-tests/dictionaries/setup
 create mode 100755 tests/cli-tests/dictionaries/setup_once
 create mode 100755 tests/cli-tests/run.py

diff --git a/tests/cli-tests/.gitignore b/tests/cli-tests/.gitignore
new file mode 100644
index 00000000..4bb425b6
--- /dev/null
+++ b/tests/cli-tests/.gitignore
@@ -0,0 +1,4 @@
+scratch/
+!bin/
+!datagen
+!zstdcat
diff --git a/tests/cli-tests/README.md b/tests/cli-tests/README.md
new file mode 100644
index 00000000..3098f466
--- /dev/null
+++ b/tests/cli-tests/README.md
@@ -0,0 +1,248 @@
+# CLI tests
+
+The CLI tests are focused on testing the zstd CLI.
+They are intended to be simple tests that the CLI and arguments work as advertised.
+They are not intended to test the library, only the code in `programs/`.
+The library will get incidental coverage, but if you find yourself trying to trigger a specific condition in the library, this is the wrong tool.
+
+## Test runner usage
+
+The test runner `run.py` will run tests against the in-tree build of `zstd` and `datagen` by default. Which means that `zstd` and `datagen` must be built.
+
+The `zstd` binary used can be passed with `--zstd /path/to/zstd`.
+Additionally, to run `zstd` through a tool like `valgrind` or `qemu`, set the `--exec-prefix 'valgrind -q'` flag.
+
+Similarly, the `--datagen`, and `--zstdgrep` flags can be set to specify
+the paths to their respective binaries. However, these tools do not use
+the `EXEC_PREFIX`.
+
+Each test executes in its own scratch directory under `scratch/test/name`. E.g. `scratch/basic/help.sh/`. Normally these directories are removed after the test executes. However, the `--preserve` flag will preserve these directories after execution, and save the tests exit code, stdout, and stderr in the scratch directory to `exit`, `stderr`, and `stdout` respectively. This can be useful for debugging/editing a test and updating the expected output.
+
+### Running all the tests
+
+By default the test runner `run.py` will run all the tests, and report the results.
+
+Examples:
+
+```
+./run.py
+./run.py --preserve
+./run.py --zstd ../../build/programs/zstd --datagen ../../build/tests/datagen
+```
+
+### Running specific tests
+
+A set of test names can be passed to the test runner `run.py` to only execute those tests.
+This can be useful for writing or debugging a test, especially with `--preserve`.
+
+The test name can either be the path to the test file, or the test name, which is the path relative to the test directory.
+
+Examples:
+
+```
+./run.py basic/help.sh
+./run.py --preserve basic/help.sh basic/version.sh
+./run.py --preserve --verbose basic/help.sh
+```
+
+## Writing a test
+
+Test cases are arbitrary executables, and can be written in any language, but are generally shell scripts.
+After the script executes, the exit code, stderr, and stdout are compared against the expectations.
+
+Each test is run in a clean directory that the test can use for intermediate files. This directory will be cleaned up at the end of the test, unless `--preserve` is passed to the test runner. Additionally, the `setup` script can prepare the directory before the test runs.
+
+### Calling zstd, utilities, and environment variables
+
+The `$PATH` for tests is prepended with the `bin/` sub-directory, which contains helper scripts for ease of testing.
+The `zstd` binary will call the zstd binary specified by `run.py` with the correct `$EXEC_PREFIX`.
+Similarly, `datagen`, `unzstd`, `zstdgrep`, `zstdcat`, etc, are provided.
+
+Helper utilities like `cmp_size`, `println`, and `die` are provided here too. See their scripts for details.
+
+Common shell script libraries are provided under `common/`, with helper variables and functions. They can be sourced with `source "$COMMON/library.sh`.
+
+Lastly, environment variables are provided for testing, which can be listed when calling `run.py` with `--verbose`.
+They are generally used by the helper scripts in `bin/` to coordinate everything.
+
+### Basic test case
+
+When executing your `$TEST` executable, by default the exit code is expected to be `0`. However, you can provide an alterate expected exit code in a `$TEST.exit` file.
+
+When executing your `$TEST` exectuable, by default the expected stderr and stdout are empty. However, you can override the default by providing one of three files:
+
+* `$TEST.{stdout,stderr}.exact`
+* `$TEST.{stdout,stderr}.glob`
+* `$TEST.{stdout,stderr}.ignore`
+
+If you provide a `.exact` file, the output is expected to exactly match, byte-for-byte.
+
+If you provide a `.glob` file, the output is expected to match the expected file, where each line is interpreted as a glob syntax. Additionally, a line containing only `...` matches all lines until the next expected line matches.
+
+If you provide a `.ignore` file, the output is ignored.
+
+#### Passing examples
+
+All these examples pass.
+
+Exit 1, and change the expectation to be 1.
+
+```
+exit-1.sh
+---
+#!/bin/sh
+exit 1
+---
+
+exit-1.sh.exit
+---
+1
+---
+```
+
+Check the stdout output exactly matches.
+
+```
+echo.sh
+---
+#!/bin/sh
+echo "hello world"
+---
+
+echo.sh.stdout.exact
+---
+hello world
+---
+```
+
+Check the stderr output using a glob.
+
+```
+random.sh
+---
+#!/bin/sh
+head -c 10 < /dev/urandom | xxd >&2
+---
+
+random.sh.stderr.glob
+---
+00000000: * * * * *                 *
+```
+
+Multiple lines can be matched with ...
+
+```
+random-num-lines.sh
+---
+#!/bin/sh
+echo hello
+seq 0 $RANDOM
+echo world
+---
+
+random-num-lines.sh.stdout.glob
+---
+hello
+0
+...
+world
+---
+```
+
+#### Failing examples
+
+Exit code is expected to be 0, but is 1.
+
+```
+exit-1.sh
+---
+#!/bin/sh
+exit 1
+---
+```
+
+Stdout is expected to be empty, but isn't.
+
+```
+echo.sh
+---
+#!/bin/sh
+echo hello world
+```
+
+Stderr is expected to be hello but is world.
+
+```
+hello.sh
+---
+#!/bin/sh
+echo world >&2
+---
+
+hello.sh.stderr.exact
+---
+hello
+---
+```
+
+### Setup & teardown scripts
+
+Finally, test writing can be eased with setup and teardown scripts.
+Each directory in the test directory is a test-suite consisting of all tests within that directory (but not sub-directories).
+This test suite can come with 4 scripts to help test writing:
+
+* `setup_once`
+* `teardown_once`
+* `setup`
+* `teardown`
+
+The `setup_once` and `teardown_once` are run once before and after all the tests in the suite respectively.
+They operate in the scratch directory for the test suite, which is the parent directory of each scratch directory for each test case.
+They can do work that is shared between tests to improve test efficiency.
+For example, the `dictionaries/setup_once` script builds several dictionaries, for use in the `dictionaries` tests.
+
+The `setup` and `teardown` scripts run before and after each test case respectively, in the test case's scratch directory.
+These scripts can do work that is shared between test cases to make tests more succinct.
+For example, the `dictionaries/setup` script copies the dictionaries built by the `dictionaries/setup_once` script into the test's scratch directory, to make them easier to use, and make sure they aren't accidentally modified.
+
+#### Examples
+
+```
+basic/setup
+---
+#!/bin/sh
+# Create some files for testing with
+datagen > file
+datagen > file0
+datagen > file1
+---
+
+basic/test.sh
+---
+#!/bin/sh
+zstd file file0 file1
+---
+
+dictionaries/setup_once
+---
+#!/bin/sh
+set -e
+
+mkdir files/ dicts/
+for i in $(seq 10); do
+	datagen -g1000 > files/$i
+done
+
+zstd --train -r files/ -o dicts/0
+---
+
+dictionaries/setup
+---
+#!/bin/sh
+
+# Runs in the test case's scratch directory.
+# The test suite's scratch directory that
+# `setup_once` operates in is the parent directory.
+cp -r ../files ../dicts .
+---
+```
diff --git a/tests/cli-tests/basic/help.sh b/tests/cli-tests/basic/help.sh
new file mode 100755
index 00000000..c683b6a6
--- /dev/null
+++ b/tests/cli-tests/basic/help.sh
@@ -0,0 +1,7 @@
+#!/bin/sh -e
+println "+ zstd -h"
+zstd -h
+println "+ zstd -H"
+zstd -H
+println "+ zstd --help"
+zstd --help
diff --git a/tests/cli-tests/basic/help.sh.stdout.glob b/tests/cli-tests/basic/help.sh.stdout.glob
new file mode 100644
index 00000000..5b2f8e45
--- /dev/null
+++ b/tests/cli-tests/basic/help.sh.stdout.glob
@@ -0,0 +1,34 @@
++ zstd -h
+*** zstd command line interface *-bits v1.*.*, by Yann Collet ***
+Usage :
+      zstd *args* *FILE(s)* *-o file*
+
+FILE    : a filename
+          with no FILE, or when FILE is - , read standard input
+Arguments :
+ -#     : # compression level*
+ -d     : decompression
+ -D DICT: use DICT as Dictionary for compression or decompression
+ -o file: result stored into `file` (only 1 output file)
+ -f     : disable input and output checks. Allows overwriting existing files,
+          input from console, output to stdout, operating on links,
+          block devices, etc.
+--rm    : remove source file(s) after successful de/compression
+ -k     : preserve source file(s) (default)
+ -h/-H  : display help/long help and exit
+
+Advanced arguments :
+ -V     : display Version number and exit
+...
++ zstd -H
+...
+Arguments :
+...
+Advanced arguments :
+...
++ zstd --help
+...
+Arguments :
+...
+Advanced arguments :
+...
diff --git a/tests/cli-tests/basic/version.sh b/tests/cli-tests/basic/version.sh
new file mode 100755
index 00000000..d50de0f3
--- /dev/null
+++ b/tests/cli-tests/basic/version.sh
@@ -0,0 +1,3 @@
+#!/bin/sh -e
+zstd -V
+zstd --version
diff --git a/tests/cli-tests/basic/version.sh.stdout.glob b/tests/cli-tests/basic/version.sh.stdout.glob
new file mode 100644
index 00000000..54968fa4
--- /dev/null
+++ b/tests/cli-tests/basic/version.sh.stdout.glob
@@ -0,0 +1,2 @@
+*** zstd command line interface *-bits v1.*.*, by Yann Collet ***
+*** zstd command line interface *-bits v1.*.*, by Yann Collet ***
diff --git a/tests/cli-tests/bin/cmp_size b/tests/cli-tests/bin/cmp_size
new file mode 100755
index 00000000..5afa1c59
--- /dev/null
+++ b/tests/cli-tests/bin/cmp_size
@@ -0,0 +1,46 @@
+#!/bin/sh
+
+# Small utility to
+
+set -e
+
+usage()
+{
+	printf "USAGE:\n\t$0 [-eq|-ne|-lt|-le|-gt|-ge] FILE1 FILE2\n"
+}
+
+help()
+{
+	printf "Small utility to compare file sizes without printing them with set -x.\n\n"
+	usage
+}
+
+case "$1" in
+	-h) help; exit 0  ;;
+	--help) help; exit 0 ;;
+esac
+
+if ! test -f $2; then
+	printf "FILE1='%b' is not a file\n\n" "$2"
+	usage
+	exit 1
+fi
+
+if ! test -f $3; then
+	printf "FILE2='%b' is not a file\n\n" "$3"
+	usage
+	exit 1
+fi
+
+
+size1=$(wc -c < $2)
+size2=$(wc -c < $3)
+
+case "$1" in
+	-eq) [ "$size1" -eq "$size2" ] ;;
+	-ne) [ "$size1" -ne "$size2" ] ;;
+	-lt) [ "$size1" -lt "$size2" ] ;;
+	-le) [ "$size1" -le "$size2" ] ;;
+	-gt) [ "$size1" -gt "$size2" ] ;;
+	-ge) [ "$size1" -ge "$size2" ] ;;
+esac
diff --git a/tests/cli-tests/bin/datagen b/tests/cli-tests/bin/datagen
new file mode 100755
index 00000000..8c60cbcc
--- /dev/null
+++ b/tests/cli-tests/bin/datagen
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+"$DATAGEN_BIN" $@
diff --git a/tests/cli-tests/bin/die b/tests/cli-tests/bin/die
new file mode 100755
index 00000000..8633bc97
--- /dev/null
+++ b/tests/cli-tests/bin/die
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+println "${*}" 1>&2
+exit 1
diff --git a/tests/cli-tests/bin/println b/tests/cli-tests/bin/println
new file mode 100755
index 00000000..1da24604
--- /dev/null
+++ b/tests/cli-tests/bin/println
@@ -0,0 +1,2 @@
+#!/bin/env sh
+printf '%b\n' "${*}"
diff --git a/tests/cli-tests/bin/unzstd b/tests/cli-tests/bin/unzstd
new file mode 120000
index 00000000..613f917f
--- /dev/null
+++ b/tests/cli-tests/bin/unzstd
@@ -0,0 +1 @@
+zstd
\ No newline at end of file
diff --git a/tests/cli-tests/bin/zstd b/tests/cli-tests/bin/zstd
new file mode 100755
index 00000000..198fc6d2
--- /dev/null
+++ b/tests/cli-tests/bin/zstd
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+if [ -z "$EXEC_PREFIX" ]; then
+    "$ZSTD_BIN" $@
+else
+    $EXEC_PREFIX "$ZSTD_BIN" $@
+fi
diff --git a/tests/cli-tests/bin/zstdcat b/tests/cli-tests/bin/zstdcat
new file mode 120000
index 00000000..613f917f
--- /dev/null
+++ b/tests/cli-tests/bin/zstdcat
@@ -0,0 +1 @@
+zstd
\ No newline at end of file
diff --git a/tests/cli-tests/bin/zstdgrep b/tests/cli-tests/bin/zstdgrep
new file mode 100755
index 00000000..8821ebb5
--- /dev/null
+++ b/tests/cli-tests/bin/zstdgrep
@@ -0,0 +1,2 @@
+#!/bin/sh
+"$ZSTDGREP_BIN" $@
diff --git a/tests/cli-tests/common/format.sh b/tests/cli-tests/common/format.sh
new file mode 100644
index 00000000..20ff0f05
--- /dev/null
+++ b/tests/cli-tests/common/format.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+source "$COMMON/platform.sh"
+
+zstd_supports_format()
+{
+	zstd -h | grep > $INTOVOID -- "--format=$1"
+}
+
+format_extension()
+{
+	if [ "$1" = "zstd" ]; then
+		printf "zst"
+	elif [ "$1" = "gzip" ]; then
+		printf "gz"
+	else
+		printf "$1"
+	fi
+}
diff --git a/tests/cli-tests/common/mtime.sh b/tests/cli-tests/common/mtime.sh
new file mode 100644
index 00000000..7ce931a9
--- /dev/null
+++ b/tests/cli-tests/common/mtime.sh
@@ -0,0 +1,13 @@
+source "$COMMON/platform.sh"
+
+MTIME="stat -c %Y"
+case "$UNAME" in
+    Darwin | FreeBSD | OpenBSD | NetBSD) MTIME="stat -f %m" ;;
+esac
+
+assertSameMTime() {
+    MT1=$($MTIME "$1")
+    MT2=$($MTIME "$2")
+    echo MTIME $MT1 $MT2
+    [ "$MT1" = "$MT2" ] || die "mtime on $1 doesn't match mtime on $2 ($MT1 != $MT2)"
+}
diff --git a/tests/cli-tests/common/permissions.sh b/tests/cli-tests/common/permissions.sh
new file mode 100644
index 00000000..b1f6ea3b
--- /dev/null
+++ b/tests/cli-tests/common/permissions.sh
@@ -0,0 +1,18 @@
+source "$COMMON/platform.sh"
+
+GET_PERMS="stat -c %a"
+case "$UNAME" in
+    Darwin | FreeBSD | OpenBSD | NetBSD) GET_PERMS="stat -f %Lp" ;;
+esac
+
+assertFilePermissions() {
+    STAT1=$($GET_PERMS "$1")
+    STAT2=$2
+    [ "$STAT1" = "$STAT2" ] || die "permissions on $1 don't match expected ($STAT1 != $STAT2)"
+}
+
+assertSamePermissions() {
+    STAT1=$($GET_PERMS "$1")
+    STAT2=$($GET_PERMS "$2")
+    [ "$STAT1" = "$STAT2" ] || die "permissions on $1 don't match those on $2 ($STAT1 != $STAT2)"
+}
diff --git a/tests/cli-tests/common/platform.sh b/tests/cli-tests/common/platform.sh
new file mode 100644
index 00000000..6eb45eab
--- /dev/null
+++ b/tests/cli-tests/common/platform.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+UNAME=$(uname)
+
+isWindows=false
+INTOVOID="/dev/null"
+case "$UNAME" in
+  GNU) DEVDEVICE="/dev/random" ;;
+  *) DEVDEVICE="/dev/zero" ;;
+esac
+case "$OS" in
+  Windows*)
+    isWindows=true
+    INTOVOID="NUL"
+    DEVDEVICE="NUL"
+    ;;
+esac
+
+case "$UNAME" in
+  Darwin) MD5SUM="md5 -r" ;;
+  FreeBSD) MD5SUM="gmd5sum" ;;
+  NetBSD) MD5SUM="md5 -n" ;;
+  OpenBSD) MD5SUM="md5" ;;
+  *) MD5SUM="md5sum" ;;
+esac
+
+DIFF="diff"
+case "$UNAME" in
+  SunOS) DIFF="gdiff" ;;
+esac
+
+if echo hello | zstd -v -T2 2>&1 > $INTOVOID | grep -q 'multi-threading is disabled'
+then
+    hasMT=""
+else
+    hasMT="true"
+fi
diff --git a/tests/cli-tests/compression/adapt.sh b/tests/cli-tests/compression/adapt.sh
new file mode 100755
index 00000000..564e955b
--- /dev/null
+++ b/tests/cli-tests/compression/adapt.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+set -e
+
+# Test --adapt
+zstd -f file --adapt -c | zstd -t
diff --git a/tests/cli-tests/compression/basic.sh b/tests/cli-tests/compression/basic.sh
new file mode 100755
index 00000000..6f0f8793
--- /dev/null
+++ b/tests/cli-tests/compression/basic.sh
@@ -0,0 +1,28 @@
+#!/bin/sh -e
+
+# Uncomment the set -x line for debugging
+# set -x
+
+# Test compression flags and check that they work
+zstd file                 ; zstd -t file.zst
+zstd -f file              ; zstd -t file.zst
+zstd -f -z file           ; zstd -t file.zst
+zstd -f -k file           ; zstd -t file.zst
+zstd -f -C file           ; zstd -t file.zst
+zstd -f --check file      ; zstd -t file.zst
+zstd -f --no-check file   ; zstd -t file.zst
+zstd -f -- file           ; zstd -t file.zst
+
+# Test output file compression
+zstd -o file-out.zst ; zstd -t file-out.zst
+zstd -fo file-out.zst; zstd -t file-out.zst
+
+# Test compression to stdout
+zstd -c file       | zstd -t
+zstd --stdout file | zstd -t
+println bob | zstd | zstd -t
+
+# Test --rm
+cp file file-rm
+zstd --rm file-rm; zstd -t file-rm.zst
+test ! -f file-rm
diff --git a/tests/cli-tests/compression/compress-literals.sh b/tests/cli-tests/compression/compress-literals.sh
new file mode 100755
index 00000000..573481a3
--- /dev/null
+++ b/tests/cli-tests/compression/compress-literals.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+set -e
+
+# Test --[no-]compress-literals
+zstd file --no-compress-literals -1 -c       | zstd -t
+zstd file --no-compress-literals -19 -c      | zstd -t
+zstd file --no-compress-literals --fast=1 -c | zstd -t
+zstd file --compress-literals -1 -c          | zstd -t
+zstd file --compress-literals --fast=1 -c    | zstd -t
diff --git a/tests/cli-tests/compression/format.sh b/tests/cli-tests/compression/format.sh
new file mode 100755
index 00000000..86fb4008
--- /dev/null
+++ b/tests/cli-tests/compression/format.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+source "$COMMON/format.sh"
+
+set -e
+
+# Test --format
+zstd --format=zstd file -f
+zstd -t file.zst
+for format in "gzip" "lz4" "xz" "lzma"; do
+	if zstd_supports_format $format; then
+		zstd --format=$format file
+		zstd -t file.$(format_extension $format)
+		zstd -c --format=$format file | zstd -t --format=$format
+	fi
+done
diff --git a/tests/cli-tests/compression/levels.sh b/tests/cli-tests/compression/levels.sh
new file mode 100755
index 00000000..6bd0aca0
--- /dev/null
+++ b/tests/cli-tests/compression/levels.sh
@@ -0,0 +1,64 @@
+#!/bin/sh
+
+set -e
+set -x
+
+datagen > file
+
+# Compress with various levels and ensure that their sizes are ordered
+zstd --fast=10 file -o file-f10.zst
+zstd --fast=1 file -o file-f1.zst
+zstd -1 file -o file-1.zst
+zstd -19 file -o file-19.zst
+zstd -22 --ultra file -o file-22.zst
+
+zstd -t file-{f10,f1,1,19,22}.zst
+
+cmp_size -ne file-19.zst file-22.zst
+cmp_size -lt file-19.zst file-1.zst
+cmp_size -lt file-1.zst file-f1.zst
+cmp_size -lt file-f1.zst file-f10.zst
+
+# Test default levels
+zstd --fast file -f
+cmp file.zst file-f1.zst || die "--fast is not level -1"
+
+zstd -0 file -o file-0.zst
+zstd -f file
+cmp file.zst file-0.zst || die "Level 0 is not the default level"
+
+# Test level clamping
+zstd -99 file -o file-99.zst
+cmp file-19.zst file-99.zst || die "Level 99 is clamped to 19"
+zstd --fast=200000 file -c | zstd -t
+
+zstd -5000000000 -f file       && die "Level too large, must fail" ||:
+zstd --fast=5000000000 -f file && die "Level too large, must fail" ||:
+
+# Test setting a level through the environment variable
+ZSTD_CLEVEL=-10 zstd file -o file-f10-env.zst
+ZSTD_CLEVEL=1 zstd file -o file-1-env.zst
+ZSTD_CLEVEL=+19 zstd file -o file-19-env.zst
+ZSTD_CLEVEL=+99 zstd file -o file-99-env.zst
+
+cmp file-f10{,-env}.zst || die "Environment variable failed to set level"
+cmp file-1{,-env}.zst   || die "Environment variable failed to set level"
+cmp file-19{,-env}.zst  || die "Environment variable failed to set level"
+cmp file-99{,-env}.zst  || die "Environment variable failed to set level"
+
+# Test invalid environment clevel is the default level
+zstd -f file
+ZSTD_CLEVEL=- zstd -f file -o file-env.zst         ; cmp file.zst file-env.zst
+ZSTD_CLEVEL=+ zstd -f file -o file-env.zst         ; cmp file.zst file-env.zst
+ZSTD_CLEVEL=a zstd -f file -o file-env.zst         ; cmp file.zst file-env.zst
+ZSTD_CLEVEL=-a zstd -f file -o file-env.zst        ; cmp file.zst file-env.zst
+ZSTD_CLEVEL=+a zstd -f file -o file-env.zst        ; cmp file.zst file-env.zst
+ZSTD_CLEVEL=3a7 zstd -f file -o file-env.zst       ; cmp file.zst file-env.zst
+ZSTD_CLEVEL=5000000000 zstd -f file -o file-env.zst; cmp file.zst file-env.zst
+
+# Test environment clevel is overridden by command line
+ZSTD_CLEVEL=10 zstd -f file -1 -o file-1-env.zst
+ZSTD_CLEVEL=10 zstd -f file --fast=1 -o file-f1-env.zst
+
+cmp file-1{,-env}.zst  || die "Environment variable not overridden"
+cmp file-f1{,-env}.zst || die "Environment variable not overridden"
diff --git a/tests/cli-tests/compression/levels.sh.stderr.exact b/tests/cli-tests/compression/levels.sh.stderr.exact
new file mode 100644
index 00000000..c0b7066f
--- /dev/null
+++ b/tests/cli-tests/compression/levels.sh.stderr.exact
@@ -0,0 +1,75 @@
++ datagen
++ zstd --fast=10 file -o file-f10.zst
++ zstd --fast=1 file -o file-f1.zst
++ zstd -1 file -o file-1.zst
++ zstd -19 file -o file-19.zst
++ zstd -22 --ultra file -o file-22.zst
++ zstd -t file-f10.zst file-f1.zst file-1.zst file-19.zst file-22.zst
++ cmp_size -ne file-19.zst file-22.zst
++ cmp_size -lt file-19.zst file-1.zst
++ cmp_size -lt file-1.zst file-f1.zst
++ cmp_size -lt file-f1.zst file-f10.zst
++ zstd --fast file -f
++ cmp file.zst file-f1.zst
++ zstd -0 file -o file-0.zst
++ zstd -f file
++ cmp file.zst file-0.zst
++ zstd -99 file -o file-99.zst
+Warning : compression level higher than max, reduced to 19 
++ cmp file-19.zst file-99.zst
++ zstd --fast=200000 file -c
++ zstd -t
++ zstd -5000000000 -f file
+error: numeric value overflows 32-bit unsigned int 
++ :
++ zstd --fast=5000000000 -f file
+error: numeric value overflows 32-bit unsigned int 
++ :
++ ZSTD_CLEVEL=-10
++ zstd file -o file-f10-env.zst
++ ZSTD_CLEVEL=1
++ zstd file -o file-1-env.zst
++ ZSTD_CLEVEL=+19
++ zstd file -o file-19-env.zst
++ ZSTD_CLEVEL=+99
++ zstd file -o file-99-env.zst
+Warning : compression level higher than max, reduced to 19 
++ cmp file-f10.zst file-f10-env.zst
++ cmp file-1.zst file-1-env.zst
++ cmp file-19.zst file-19-env.zst
++ cmp file-99.zst file-99-env.zst
++ zstd -f file
++ ZSTD_CLEVEL=-
++ zstd -f file -o file-env.zst
+Ignore environment variable setting ZSTD_CLEVEL=-: not a valid integer value 
++ cmp file.zst file-env.zst
++ ZSTD_CLEVEL=+
++ zstd -f file -o file-env.zst
+Ignore environment variable setting ZSTD_CLEVEL=+: not a valid integer value 
++ cmp file.zst file-env.zst
++ ZSTD_CLEVEL=a
++ zstd -f file -o file-env.zst
+Ignore environment variable setting ZSTD_CLEVEL=a: not a valid integer value 
++ cmp file.zst file-env.zst
++ ZSTD_CLEVEL=-a
++ zstd -f file -o file-env.zst
+Ignore environment variable setting ZSTD_CLEVEL=-a: not a valid integer value 
++ cmp file.zst file-env.zst
++ ZSTD_CLEVEL=+a
++ zstd -f file -o file-env.zst
+Ignore environment variable setting ZSTD_CLEVEL=+a: not a valid integer value 
++ cmp file.zst file-env.zst
++ ZSTD_CLEVEL=3a7
++ zstd -f file -o file-env.zst
+Ignore environment variable setting ZSTD_CLEVEL=3a7: not a valid integer value 
++ cmp file.zst file-env.zst
++ ZSTD_CLEVEL=5000000000
++ zstd -f file -o file-env.zst
+Ignore environment variable setting ZSTD_CLEVEL=5000000000: numeric value too large 
++ cmp file.zst file-env.zst
++ ZSTD_CLEVEL=10
++ zstd -f file -1 -o file-1-env.zst
++ ZSTD_CLEVEL=10
++ zstd -f file --fast=1 -o file-f1-env.zst
++ cmp file-1.zst file-1-env.zst
++ cmp file-f1.zst file-f1-env.zst
diff --git a/tests/cli-tests/compression/long-distance-matcher.sh b/tests/cli-tests/compression/long-distance-matcher.sh
new file mode 100755
index 00000000..8f2c61bf
--- /dev/null
+++ b/tests/cli-tests/compression/long-distance-matcher.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+set -e
+
+# Test --long
+zstd -f file --long   ; zstd -t file.zst
+zstd -f file --long=20; zstd -t file.zst
diff --git a/tests/cli-tests/compression/multi-threaded.sh b/tests/cli-tests/compression/multi-threaded.sh
new file mode 100755
index 00000000..e3961330
--- /dev/null
+++ b/tests/cli-tests/compression/multi-threaded.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+set -e
+
+# Test multi-threaded flags
+zstd --single-thread file -f            ; zstd -t file.zst
+zstd -T2 -f file                        ; zstd -t file.zst
+zstd --rsyncable -f file                ; zstd -t file.zst
+zstd -T0 -f file                        ; zstd -t file.zst
+zstd -T0 --auto-threads=logical -f file ; zstd -t file.zst
+zstd -T0 --auto-threads=physical -f file; zstd -t file.zst
diff --git a/tests/cli-tests/compression/row-match-finder.sh b/tests/cli-tests/compression/row-match-finder.sh
new file mode 100755
index 00000000..5b36017a
--- /dev/null
+++ b/tests/cli-tests/compression/row-match-finder.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+set -e
+
+# Test --[no-]row-match-finder
+zstd file -7f --row-match-finder
+zstd file -7f --no-row-match-finder
diff --git a/tests/cli-tests/compression/setup b/tests/cli-tests/compression/setup
new file mode 100755
index 00000000..96e2309b
--- /dev/null
+++ b/tests/cli-tests/compression/setup
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+set -e
+
+datagen > file
+datagen > file0
+datagen > file1
diff --git a/tests/cli-tests/compression/stream-size.sh b/tests/cli-tests/compression/stream-size.sh
new file mode 100755
index 00000000..7344769a
--- /dev/null
+++ b/tests/cli-tests/compression/stream-size.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+set -e
+
+# Test stream size & hint
+datagen -g7654 | zstd --stream-size=7654 | zstd -t
+datagen -g7654 | zstd --size-hint=7000   | zstd -t
diff --git a/tests/cli-tests/dict-builder/no-inputs b/tests/cli-tests/dict-builder/no-inputs
new file mode 100755
index 00000000..d37fdce5
--- /dev/null
+++ b/tests/cli-tests/dict-builder/no-inputs
@@ -0,0 +1,3 @@
+#!/bin/sh
+set -x
+zstd --train
diff --git a/tests/cli-tests/dict-builder/no-inputs.exit b/tests/cli-tests/dict-builder/no-inputs.exit
new file mode 100644
index 00000000..8351c193
--- /dev/null
+++ b/tests/cli-tests/dict-builder/no-inputs.exit
@@ -0,0 +1 @@
+14
diff --git a/tests/cli-tests/dict-builder/no-inputs.stderr.exact b/tests/cli-tests/dict-builder/no-inputs.stderr.exact
new file mode 100644
index 00000000..b3b5599d
--- /dev/null
+++ b/tests/cli-tests/dict-builder/no-inputs.stderr.exact
@@ -0,0 +1,5 @@
++ zstd --train
+!  Warning : nb of samples too low for proper processing ! 
+!  Please provide _one file per sample_. 
+!  Alternatively, split files into fixed-size blocks representative of samples, with -B# 
+Error 14 : nb of samples too low
diff --git a/tests/cli-tests/dictionaries/dictionary-mismatch.sh b/tests/cli-tests/dictionaries/dictionary-mismatch.sh
new file mode 100755
index 00000000..2b5d5a0d
--- /dev/null
+++ b/tests/cli-tests/dictionaries/dictionary-mismatch.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+source "$COMMON/platform.sh"
+
+set -e
+
+if [ false ]; then
+	for seed in $(seq 100); do
+		datagen -g1000 -s$seed > file$seed
+	done
+
+	zstd --train -r . -o dict0 -qq
+
+	for seed in $(seq 101 200); do
+		datagen -g1000 -s$seed > file$seed
+	done
+
+	zstd --train -r . -o dict1 -qq
+
+	[ "$($MD5SUM < dict0)" != "$($MD5SUM < dict1)" ] || die "dictionaries must not match"
+
+	datagen -g1000 -s0 > file0
+fi
+
+set -x
+zstd files/0 -D dicts/0
+zstd -t files/0.zst -D dicts/0
+zstd -t files/0.zst -D dicts/1 && die "Must fail" ||:
+zstd -t files/0.zst            && die "Must fail" ||:
diff --git a/tests/cli-tests/dictionaries/dictionary-mismatch.sh.stderr.exact b/tests/cli-tests/dictionaries/dictionary-mismatch.sh.stderr.exact
new file mode 100644
index 00000000..399a3207
--- /dev/null
+++ b/tests/cli-tests/dictionaries/dictionary-mismatch.sh.stderr.exact
@@ -0,0 +1,8 @@
++ zstd files/0 -D dicts/0
++ zstd -t files/0.zst -D dicts/0
++ zstd -t files/0.zst -D dicts/1
+files/0.zst : Decoding error (36) : Dictionary mismatch 
++ :
++ zstd -t files/0.zst
+files/0.zst : Decoding error (36) : Dictionary mismatch 
++ :
diff --git a/tests/cli-tests/dictionaries/setup b/tests/cli-tests/dictionaries/setup
new file mode 100755
index 00000000..616c73eb
--- /dev/null
+++ b/tests/cli-tests/dictionaries/setup
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+set -e
+
+cp -r ../files .
+cp -r ../dicts .
diff --git a/tests/cli-tests/dictionaries/setup_once b/tests/cli-tests/dictionaries/setup_once
new file mode 100755
index 00000000..6316df16
--- /dev/null
+++ b/tests/cli-tests/dictionaries/setup_once
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+set -e
+
+source "$COMMON/platform.sh"
+
+
+mkdir files/ dicts/
+
+for seed in $(seq 50); do
+	datagen -g1000 -s$seed > files/$seed
+done
+
+zstd --train -r files -o dicts/0 -qq
+
+for seed in $(seq 51 100); do
+	datagen -g1000 -s$seed > files/$seed
+done
+
+zstd --train -r files -o dicts/1 -qq
+
+cmp dicts/0 dicts/1 && die "dictionaries must not match!"
+
+datagen -g1000 > files/0
diff --git a/tests/cli-tests/run.py b/tests/cli-tests/run.py
new file mode 100755
index 00000000..6791918a
--- /dev/null
+++ b/tests/cli-tests/run.py
@@ -0,0 +1,687 @@
+#!/usr/bin/env python3
+# ################################################################
+# Copyright (c) Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# You may select, at your option, one of the above-listed licenses.
+# ##########################################################################
+
+import argparse
+import contextlib
+import copy
+import fnmatch
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+import typing
+
+
+EXCLUDED_DIRS = {
+    "bin",
+    "common",
+    "scratch",
+}
+
+
+EXCLUDED_BASENAMES = {
+    "setup",
+    "setup_once",
+    "teardown",
+    "teardown_once",
+    "README.md",
+    "run.py",
+    ".gitignore",
+}
+
+EXCLUDED_SUFFIXES = [
+    ".exact",
+    ".glob",
+    ".ignore",
+    ".exit",
+]
+
+
+def exclude_dir(dirname: str) -> bool:
+    """
+    Should files under the directory :dirname: be excluded from the test runner?
+    """
+    if dirname in EXCLUDED_DIRS:
+        return True
+    return False
+
+
+def exclude_file(filename: str) -> bool:
+    """Should the file :filename: be excluded from the test runner?"""
+    if filename in EXCLUDED_BASENAMES:
+        return True
+    for suffix in EXCLUDED_SUFFIXES:
+        if filename.endswith(suffix):
+            return True
+    return False
+
+def read_file(filename: str) -> bytes:
+    """Reads the file :filename: and returns the contents as bytes."""
+    with open(filename, "rb") as f:
+        return f.read()
+
+
+def diff(a: bytes, b: bytes) -> str:
+    """Returns a diff between two different byte-strings :a: and :b:."""
+    assert a != b
+    with tempfile.NamedTemporaryFile("wb") as fa:
+        fa.write(a)
+        fa.flush()
+        with tempfile.NamedTemporaryFile("wb") as fb:
+            fb.write(b)
+            fb.flush()
+
+            diff_bytes = subprocess.run(["diff", fa.name, fb.name], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL).stdout
+            return diff_bytes.decode("utf8")
+
+
+def pop_line(data: bytes) -> typing.Tuple[typing.Optional[bytes], bytes]:
+    """
+    Pop the first line from :data: and returns the first line and the remainder
+    of the data as a tuple. If :data: is empty, returns :(None, data):. Otherwise
+    the first line always ends in a :\n:, even if it is the last line and :data:
+    doesn't end in :\n:.
+    """
+    NEWLINE = b"\n"[0]
+
+    if data == b'':
+        return (None, data)
+
+    newline_idx = data.find(b"\n")
+    if newline_idx == -1:
+        end_idx = len(data)
+    else:
+        end_idx = newline_idx + 1
+
+    line = data[:end_idx]
+    data = data[end_idx:]
+
+    assert len(line) != 0
+    if line[-1] != NEWLINE:
+        line += NEWLINE
+
+    return (line, data)
+
+
+def glob_line_matches(actual: bytes, expect: bytes) -> bool:
+    """
+    Does the `actual` line match the expected glob line `expect`?
+    """
+    return fnmatch.fnmatchcase(actual.strip(), expect.strip())
+
+
+def glob_diff(actual: bytes, expect: bytes) -> bytes:
+    """
+    Returns None if the :actual: content matches the expected glob :expect:,
+    otherwise returns the diff bytes.
+    """
+    diff = b''
+    actual_line, actual = pop_line(actual)
+    expect_line, expect = pop_line(expect)
+    while True:
+        # Handle end of file conditions - allow extra newlines
+        while expect_line is None and actual_line == b"\n":
+            actual_line, actual = pop_line(actual)
+        while actual_line is None and expect_line == b"\n":
+            expect_line, expect = pop_line(expect)
+
+        if expect_line is None and actual_line is None:
+            if diff == b'':
+                return None
+            return diff
+        elif expect_line is None:
+            diff += b"---\n"
+            while actual_line != None:
+                diff += b"> "
+                diff += actual_line
+                actual_line, actual = pop_line(actual)
+            return diff
+        elif actual_line is None:
+            diff += b"---\n"
+            while expect_line != None:
+                diff += b"< "
+                diff += expect_line
+                expect_line, expect = pop_line(expect)
+            return diff
+
+        assert expect_line is not None
+        assert actual_line is not None
+
+        if expect_line == b'...\n':
+            next_expect_line, expect = pop_line(expect)
+            if next_expect_line is None:
+                if diff == b'':
+                    return None
+                return diff
+            while not glob_line_matches(actual_line, next_expect_line):
+                actual_line, actual = pop_line(actual)
+                if actual_line is None:
+                    diff += b"---\n"
+                    diff += b"< "
+                    diff += next_expect_line
+                    return diff
+            expect_line = next_expect_line
+            continue
+
+        if not glob_line_matches(actual_line, expect_line):
+            diff += b'---\n'
+            diff += b'< ' + expect_line
+            diff += b'> ' + actual_line
+
+        actual_line, actual = pop_line(actual)
+        expect_line, expect = pop_line(expect)
+
+
+class Options:
+    """Options configuring how to run a :TestCase:."""
+    def __init__(
+        self,
+        env: typing.Dict[str, str],
+        timeout: typing.Optional[int],
+        verbose: bool,
+        preserve: bool,
+        scratch_dir: str,
+        test_dir: str,
+    ) -> None:
+        self.env = env
+        self.timeout = timeout
+        self.verbose = verbose
+        self.preserve = preserve
+        self.scratch_dir = scratch_dir
+        self.test_dir = test_dir
+
+
+class TestCase:
+    """
+    Logic and state related to running a single test case.
+
+    1. Initialize the test case.
+    2. Launch the test case with :TestCase.launch():.
+       This will start the test execution in a subprocess, but
+       not wait for completion. So you could launch multiple test
+       cases in parallel. This will now print any test output.
+    3. Analyze the results with :TestCase.analyze():. This will
+       join the test subprocess, check the results against the
+       expectations, and print the results to stdout.
+
+    :TestCase.run(): is also provided which combines the launch & analyze
+    steps for single-threaded use-cases.
+
+    All other methods, prefixed with _, are private helper functions.
+    """
+    def __init__(self, test_filename: str, options: Options) -> None:
+        """
+        Initialize the :TestCase: for the test located in :test_filename:
+        with the given :options:.
+        """
+        self._opts = options
+        self._test_file = test_filename
+        self._test_name = os.path.normpath(
+            os.path.relpath(test_filename, start=self._opts.test_dir)
+        )
+        self._success = {}
+        self._message = {}
+        self._test_stdin = None
+        self._scratch_dir = os.path.abspath(os.path.join(self._opts.scratch_dir, self._test_name))
+
+    @property
+    def name(self) -> str:
+        """Returns the unique name for the test."""
+        return self._test_name
+
+    def launch(self) -> None:
+        """
+        Launch the test case as a subprocess, but do not block on completion.
+        This allows users to run multiple tests in parallel. Results aren't yet
+        printed out.
+        """
+        self._launch_test()
+
+    def analyze(self) -> bool:
+        """
+        Must be called after :TestCase.launch():. Joins the test subprocess and
+        checks the results against expectations. Finally prints the results to
+        stdout and returns the success.
+        """
+        self._join_test()
+        self._check_exit()
+        self._check_stderr()
+        self._check_stdout()
+        self._analyze_results()
+        return self._succeeded
+
+    def run(self) -> bool:
+        """Shorthand for combining both :TestCase.launch(): and :TestCase.analyze():."""
+        self.launch()
+        return self.analyze()
+
+    def _log(self, *args, **kwargs) -> None:
+        """Logs test output."""
+        print(file=sys.stdout, *args, **kwargs)
+
+    def _vlog(self, *args, **kwargs) -> None:
+        """Logs verbose test output."""
+        if self._opts.verbose:
+            print(file=sys.stdout, *args, **kwargs)
+
+    def _test_environment(self) -> typing.Dict[str, str]:
+        """
+        Returns the environment to be used for the
+        test subprocess.
+        """
+        env = copy.copy(os.environ)
+        for k, v in self._opts.env.items():
+            self._vlog(f"${k}='{v}'")
+            env[k] = v
+
+    def _launch_test(self) -> None:
+        """Launch the test subprocess, but do not join it."""
+        args = [os.path.abspath(self._test_file)]
+        stdin_name = f"{self._test_file}.stdin"
+        if os.path.exists(stdin_name):
+            self._test_stdin = open(stdin_name, "rb")
+            stdin = self._test_stdin
+        else:
+            stdin = subprocess.DEVNULL
+        cwd = self._scratch_dir
+        env = self._test_environment()
+        self._test_process = subprocess.Popen(
+            args=args,
+            stdin=stdin,
+            cwd=cwd,
+            env=env,
+            stderr=subprocess.PIPE,
+            stdout=subprocess.PIPE
+        )
+
+    def _join_test(self) -> None:
+        """Join the test process and save stderr, stdout, and the exit code."""
+        (stdout, stderr) = self._test_process.communicate(timeout=self._opts.timeout)
+        self._output = {}
+        self._output["stdout"] = stdout
+        self._output["stderr"] = stderr
+        self._exit_code = self._test_process.returncode
+        self._test_process = None
+        if self._test_stdin is not None:
+            self._test_stdin.close()
+            self._test_stdin = None
+
+    def _check_output_exact(self, out_name: str, expected: bytes) -> None:
+        """
+        Check the output named :out_name: for an exact match against the :expected: content.
+        Saves the success and message.
+        """
+        check_name = f"check_{out_name}"
+        actual = self._output[out_name]
+        if actual == expected:
+            self._success[check_name] = True
+            self._message[check_name] = f"{out_name} matches!"
+        else:
+            self._success[check_name] = False
+            self._message[check_name] = f"{out_name} does not match!\n> diff expected actual\n{diff(expected, actual)}"
+
+    def _check_output_glob(self, out_name: str, expected: bytes) -> None:
+        """
+        Check the output named :out_name: for a glob match against the :expected: glob.
+        Saves the success and message.
+        """
+        check_name = f"check_{out_name}"
+        actual = self._output[out_name]
+        diff = glob_diff(actual, expected)
+        if diff is None:
+            self._success[check_name] = True
+            self._message[check_name] = f"{out_name} matches!"
+        else:
+            utf8_diff = diff.decode('utf8')
+            self._success[check_name] = False
+            self._message[check_name] = f"{out_name} does not match!\n> diff expected actual\n{utf8_diff}"
+
+    def _check_output(self, out_name: str) -> None:
+        """
+        Checks the output named :out_name: for a match against the expectation.
+        We check for a .exact, .glob, and a .ignore file. If none are found we
+        expect that the output should be empty.
+
+        If :Options.preserve: was set then we save the scratch directory and
+        save the stderr, stdout, and exit code to the scratch directory for
+        debugging.
+        """
+        if self._opts.preserve:
+            # Save the output to the scratch directory
+            actual_name = os.path.join(self._scratch_dir, f"{out_name}")
+            with open(actual_name, "wb") as f:
+                    f.write(self._output[out_name])
+
+        exact_name = f"{self._test_file}.{out_name}.exact"
+        glob_name = f"{self._test_file}.{out_name}.glob"
+        ignore_name = f"{self._test_file}.{out_name}.ignore"
+
+        if os.path.exists(exact_name):
+            return self._check_output_exact(out_name, read_file(exact_name))
+        elif os.path.exists(glob_name):
+            return self._check_output_glob(out_name, read_file(glob_name))
+        elif os.path.exists(ignore_name):
+            check_name = f"check_{out_name}"
+            self._success[check_name] = True
+            self._message[check_name] = f"{out_name} ignored!"
+        else:
+            return self._check_output_exact(out_name, bytes())
+
+    def _check_stderr(self) -> None:
+        """Checks the stderr output against the expectation."""
+        self._check_output("stderr")
+
+    def _check_stdout(self) -> None:
+        """Checks the stdout output against the expectation."""
+        self._check_output("stdout")
+
+    def _check_exit(self) -> None:
+        """
+        Checks the exit code against expectations. If a .exit file
+        exists, we expect that the exit code matches the contents.
+        Otherwise we expect the exit code to be zero.
+
+        If :Options.preserve: is set we save the exit code to the
+        scratch directory under the filename "exit".
+        """
+        if self._opts.preserve:
+            exit_name = os.path.join(self._scratch_dir, "exit")
+            with open(exit_name, "w") as f:
+                f.write(str(self._exit_code) + "\n")
+        exit_name = f"{self._test_file}.exit"
+        if os.path.exists(exit_name):
+            exit_code: int = int(read_file(exit_name))
+        else:
+            exit_code: int = 0
+        if exit_code == self._exit_code:
+            self._success["check_exit"] = True
+            self._message["check_exit"] = "Exit code matches!"
+        else:
+            self._success["check_exit"] = False
+            self._message["check_exit"] = f"Exit code mismatch! Expected {exit_code} but got {self._exit_code}"
+
+    def _analyze_results(self) -> None:
+        """
+        After all tests have been checked, collect all the successes
+        and messages, and print the results to stdout.
+        """
+        STATUS = {True: "PASS", False: "FAIL"}
+        checks = sorted(self._success.keys())
+        self._succeeded = all(self._success.values())
+        self._log(f"{STATUS[self._succeeded]}: {self._test_name}")
+
+        if not self._succeeded or self._opts.verbose:
+            for check in checks:
+                if self._opts.verbose or not self._success[check]:
+                    self._log(f"{STATUS[self._success[check]]}: {self._test_name}.{check}")
+                    self._log(self._message[check])
+
+        self._log("----------------------------------------")
+
+
+class TestSuite:
+    """
+    Setup & teardown test suite & cases.
+    This class is intended to be used as a context manager.
+
+    TODO: Make setup/teardown failure emit messages, not throw exceptions.
+    """
+    def __init__(self, test_directory: str, options: Options) -> None:
+        self._opts = options
+        self._test_dir = os.path.abspath(test_directory)
+        rel_test_dir = os.path.relpath(test_directory, start=self._opts.test_dir)
+        assert not rel_test_dir.startswith(os.path.sep)
+        self._scratch_dir = os.path.normpath(os.path.join(self._opts.scratch_dir, rel_test_dir))
+
+    def __enter__(self) -> 'TestSuite':
+        self._setup_once()
+        return self
+
+    def __exit__(self, _exc_type, _exc_value, _traceback) -> None:
+        self._teardown_once()
+
+    @contextlib.contextmanager
+    def test_case(self, test_basename: str) -> TestCase:
+        """
+        Context manager for a test case in the test suite.
+        Pass the basename of the test relative to the :test_directory:.
+        """
+        assert os.path.dirname(test_basename) == ""
+        try:
+            self._setup(test_basename)
+            test_filename = os.path.join(self._test_dir, test_basename)
+            yield TestCase(test_filename, self._opts)
+        finally:
+            self._teardown(test_basename)
+
+    def _remove_scratch_dir(self, dir: str) -> None:
+        """Helper to remove a scratch directory with sanity checks"""
+        assert "scratch" in dir
+        assert dir.startswith(self._scratch_dir)
+        assert os.path.exists(dir)
+        shutil.rmtree(dir)
+
+    def _setup_once(self) -> None:
+        if os.path.exists(self._scratch_dir):
+            self._remove_scratch_dir(self._scratch_dir)
+        os.makedirs(self._scratch_dir)
+        setup_script = os.path.join(self._test_dir, "setup_once")
+        if os.path.exists(setup_script):
+            self._run_script(setup_script, cwd=self._scratch_dir)
+
+    def _teardown_once(self) -> None:
+        assert os.path.exists(self._scratch_dir)
+        teardown_script = os.path.join(self._test_dir, "teardown_once")
+        if os.path.exists(teardown_script):
+            self._run_script(teardown_script, cwd=self._scratch_dir)
+        if not self._opts.preserve:
+            self._remove_scratch_dir(self._scratch_dir)
+
+    def _setup(self, test_basename: str) -> None:
+        test_scratch_dir = os.path.join(self._scratch_dir, test_basename)
+        assert not os.path.exists(test_scratch_dir)
+        os.makedirs(test_scratch_dir)
+        setup_script = os.path.join(self._test_dir, "setup")
+        if os.path.exists(setup_script):
+            self._run_script(setup_script, cwd=test_scratch_dir)
+
+    def _teardown(self, test_basename: str) -> None:
+        test_scratch_dir = os.path.join(self._scratch_dir, test_basename)
+        assert os.path.exists(test_scratch_dir)
+        teardown_script = os.path.join(self._test_dir, "teardown")
+        if os.path.exists(teardown_script):
+            self._run_script(teardown_script, cwd=test_scratch_dir)
+        if not self._opts.preserve:
+            self._remove_scratch_dir(test_scratch_dir)
+
+    def _run_script(self, script: str, cwd: str) -> None:
+        env = copy.copy(os.environ)
+        for k, v in self._opts.env.items():
+            env[k] = v
+        try:
+            subprocess.run(
+                args=[script],
+                stdin=subprocess.DEVNULL,
+                capture_output=True,
+                cwd=cwd,
+                env=env,
+                check=True,
+            )
+        except subprocess.CalledProcessError as e:
+            print(f"{script} failed with exit code {e.returncode}!")
+            print(f"stderr:\n{e.stderr}")
+            print(f"stdout:\n{e.stdout}")
+            raise
+
+TestSuites = typing.Dict[str, typing.List[str]]
+
+def get_all_tests(options: Options) -> TestSuites:
+    """
+    Find all the test in the test directory and return the test suites.
+    """
+    test_suites = {}
+    for root, dirs, files in os.walk(options.test_dir, topdown=True):
+        dirs[:] = [d for d in dirs if not exclude_dir(d)]
+        test_cases = []
+        for file in files:
+            if not exclude_file(file):
+                test_cases.append(file)
+        assert root == os.path.normpath(root)
+        test_suites[root] = test_cases
+    return test_suites
+
+
+def resolve_listed_tests(
+    tests: typing.List[str], options: Options
+) -> TestSuites:
+    """
+    Resolve the list of tests passed on the command line into their
+    respective test suites. Tests can either be paths, or test names
+    relative to the test directory.
+    """
+    test_suites = {}
+    for test in tests:
+        if not os.path.exists(test):
+            test = os.path.join(options.test_dir, test)
+            if not os.path.exists(test):
+                raise RuntimeError(f"Test {test} does not exist!")
+
+        test = os.path.normpath(os.path.abspath(test))
+        assert test.startswith(options.test_dir)
+        test_suite = os.path.dirname(test)
+        test_case = os.path.basename(test)
+        test_suites.setdefault(test_suite, []).append(test_case)
+
+    return test_suites
+
+def run_tests(test_suites: TestSuites, options: Options) -> bool:
+    """
+    Runs all the test in the :test_suites: with the given :options:.
+    Prints the results to stdout.
+    """
+    tests = {}
+    for test_dir, test_files in test_suites.items():
+        with TestSuite(test_dir, options) as test_suite:
+            test_files = sorted(set(test_files))
+            for test_file in test_files:
+                with test_suite.test_case(test_file) as test_case:
+                    tests[test_case.name] = test_case.run()
+
+    successes = 0
+    for test, status in tests.items():
+        if status:
+            successes += 1
+        else:
+            print(f"FAIL: {test}")
+    if successes == len(tests):
+        print(f"PASSED all {len(tests)} tests!")
+        return True
+    else:
+        print(f"FAILED {len(tests) - successes} / {len(tests)} tests!")
+        return False
+
+
+if __name__ == "__main__":
+    CLI_TEST_DIR = os.path.dirname(sys.argv[0])
+    REPO_DIR = os.path.join(CLI_TEST_DIR, "..", "..")
+    PROGRAMS_DIR = os.path.join(REPO_DIR, "programs")
+    TESTS_DIR = os.path.join(REPO_DIR, "tests")
+    ZSTD_PATH = os.path.join(PROGRAMS_DIR, "zstd")
+    ZSTDGREP_PATH = os.path.join(PROGRAMS_DIR, "zstdgrep")
+    DATAGEN_PATH = os.path.join(TESTS_DIR, "datagen")
+
+    parser = argparse.ArgumentParser(
+        (
+            "Runs the zstd CLI tests. Exits nonzero on failure. Default arguments are\n"
+            "generally correct. Pass --preserve to preserve test output for debugging,\n"
+            "and --verbose to get verbose test output.\n"
+        )
+    )
+    parser.add_argument(
+        "--preserve",
+        action="store_true",
+        help="Preserve the scratch directory TEST_DIR/scratch/ for debugging purposes."
+    )
+    parser.add_argument("--verbose", action="store_true", help="Verbose test output.")
+    parser.add_argument("--timeout", default=60, type=int, help="Test case timeout in seconds. Set to 0 to disable timeouts.")
+    parser.add_argument(
+        "--exec-prefix",
+        default=None,
+        help="Sets the EXEC_PREFIX environment variable. Prefix to invocations of the zstd CLI."
+    )
+    parser.add_argument(
+        "--zstd",
+        default=ZSTD_PATH,
+        help="Sets the ZSTD_BIN environment variable. Path of the zstd CLI."
+    )
+    parser.add_argument(
+        "--zstdgrep",
+        default=ZSTDGREP_PATH,
+        help="Sets the ZSTDGREP_BIN environment variable. Path of the zstdgrep CLI."
+    )
+    parser.add_argument(
+        "--datagen",
+        default=DATAGEN_PATH,
+        help="Sets the DATAGEN_BIN environment variable. Path to the datagen CLI."
+    )
+    parser.add_argument(
+        "--test-dir",
+        default=CLI_TEST_DIR,
+        help=(
+            "Runs the tests under this directory. "
+            "Adds TEST_DIR/bin/ to path. "
+            "Scratch directory located in TEST_DIR/scratch/."
+        )
+    )
+    parser.add_argument(
+        "tests",
+        nargs="*",
+        help="Run only these test cases. Can either be paths or test names relative to TEST_DIR/"
+    )
+    args = parser.parse_args()
+
+    if args.timeout <= 0:
+        args.timeout = None
+
+    args.test_dir = os.path.normpath(os.path.abspath(args.test_dir))
+    bin_dir = os.path.join(args.test_dir, "bin")
+    scratch_dir = os.path.join(args.test_dir, "scratch")
+
+    env = {}
+    if args.exec_prefix is not None:
+        env["EXEC_PREFIX"] = args.exec_prefix
+    env["ZSTD_BIN"] = os.path.abspath(args.zstd)
+    env["DATAGEN_BIN"] = os.path.abspath(args.datagen)
+    env["ZSTDGREP_BIN"] = os.path.abspath(args.zstdgrep)
+    env["COMMON"] = os.path.abspath(os.path.join(args.test_dir, "common"))
+    env["PATH"] = os.path.abspath(bin_dir) + ":" + os.getenv("PATH", "")
+
+    opts = Options(
+        env=env,
+        timeout=args.timeout,
+        verbose=args.verbose,
+        preserve=args.preserve,
+        test_dir=args.test_dir,
+        scratch_dir=scratch_dir,
+    )
+
+    if len(args.tests) == 0:
+        tests = get_all_tests(opts)
+    else:
+        tests = resolve_listed_tests(args.tests, opts)
+
+    success = run_tests(tests, opts)
+    if success:
+        sys.exit(0)
+    else:
+        sys.exit(1)
+

From 1fc42de86a53320c056c9a3ca9847eae7ce1262b Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Mon, 24 Jan 2022 13:52:08 -0800
Subject: [PATCH 5/8] [CI] Hook cli-tests up to CI

Add cli-tests to `make test`. This adds a `python3` dependency to `make
test`, but not `make check`. We could make this dependency optional by
skipping the tests if `python3` is not present.
---
 tests/Makefile                                |   8 +-
 tests/cli-tests/basic/help.sh                 |   5 +-
 tests/cli-tests/basic/version.sh              |   5 +-
 tests/cli-tests/bin/cmp_size                  |   2 -
 tests/cli-tests/bin/println                   |   2 +-
 tests/cli-tests/common/format.sh              |   2 +-
 tests/cli-tests/common/mtime.sh               |   2 +-
 tests/cli-tests/common/permissions.sh         |   2 +-
 tests/cli-tests/compression/basic.sh          |   8 +-
 tests/cli-tests/compression/format.sh         |   2 +-
 tests/cli-tests/compression/levels.sh         |  16 +--
 .../compression/levels.sh.stderr.exact        | 124 +++++++++---------
 .../dict-builder/{no-inputs => no-inputs.sh}  |   2 +-
 .../{no-inputs.exit => no-inputs.sh.exit}     |   0
 ...stderr.exact => no-inputs.sh.stderr.exact} |   2 +-
 .../dictionaries/dictionary-mismatch.sh       |   4 +-
 .../dictionary-mismatch.sh.stderr.exact       |  10 +-
 tests/cli-tests/dictionaries/setup_once       |   2 +-
 18 files changed, 102 insertions(+), 96 deletions(-)
 rename tests/cli-tests/dict-builder/{no-inputs => no-inputs.sh} (76%)
 rename tests/cli-tests/dict-builder/{no-inputs.exit => no-inputs.sh.exit} (100%)
 rename tests/cli-tests/dict-builder/{no-inputs.stderr.exact => no-inputs.sh.stderr.exact} (93%)

diff --git a/tests/Makefile b/tests/Makefile
index 132fa7a0..cb77b016 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -297,7 +297,7 @@ check: shortest
 fuzztest: test-fuzzer test-zstream test-decodecorpus
 
 .PHONY: test
-test: test-zstd test-fullbench test-fuzzer test-zstream test-invalidDictionaries test-legacy test-decodecorpus
+test: test-zstd test-fullbench test-fuzzer test-zstream test-invalidDictionaries test-legacy test-decodecorpus test-cli-tests
 ifeq ($(QEMU_SYS),)
 test: test-pool
 endif
@@ -322,6 +322,12 @@ test-zstd test-zstd32 test-zstd-nolegacy: datagen
 	file $(ZSTD)
 	EXE_PREFIX="$(QEMU_SYS)" ZSTD_BIN="$(ZSTD)" DATAGEN_BIN=./datagen ./playTests.sh $(ZSTDRTTEST)
 
+test-cli-tests: ZSTD = $(PRGDIR)/zstd
+test-cli-tests: zstd datagen
+	file $(ZSTD)
+	./cli-tests/run.py --exec-prefix="$(QEMU_SYS)" --zstd="$(ZSTD)" --datagen=./datagen
+
+
 test-fullbench: fullbench datagen
 	$(QEMU_SYS) ./fullbench -i1
 	$(QEMU_SYS) ./fullbench -i1 -P0
diff --git a/tests/cli-tests/basic/help.sh b/tests/cli-tests/basic/help.sh
index c683b6a6..927c3ffb 100755
--- a/tests/cli-tests/basic/help.sh
+++ b/tests/cli-tests/basic/help.sh
@@ -1,4 +1,7 @@
-#!/bin/sh -e
+#!/bin/sh
+
+set -e
+
 println "+ zstd -h"
 zstd -h
 println "+ zstd -H"
diff --git a/tests/cli-tests/basic/version.sh b/tests/cli-tests/basic/version.sh
index d50de0f3..f75eaa84 100755
--- a/tests/cli-tests/basic/version.sh
+++ b/tests/cli-tests/basic/version.sh
@@ -1,3 +1,6 @@
-#!/bin/sh -e
+#!/bin/sh
+
+set -e
+
 zstd -V
 zstd --version
diff --git a/tests/cli-tests/bin/cmp_size b/tests/cli-tests/bin/cmp_size
index 5afa1c59..8e4bef88 100755
--- a/tests/cli-tests/bin/cmp_size
+++ b/tests/cli-tests/bin/cmp_size
@@ -1,7 +1,5 @@
 #!/bin/sh
 
-# Small utility to
-
 set -e
 
 usage()
diff --git a/tests/cli-tests/bin/println b/tests/cli-tests/bin/println
index 1da24604..494eb18c 100755
--- a/tests/cli-tests/bin/println
+++ b/tests/cli-tests/bin/println
@@ -1,2 +1,2 @@
-#!/bin/env sh
+#!/bin/sh
 printf '%b\n' "${*}"
diff --git a/tests/cli-tests/common/format.sh b/tests/cli-tests/common/format.sh
index 20ff0f05..e574e973 100644
--- a/tests/cli-tests/common/format.sh
+++ b/tests/cli-tests/common/format.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-source "$COMMON/platform.sh"
+. "$COMMON/platform.sh"
 
 zstd_supports_format()
 {
diff --git a/tests/cli-tests/common/mtime.sh b/tests/cli-tests/common/mtime.sh
index 7ce931a9..344074d3 100644
--- a/tests/cli-tests/common/mtime.sh
+++ b/tests/cli-tests/common/mtime.sh
@@ -1,4 +1,4 @@
-source "$COMMON/platform.sh"
+. "$COMMON/platform.sh"
 
 MTIME="stat -c %Y"
 case "$UNAME" in
diff --git a/tests/cli-tests/common/permissions.sh b/tests/cli-tests/common/permissions.sh
index b1f6ea3b..6bce1f0b 100644
--- a/tests/cli-tests/common/permissions.sh
+++ b/tests/cli-tests/common/permissions.sh
@@ -1,4 +1,4 @@
-source "$COMMON/platform.sh"
+. "$COMMON/platform.sh"
 
 GET_PERMS="stat -c %a"
 case "$UNAME" in
diff --git a/tests/cli-tests/compression/basic.sh b/tests/cli-tests/compression/basic.sh
index 6f0f8793..8b63e407 100755
--- a/tests/cli-tests/compression/basic.sh
+++ b/tests/cli-tests/compression/basic.sh
@@ -1,7 +1,9 @@
-#!/bin/sh -e
+#!/bin/sh
 
-# Uncomment the set -x line for debugging
-# set -x
+set -e
+
+# Uncomment the set -v line for debugging
+# set -v
 
 # Test compression flags and check that they work
 zstd file                 ; zstd -t file.zst
diff --git a/tests/cli-tests/compression/format.sh b/tests/cli-tests/compression/format.sh
index 86fb4008..192fa2cf 100755
--- a/tests/cli-tests/compression/format.sh
+++ b/tests/cli-tests/compression/format.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-source "$COMMON/format.sh"
+. "$COMMON/format.sh"
 
 set -e
 
diff --git a/tests/cli-tests/compression/levels.sh b/tests/cli-tests/compression/levels.sh
index 6bd0aca0..4837790c 100755
--- a/tests/cli-tests/compression/levels.sh
+++ b/tests/cli-tests/compression/levels.sh
@@ -1,7 +1,7 @@
 #!/bin/sh
 
 set -e
-set -x
+set -v
 
 datagen > file
 
@@ -12,7 +12,7 @@ zstd -1 file -o file-1.zst
 zstd -19 file -o file-19.zst
 zstd -22 --ultra file -o file-22.zst
 
-zstd -t file-{f10,f1,1,19,22}.zst
+zstd -t file-f10.zst file-f1.zst file-1.zst file-19.zst file-22.zst
 
 cmp_size -ne file-19.zst file-22.zst
 cmp_size -lt file-19.zst file-1.zst
@@ -41,10 +41,10 @@ ZSTD_CLEVEL=1 zstd file -o file-1-env.zst
 ZSTD_CLEVEL=+19 zstd file -o file-19-env.zst
 ZSTD_CLEVEL=+99 zstd file -o file-99-env.zst
 
-cmp file-f10{,-env}.zst || die "Environment variable failed to set level"
-cmp file-1{,-env}.zst   || die "Environment variable failed to set level"
-cmp file-19{,-env}.zst  || die "Environment variable failed to set level"
-cmp file-99{,-env}.zst  || die "Environment variable failed to set level"
+cmp file-f10.zst file-f10-env.zst || die "Environment variable failed to set level"
+cmp file-1.zst file-1-env.zst || die "Environment variable failed to set level"
+cmp file-19.zst file-19-env.zst || die "Environment variable failed to set level"
+cmp file-99.zst file-99-env.zst || die "Environment variable failed to set level"
 
 # Test invalid environment clevel is the default level
 zstd -f file
@@ -60,5 +60,5 @@ ZSTD_CLEVEL=5000000000 zstd -f file -o file-env.zst; cmp file.zst file-env.zst
 ZSTD_CLEVEL=10 zstd -f file -1 -o file-1-env.zst
 ZSTD_CLEVEL=10 zstd -f file --fast=1 -o file-f1-env.zst
 
-cmp file-1{,-env}.zst  || die "Environment variable not overridden"
-cmp file-f1{,-env}.zst || die "Environment variable not overridden"
+cmp file-1.zst file-1-env.zst  || die "Environment variable not overridden"
+cmp file-f1.zst file-f1-env.zst || die "Environment variable not overridden"
diff --git a/tests/cli-tests/compression/levels.sh.stderr.exact b/tests/cli-tests/compression/levels.sh.stderr.exact
index c0b7066f..cb00433e 100644
--- a/tests/cli-tests/compression/levels.sh.stderr.exact
+++ b/tests/cli-tests/compression/levels.sh.stderr.exact
@@ -1,75 +1,71 @@
-+ datagen
-+ zstd --fast=10 file -o file-f10.zst
-+ zstd --fast=1 file -o file-f1.zst
-+ zstd -1 file -o file-1.zst
-+ zstd -19 file -o file-19.zst
-+ zstd -22 --ultra file -o file-22.zst
-+ zstd -t file-f10.zst file-f1.zst file-1.zst file-19.zst file-22.zst
-+ cmp_size -ne file-19.zst file-22.zst
-+ cmp_size -lt file-19.zst file-1.zst
-+ cmp_size -lt file-1.zst file-f1.zst
-+ cmp_size -lt file-f1.zst file-f10.zst
-+ zstd --fast file -f
-+ cmp file.zst file-f1.zst
-+ zstd -0 file -o file-0.zst
-+ zstd -f file
-+ cmp file.zst file-0.zst
-+ zstd -99 file -o file-99.zst
+
+datagen > file
+
+# Compress with various levels and ensure that their sizes are ordered
+zstd --fast=10 file -o file-f10.zst
+zstd --fast=1 file -o file-f1.zst
+zstd -1 file -o file-1.zst
+zstd -19 file -o file-19.zst
+zstd -22 --ultra file -o file-22.zst
+
+zstd -t file-f10.zst file-f1.zst file-1.zst file-19.zst file-22.zst
+
+cmp_size -ne file-19.zst file-22.zst
+cmp_size -lt file-19.zst file-1.zst
+cmp_size -lt file-1.zst file-f1.zst
+cmp_size -lt file-f1.zst file-f10.zst
+
+# Test default levels
+zstd --fast file -f
+cmp file.zst file-f1.zst || die "--fast is not level -1"
+
+zstd -0 file -o file-0.zst
+zstd -f file
+cmp file.zst file-0.zst || die "Level 0 is not the default level"
+
+# Test level clamping
+zstd -99 file -o file-99.zst
 Warning : compression level higher than max, reduced to 19 
-+ cmp file-19.zst file-99.zst
-+ zstd --fast=200000 file -c
-+ zstd -t
-+ zstd -5000000000 -f file
+cmp file-19.zst file-99.zst || die "Level 99 is clamped to 19"
+zstd --fast=200000 file -c | zstd -t
+
+zstd -5000000000 -f file       && die "Level too large, must fail" ||:
 error: numeric value overflows 32-bit unsigned int 
-+ :
-+ zstd --fast=5000000000 -f file
+zstd --fast=5000000000 -f file && die "Level too large, must fail" ||:
 error: numeric value overflows 32-bit unsigned int 
-+ :
-+ ZSTD_CLEVEL=-10
-+ zstd file -o file-f10-env.zst
-+ ZSTD_CLEVEL=1
-+ zstd file -o file-1-env.zst
-+ ZSTD_CLEVEL=+19
-+ zstd file -o file-19-env.zst
-+ ZSTD_CLEVEL=+99
-+ zstd file -o file-99-env.zst
+
+# Test setting a level through the environment variable
+ZSTD_CLEVEL=-10 zstd file -o file-f10-env.zst
+ZSTD_CLEVEL=1 zstd file -o file-1-env.zst
+ZSTD_CLEVEL=+19 zstd file -o file-19-env.zst
+ZSTD_CLEVEL=+99 zstd file -o file-99-env.zst
 Warning : compression level higher than max, reduced to 19 
-+ cmp file-f10.zst file-f10-env.zst
-+ cmp file-1.zst file-1-env.zst
-+ cmp file-19.zst file-19-env.zst
-+ cmp file-99.zst file-99-env.zst
-+ zstd -f file
-+ ZSTD_CLEVEL=-
-+ zstd -f file -o file-env.zst
+
+cmp file-f10.zst file-f10-env.zst || die "Environment variable failed to set level"
+cmp file-1.zst file-1-env.zst || die "Environment variable failed to set level"
+cmp file-19.zst file-19-env.zst || die "Environment variable failed to set level"
+cmp file-99.zst file-99-env.zst || die "Environment variable failed to set level"
+
+# Test invalid environment clevel is the default level
+zstd -f file
+ZSTD_CLEVEL=- zstd -f file -o file-env.zst         ; cmp file.zst file-env.zst
 Ignore environment variable setting ZSTD_CLEVEL=-: not a valid integer value 
-+ cmp file.zst file-env.zst
-+ ZSTD_CLEVEL=+
-+ zstd -f file -o file-env.zst
+ZSTD_CLEVEL=+ zstd -f file -o file-env.zst         ; cmp file.zst file-env.zst
 Ignore environment variable setting ZSTD_CLEVEL=+: not a valid integer value 
-+ cmp file.zst file-env.zst
-+ ZSTD_CLEVEL=a
-+ zstd -f file -o file-env.zst
+ZSTD_CLEVEL=a zstd -f file -o file-env.zst         ; cmp file.zst file-env.zst
 Ignore environment variable setting ZSTD_CLEVEL=a: not a valid integer value 
-+ cmp file.zst file-env.zst
-+ ZSTD_CLEVEL=-a
-+ zstd -f file -o file-env.zst
+ZSTD_CLEVEL=-a zstd -f file -o file-env.zst        ; cmp file.zst file-env.zst
 Ignore environment variable setting ZSTD_CLEVEL=-a: not a valid integer value 
-+ cmp file.zst file-env.zst
-+ ZSTD_CLEVEL=+a
-+ zstd -f file -o file-env.zst
+ZSTD_CLEVEL=+a zstd -f file -o file-env.zst        ; cmp file.zst file-env.zst
 Ignore environment variable setting ZSTD_CLEVEL=+a: not a valid integer value 
-+ cmp file.zst file-env.zst
-+ ZSTD_CLEVEL=3a7
-+ zstd -f file -o file-env.zst
+ZSTD_CLEVEL=3a7 zstd -f file -o file-env.zst       ; cmp file.zst file-env.zst
 Ignore environment variable setting ZSTD_CLEVEL=3a7: not a valid integer value 
-+ cmp file.zst file-env.zst
-+ ZSTD_CLEVEL=5000000000
-+ zstd -f file -o file-env.zst
+ZSTD_CLEVEL=5000000000 zstd -f file -o file-env.zst; cmp file.zst file-env.zst
 Ignore environment variable setting ZSTD_CLEVEL=5000000000: numeric value too large 
-+ cmp file.zst file-env.zst
-+ ZSTD_CLEVEL=10
-+ zstd -f file -1 -o file-1-env.zst
-+ ZSTD_CLEVEL=10
-+ zstd -f file --fast=1 -o file-f1-env.zst
-+ cmp file-1.zst file-1-env.zst
-+ cmp file-f1.zst file-f1-env.zst
+
+# Test environment clevel is overridden by command line
+ZSTD_CLEVEL=10 zstd -f file -1 -o file-1-env.zst
+ZSTD_CLEVEL=10 zstd -f file --fast=1 -o file-f1-env.zst
+
+cmp file-1.zst file-1-env.zst  || die "Environment variable not overridden"
+cmp file-f1.zst file-f1-env.zst || die "Environment variable not overridden"
diff --git a/tests/cli-tests/dict-builder/no-inputs b/tests/cli-tests/dict-builder/no-inputs.sh
similarity index 76%
rename from tests/cli-tests/dict-builder/no-inputs
rename to tests/cli-tests/dict-builder/no-inputs.sh
index d37fdce5..416b8374 100755
--- a/tests/cli-tests/dict-builder/no-inputs
+++ b/tests/cli-tests/dict-builder/no-inputs.sh
@@ -1,3 +1,3 @@
 #!/bin/sh
-set -x
+set -v
 zstd --train
diff --git a/tests/cli-tests/dict-builder/no-inputs.exit b/tests/cli-tests/dict-builder/no-inputs.sh.exit
similarity index 100%
rename from tests/cli-tests/dict-builder/no-inputs.exit
rename to tests/cli-tests/dict-builder/no-inputs.sh.exit
diff --git a/tests/cli-tests/dict-builder/no-inputs.stderr.exact b/tests/cli-tests/dict-builder/no-inputs.sh.stderr.exact
similarity index 93%
rename from tests/cli-tests/dict-builder/no-inputs.stderr.exact
rename to tests/cli-tests/dict-builder/no-inputs.sh.stderr.exact
index b3b5599d..d7b3ea02 100644
--- a/tests/cli-tests/dict-builder/no-inputs.stderr.exact
+++ b/tests/cli-tests/dict-builder/no-inputs.sh.stderr.exact
@@ -1,4 +1,4 @@
-+ zstd --train
+zstd --train
 !  Warning : nb of samples too low for proper processing ! 
 !  Please provide _one file per sample_. 
 !  Alternatively, split files into fixed-size blocks representative of samples, with -B# 
diff --git a/tests/cli-tests/dictionaries/dictionary-mismatch.sh b/tests/cli-tests/dictionaries/dictionary-mismatch.sh
index 2b5d5a0d..8264ccca 100755
--- a/tests/cli-tests/dictionaries/dictionary-mismatch.sh
+++ b/tests/cli-tests/dictionaries/dictionary-mismatch.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-source "$COMMON/platform.sh"
+. "$COMMON/platform.sh"
 
 set -e
 
@@ -22,7 +22,7 @@ if [ false ]; then
 	datagen -g1000 -s0 > file0
 fi
 
-set -x
+set -v
 zstd files/0 -D dicts/0
 zstd -t files/0.zst -D dicts/0
 zstd -t files/0.zst -D dicts/1 && die "Must fail" ||:
diff --git a/tests/cli-tests/dictionaries/dictionary-mismatch.sh.stderr.exact b/tests/cli-tests/dictionaries/dictionary-mismatch.sh.stderr.exact
index 399a3207..0afea722 100644
--- a/tests/cli-tests/dictionaries/dictionary-mismatch.sh.stderr.exact
+++ b/tests/cli-tests/dictionaries/dictionary-mismatch.sh.stderr.exact
@@ -1,8 +1,6 @@
-+ zstd files/0 -D dicts/0
-+ zstd -t files/0.zst -D dicts/0
-+ zstd -t files/0.zst -D dicts/1
+zstd files/0 -D dicts/0
+zstd -t files/0.zst -D dicts/0
+zstd -t files/0.zst -D dicts/1 && die "Must fail" ||:
 files/0.zst : Decoding error (36) : Dictionary mismatch 
-+ :
-+ zstd -t files/0.zst
+zstd -t files/0.zst            && die "Must fail" ||:
 files/0.zst : Decoding error (36) : Dictionary mismatch 
-+ :
diff --git a/tests/cli-tests/dictionaries/setup_once b/tests/cli-tests/dictionaries/setup_once
index 6316df16..1241c578 100755
--- a/tests/cli-tests/dictionaries/setup_once
+++ b/tests/cli-tests/dictionaries/setup_once
@@ -2,7 +2,7 @@
 
 set -e
 
-source "$COMMON/platform.sh"
+. "$COMMON/platform.sh"
 
 
 mkdir files/ dicts/

From 8d65f87416740444da4a713d2778b78c11c6b38b Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Sun, 30 Jan 2022 12:16:16 -0800
Subject: [PATCH 6/8] Fix static analysis false-positives

* It couldn't detect that the `fastCoverParams` can't be non-null, since it was just an assertion.
* It thought we were accesing `wksp->dtable` beyond the bounds because we were using it to set the `workSpace` value. Instead, compute the workspace size used in a different way.
---
 lib/common/fse.h            | 2 +-
 lib/common/fse_decompress.c | 3 ++-
 programs/dibio.c            | 8 +++++---
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/lib/common/fse.h b/lib/common/fse.h
index 714bfd3e..bd29e9ac 100644
--- a/lib/common/fse.h
+++ b/lib/common/fse.h
@@ -353,7 +353,7 @@ size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
 size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
 /**< build a fake FSE_DTable, designed to always generate the same symbolValue */
 
-#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
+#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
 #define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
 size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
 /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */
diff --git a/lib/common/fse_decompress.c b/lib/common/fse_decompress.c
index a5a35801..bc0c1be2 100644
--- a/lib/common/fse_decompress.c
+++ b/lib/common/fse_decompress.c
@@ -342,7 +342,8 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
     }
 
     if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
-    workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog);
+    assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize);
+    workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
     wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
 
     CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
diff --git a/programs/dibio.c b/programs/dibio.c
index 147d1e7b..fddbc9e5 100644
--- a/programs/dibio.c
+++ b/programs/dibio.c
@@ -31,6 +31,7 @@
 #include "timefn.h"         /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */
 #include "../lib/common/debug.h" /* assert */
 #include "../lib/common/mem.h"  /* read */
+#include "../lib/zstd_errors.h"
 #include "dibio.h"
 
 
@@ -380,7 +381,7 @@ int DiB_trainFromFiles(const char* dictFileName, size_t maxDictSize,
         srcBuffer, &loadedSize, sampleSizes, fs.nbSamples, fileNamesTable,
         nbFiles, chunkSize, displayLevel);
 
-    {   size_t dictSize;
+    {   size_t dictSize = ZSTD_error_GENERIC;
         if (params) {
             DiB_fillNoise((char*)srcBuffer + loadedSize, NOISELENGTH);   /* guard band, for end of buffer condition */
             dictSize = ZDICT_trainFromBuffer_legacy(dictBuffer, maxDictSize,
@@ -400,8 +401,7 @@ int DiB_trainFromFiles(const char* dictFileName, size_t maxDictSize,
               dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, srcBuffer,
                                                      sampleSizes, nbSamplesLoaded, *coverParams);
             }
-        } else {
-            assert(fastCoverParams != NULL);
+        } else if (fastCoverParams != NULL) {
             if (optimize) {
               dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize,
                                                               srcBuffer, sampleSizes, nbSamplesLoaded,
@@ -416,6 +416,8 @@ int DiB_trainFromFiles(const char* dictFileName, size_t maxDictSize,
               dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, srcBuffer,
                                                         sampleSizes, nbSamplesLoaded, *fastCoverParams);
             }
+        } else {
+            assert(0 /* Impossible */);
         }
         if (ZDICT_isError(dictSize)) {
             DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize));   /* should not happen */

From cc0657f27d81da8a7db3aa199d24a566b95c4dfe Mon Sep 17 00:00:00 2001
From: Yonatan Komornik <11005061+yoniko@users.noreply.github.com>
Date: Mon, 31 Jan 2022 15:43:41 -0800
Subject: [PATCH 7/8] AsyncIO compression part 2 - added async read and asyncio
 to compression code (#3022)

* Compression asyncio:
- Added asyncio functionality for compression flow
- Added ReadPool for async reads, implemented in both comp and decomp flows
---
 programs/README.md        |   1 +
 programs/fileio.c         | 423 ++++++++++++++++++--------------------
 programs/fileio_asyncio.c | 353 ++++++++++++++++++++++++++-----
 programs/fileio_asyncio.h |  85 ++++++--
 programs/fileio_common.h  |   2 +-
 programs/fileio_types.h   |   4 +-
 programs/zstdcli.c        |   8 +-
 tests/playTests.sh        |  15 +-
 8 files changed, 590 insertions(+), 301 deletions(-)

diff --git a/programs/README.md b/programs/README.md
index 5570f90c..b88cf78d 100644
--- a/programs/README.md
+++ b/programs/README.md
@@ -164,6 +164,7 @@ Advanced arguments :
 --filelist FILE : read list of files to operate upon from FILE
 --output-dir-flat DIR : processed files are stored into DIR
 --output-dir-mirror DIR : processed files are stored into DIR respecting original directory structure
+--[no-]asyncio : use asynchronous IO (default: enabled)
 --[no-]check : during compression, add XXH64 integrity checksum to frame (default: enabled). If specified with -d, decompressor will ignore/validate checksums in compressed frame (default: validate).
 --      : All arguments after "--" are treated as files
 
diff --git a/programs/fileio.c b/programs/fileio.c
index 64909b96..502f69c1 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -289,7 +289,7 @@ FIO_prefs_t* FIO_createPreferences(void)
     ret->literalCompressionMode = ZSTD_ps_auto;
     ret->excludeCompressedFiles = 0;
     ret->allowBlockDevices = 0;
-    ret->asyncIO = 0;
+    ret->asyncIO = AIO_supported();
     return ret;
 }
 
@@ -848,16 +848,12 @@ static int FIO_removeMultiFilesWarning(FIO_ctx_t* const fCtx, const FIO_prefs_t*
  *  Compression
  ************************************************************************/
 typedef struct {
-    FILE* srcFile;
-    FILE* dstFile;
-    void*  srcBuffer;
-    size_t srcBufferSize;
-    void*  dstBuffer;
-    size_t dstBufferSize;
     void* dictBuffer;
     size_t dictBufferSize;
     const char* dictFileName;
     ZSTD_CStream* cctx;
+    WritePoolCtx_t *writeCtx;
+    ReadPoolCtx_t *readCtx;
 } cRess_t;
 
 /** ZSTD_cycleLog() :
@@ -906,9 +902,6 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
     if (ress.cctx == NULL)
         EXM_THROW(30, "allocation error (%s): can't create ZSTD_CCtx",
                     strerror(errno));
-    ress.srcBufferSize = ZSTD_CStreamInSize();
-    ress.srcBuffer = malloc(ress.srcBufferSize);
-    ress.dstBufferSize = ZSTD_CStreamOutSize();
 
     /* need to update memLimit before calling createDictBuffer
      * because of memLimit check inside it */
@@ -916,10 +909,10 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
         unsigned long long const ssSize = (unsigned long long)prefs->streamSrcSize;
         FIO_adjustParamsForPatchFromMode(prefs, &comprParams, UTIL_getFileSize(dictFileName), ssSize > 0 ? ssSize : maxSrcFileSize, cLevel);
     }
-    ress.dstBuffer = malloc(ress.dstBufferSize);
     ress.dictBufferSize = FIO_createDictBuffer(&ress.dictBuffer, dictFileName, prefs);   /* works with dictFileName==NULL */
-    if (!ress.srcBuffer || !ress.dstBuffer)
-        EXM_THROW(31, "allocation error : not enough memory");
+
+    ress.writeCtx = AIO_WritePool_create(prefs, ZSTD_CStreamOutSize());
+    ress.readCtx = AIO_ReadPool_create(prefs, ZSTD_CStreamInSize());
 
     /* Advanced parameters, including dictionary */
     if (dictFileName && (ress.dictBuffer==NULL))
@@ -982,9 +975,9 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
 
 static void FIO_freeCResources(const cRess_t* const ress)
 {
-    free(ress->srcBuffer);
-    free(ress->dstBuffer);
     free(ress->dictBuffer);
+    AIO_WritePool_free(ress->writeCtx);
+    AIO_ReadPool_free(ress->readCtx);
     ZSTD_freeCStream(ress->cctx);   /* never fails */
 }
 
@@ -997,6 +990,7 @@ FIO_compressGzFrame(const cRess_t* ress,  /* buffers & handlers are used, but no
 {
     unsigned long long inFileSize = 0, outFileSize = 0;
     z_stream strm;
+    IOJob_t *writeJob = NULL;
 
     if (compressionLevel > Z_BEST_COMPRESSION)
         compressionLevel = Z_BEST_COMPRESSION;
@@ -1012,51 +1006,58 @@ FIO_compressGzFrame(const cRess_t* ress,  /* buffers & handlers are used, but no
             EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret);
     }   }
 
+    writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
     strm.next_in = 0;
     strm.avail_in = 0;
-    strm.next_out = (Bytef*)ress->dstBuffer;
-    strm.avail_out = (uInt)ress->dstBufferSize;
+    strm.next_out = (Bytef*)writeJob->buffer;
+    strm.avail_out = (uInt)writeJob->bufferSize;
 
     while (1) {
         int ret;
         if (strm.avail_in == 0) {
-            size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile);
-            if (inSize == 0) break;
-            inFileSize += inSize;
-            strm.next_in = (z_const unsigned char*)ress->srcBuffer;
-            strm.avail_in = (uInt)inSize;
+            AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_CStreamInSize());
+            if (ress->readCtx->srcBufferLoaded == 0) break;
+            inFileSize += ress->readCtx->srcBufferLoaded;
+            strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer;
+            strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded;
         }
-        ret = deflate(&strm, Z_NO_FLUSH);
+
+        {
+            size_t const availBefore = strm.avail_in;
+            ret = deflate(&strm, Z_NO_FLUSH);
+            AIO_ReadPool_consumeBytes(ress->readCtx, availBefore - strm.avail_in);
+        }
+
         if (ret != Z_OK)
             EXM_THROW(72, "zstd: %s: deflate error %d \n", srcFileName, ret);
-        {   size_t const cSize = ress->dstBufferSize - strm.avail_out;
+        {   size_t const cSize = writeJob->bufferSize - strm.avail_out;
             if (cSize) {
-                if (fwrite(ress->dstBuffer, 1, cSize, ress->dstFile) != cSize)
-                    EXM_THROW(73, "Write error : cannot write to output file : %s ", strerror(errno));
+                writeJob->usedBufferSize = cSize;
+                AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
                 outFileSize += cSize;
-                strm.next_out = (Bytef*)ress->dstBuffer;
-                strm.avail_out = (uInt)ress->dstBufferSize;
-        }   }
+                strm.next_out = (Bytef*)writeJob->buffer;
+                strm.avail_out = (uInt)writeJob->bufferSize;
+            }   }
         if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
             DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ",
-                            (unsigned)(inFileSize>>20),
-                            (double)outFileSize/inFileSize*100)
+                          (unsigned)(inFileSize>>20),
+                          (double)outFileSize/inFileSize*100)
         } else {
             DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%% ",
-                            (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
-                            (double)outFileSize/inFileSize*100);
-    }   }
+                          (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
+                          (double)outFileSize/inFileSize*100);
+        }   }
 
     while (1) {
         int const ret = deflate(&strm, Z_FINISH);
-        {   size_t const cSize = ress->dstBufferSize - strm.avail_out;
+        {   size_t const cSize = writeJob->bufferSize - strm.avail_out;
             if (cSize) {
-                if (fwrite(ress->dstBuffer, 1, cSize, ress->dstFile) != cSize)
-                    EXM_THROW(75, "Write error : %s ", strerror(errno));
+                writeJob->usedBufferSize = cSize;
+                AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
                 outFileSize += cSize;
-                strm.next_out = (Bytef*)ress->dstBuffer;
-                strm.avail_out = (uInt)ress->dstBufferSize;
-        }   }
+                strm.next_out = (Bytef*)writeJob->buffer;
+                strm.avail_out = (uInt)writeJob->bufferSize;
+            }   }
         if (ret == Z_STREAM_END) break;
         if (ret != Z_BUF_ERROR)
             EXM_THROW(77, "zstd: %s: deflate error %d \n", srcFileName, ret);
@@ -1067,6 +1068,8 @@ FIO_compressGzFrame(const cRess_t* ress,  /* buffers & handlers are used, but no
             EXM_THROW(79, "zstd: %s: deflateEnd error %d \n", srcFileName, ret);
     }   }
     *readsize = inFileSize;
+    AIO_WritePool_releaseIoJob(writeJob);
+    AIO_WritePool_sparseWriteEnd(ress->writeCtx);
     return outFileSize;
 }
 #endif
@@ -1082,6 +1085,7 @@ FIO_compressLzmaFrame(cRess_t* ress,
     lzma_stream strm = LZMA_STREAM_INIT;
     lzma_action action = LZMA_RUN;
     lzma_ret ret;
+    IOJob_t *writeJob = NULL;
 
     if (compressionLevel < 0) compressionLevel = 0;
     if (compressionLevel > 9) compressionLevel = 9;
@@ -1099,31 +1103,37 @@ FIO_compressLzmaFrame(cRess_t* ress,
             EXM_THROW(83, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret);
     }
 
+    writeJob =AIO_WritePool_acquireJob(ress->writeCtx);
+    strm.next_out = (Bytef*)writeJob->buffer;
+    strm.avail_out = (uInt)writeJob->bufferSize;
     strm.next_in = 0;
     strm.avail_in = 0;
-    strm.next_out = (BYTE*)ress->dstBuffer;
-    strm.avail_out = ress->dstBufferSize;
 
     while (1) {
         if (strm.avail_in == 0) {
-            size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile);
-            if (inSize == 0) action = LZMA_FINISH;
+            size_t const inSize = AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_CStreamInSize());
+            if (ress->readCtx->srcBufferLoaded == 0) action = LZMA_FINISH;
             inFileSize += inSize;
-            strm.next_in = (BYTE const*)ress->srcBuffer;
-            strm.avail_in = inSize;
+            strm.next_in = (BYTE const*)ress->readCtx->srcBuffer;
+            strm.avail_in = ress->readCtx->srcBufferLoaded;
+        }
+
+        {
+            size_t const availBefore = strm.avail_in;
+            ret = lzma_code(&strm, action);
+            AIO_ReadPool_consumeBytes(ress->readCtx, availBefore - strm.avail_in);
         }
 
-        ret = lzma_code(&strm, action);
 
         if (ret != LZMA_OK && ret != LZMA_STREAM_END)
             EXM_THROW(84, "zstd: %s: lzma_code encoding error %d", srcFileName, ret);
-        {   size_t const compBytes = ress->dstBufferSize - strm.avail_out;
+        {   size_t const compBytes = writeJob->bufferSize - strm.avail_out;
             if (compBytes) {
-                if (fwrite(ress->dstBuffer, 1, compBytes, ress->dstFile) != compBytes)
-                    EXM_THROW(85, "Write error : %s", strerror(errno));
+                writeJob->usedBufferSize = compBytes;
+                AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
                 outFileSize += compBytes;
-                strm.next_out = (BYTE*)ress->dstBuffer;
-                strm.avail_out = ress->dstBufferSize;
+                strm.next_out = (Bytef*)writeJob->buffer;
+                strm.avail_out = writeJob->bufferSize;
         }   }
         if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
             DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
@@ -1139,6 +1149,9 @@ FIO_compressLzmaFrame(cRess_t* ress,
     lzma_end(&strm);
     *readsize = inFileSize;
 
+    AIO_WritePool_releaseIoJob(writeJob);
+    AIO_WritePool_sparseWriteEnd(ress->writeCtx);
+
     return outFileSize;
 }
 #endif
@@ -1164,15 +1177,18 @@ FIO_compressLz4Frame(cRess_t* ress,
     LZ4F_preferences_t prefs;
     LZ4F_compressionContext_t ctx;
 
+    IOJob_t* writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
+
     LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
     if (LZ4F_isError(errorCode))
         EXM_THROW(31, "zstd: failed to create lz4 compression context");
 
     memset(&prefs, 0, sizeof(prefs));
 
-    assert(blockSize <= ress->srcBufferSize);
+    assert(blockSize <= ress->readCtx->base.jobBufferSize);
 
-    prefs.autoFlush = 1;
+    /* autoflush off to mitigate a bug in lz4<=1.9.3 for compression level 12 */
+    prefs.autoFlush = 0;
     prefs.compressionLevel = compressionLevel;
     prefs.frameInfo.blockMode = LZ4F_blockLinked;
     prefs.frameInfo.blockSizeID = LZ4F_max64KB;
@@ -1180,27 +1196,25 @@ FIO_compressLz4Frame(cRess_t* ress,
 #if LZ4_VERSION_NUMBER >= 10600
     prefs.frameInfo.contentSize = (srcFileSize==UTIL_FILESIZE_UNKNOWN) ? 0 : srcFileSize;
 #endif
-    assert(LZ4F_compressBound(blockSize, &prefs) <= ress->dstBufferSize);
+    assert(LZ4F_compressBound(blockSize, &prefs) <= writeJob->bufferSize);
 
     {
-        size_t readSize;
-        size_t headerSize = LZ4F_compressBegin(ctx, ress->dstBuffer, ress->dstBufferSize, &prefs);
+        size_t headerSize = LZ4F_compressBegin(ctx, writeJob->buffer, writeJob->bufferSize, &prefs);
         if (LZ4F_isError(headerSize))
             EXM_THROW(33, "File header generation failed : %s",
                             LZ4F_getErrorName(headerSize));
-        if (fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile) != headerSize)
-            EXM_THROW(34, "Write error : %s (cannot write header)", strerror(errno));
+        writeJob->usedBufferSize = headerSize;
+        AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
         outFileSize += headerSize;
 
         /* Read first block */
-        readSize  = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile);
-        inFileSize += readSize;
+        inFileSize += AIO_ReadPool_fillBuffer(ress->readCtx, blockSize);
 
         /* Main Loop */
-        while (readSize>0) {
-            size_t const outSize = LZ4F_compressUpdate(ctx,
-                                        ress->dstBuffer, ress->dstBufferSize,
-                                        ress->srcBuffer, readSize, NULL);
+        while (ress->readCtx->srcBufferLoaded) {
+            size_t inSize = MIN(blockSize, ress->readCtx->srcBufferLoaded);
+            size_t const outSize = LZ4F_compressUpdate(ctx, writeJob->buffer, writeJob->bufferSize,
+                                                       ress->readCtx->srcBuffer, inSize, NULL);
             if (LZ4F_isError(outSize))
                 EXM_THROW(35, "zstd: %s: lz4 compression failed : %s",
                             srcFileName, LZ4F_getErrorName(outSize));
@@ -1216,33 +1230,29 @@ FIO_compressLz4Frame(cRess_t* ress,
             }
 
             /* Write Block */
-            {   size_t const sizeCheck = fwrite(ress->dstBuffer, 1, outSize, ress->dstFile);
-                if (sizeCheck != outSize)
-                    EXM_THROW(36, "Write error : %s", strerror(errno));
-            }
+            writeJob->usedBufferSize = outSize;
+            AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
 
             /* Read next block */
-            readSize  = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile);
-            inFileSize += readSize;
+            AIO_ReadPool_consumeBytes(ress->readCtx, inSize);
+            inFileSize += AIO_ReadPool_fillBuffer(ress->readCtx, blockSize);
         }
-        if (ferror(ress->srcFile)) EXM_THROW(37, "Error reading %s ", srcFileName);
 
         /* End of Stream mark */
-        headerSize = LZ4F_compressEnd(ctx, ress->dstBuffer, ress->dstBufferSize, NULL);
+        headerSize = LZ4F_compressEnd(ctx, writeJob->buffer, writeJob->bufferSize, NULL);
         if (LZ4F_isError(headerSize))
             EXM_THROW(38, "zstd: %s: lz4 end of file generation failed : %s",
                         srcFileName, LZ4F_getErrorName(headerSize));
 
-        {   size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile);
-            if (sizeCheck != headerSize)
-                EXM_THROW(39, "Write error : %s (cannot write end of stream)",
-                            strerror(errno));
-        }
+        writeJob->usedBufferSize = headerSize;
+        AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
         outFileSize += headerSize;
     }
 
     *readsize = inFileSize;
     LZ4F_freeCompressionContext(ctx);
+    AIO_WritePool_releaseIoJob(writeJob);
+    AIO_WritePool_sparseWriteEnd(ress->writeCtx);
 
     return outFileSize;
 }
@@ -1257,8 +1267,8 @@ FIO_compressZstdFrame(FIO_ctx_t* const fCtx,
                       int compressionLevel, U64* readsize)
 {
     cRess_t const ress = *ressPtr;
-    FILE* const srcFile = ress.srcFile;
-    FILE* const dstFile = ress.dstFile;
+    IOJob_t *writeJob = AIO_WritePool_acquireJob(ressPtr->writeCtx);
+
     U64 compressedfilesize = 0;
     ZSTD_EndDirective directive = ZSTD_e_continue;
     U64 pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
@@ -1303,12 +1313,12 @@ FIO_compressZstdFrame(FIO_ctx_t* const fCtx,
     do {
         size_t stillToFlush;
         /* Fill input Buffer */
-        size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile);
-        ZSTD_inBuffer inBuff = { ress.srcBuffer, inSize, 0 };
+        size_t const inSize = AIO_ReadPool_fillBuffer(ress.readCtx, ZSTD_CStreamInSize());
+        ZSTD_inBuffer inBuff = { ress.readCtx->srcBuffer, ress.readCtx->srcBufferLoaded, 0 };
         DISPLAYLEVEL(6, "fread %u bytes from source \n", (unsigned)inSize);
         *readsize += inSize;
 
-        if ((inSize == 0) || (*readsize == fileSize))
+        if ((ress.readCtx->srcBufferLoaded == 0) || (*readsize == fileSize))
             directive = ZSTD_e_end;
 
         stillToFlush = 1;
@@ -1316,9 +1326,10 @@ FIO_compressZstdFrame(FIO_ctx_t* const fCtx,
             || (directive == ZSTD_e_end && stillToFlush != 0) ) {
 
             size_t const oldIPos = inBuff.pos;
-            ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 };
+            ZSTD_outBuffer outBuff= { writeJob->buffer, writeJob->bufferSize, 0 };
             size_t const toFlushNow = ZSTD_toFlushNow(ress.cctx);
             CHECK_V(stillToFlush, ZSTD_compressStream2(ress.cctx, &outBuff, &inBuff, directive));
+            AIO_ReadPool_consumeBytes(ress.readCtx, inBuff.pos - oldIPos);
 
             /* count stats */
             inputPresented++;
@@ -1327,12 +1338,10 @@ FIO_compressZstdFrame(FIO_ctx_t* const fCtx,
 
             /* Write compressed stream */
             DISPLAYLEVEL(6, "ZSTD_compress_generic(end:%u) => input pos(%u)<=(%u)size ; output generated %u bytes \n",
-                            (unsigned)directive, (unsigned)inBuff.pos, (unsigned)inBuff.size, (unsigned)outBuff.pos);
+                         (unsigned)directive, (unsigned)inBuff.pos, (unsigned)inBuff.size, (unsigned)outBuff.pos);
             if (outBuff.pos) {
-                size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile);
-                if (sizeCheck != outBuff.pos)
-                    EXM_THROW(25, "Write error : %s (cannot write compressed block)",
-                                    strerror(errno));
+                writeJob->usedBufferSize = outBuff.pos;
+                AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
                 compressedfilesize += outBuff.pos;
             }
 
@@ -1464,14 +1473,14 @@ FIO_compressZstdFrame(FIO_ctx_t* const fCtx,
         }  /* while ((inBuff.pos != inBuff.size) */
     } while (directive != ZSTD_e_end);
 
-    if (ferror(srcFile)) {
-        EXM_THROW(26, "Read error : I/O error");
-    }
     if (fileSize != UTIL_FILESIZE_UNKNOWN && *readsize != fileSize) {
         EXM_THROW(27, "Read error : Incomplete read : %llu / %llu B",
                 (unsigned long long)*readsize, (unsigned long long)fileSize);
     }
 
+    AIO_WritePool_releaseIoJob(writeJob);
+    AIO_WritePool_sparseWriteEnd(ressPtr->writeCtx);
+
     return compressedfilesize;
 }
 
@@ -1572,7 +1581,7 @@ FIO_compressFilename_internal(FIO_ctx_t* const fCtx,
 
 
 /*! FIO_compressFilename_dstFile() :
- *  open dstFileName, or pass-through if ress.dstFile != NULL,
+ *  open dstFileName, or pass-through if ress.file != NULL,
  *  then start compression with FIO_compressFilename_internal().
  *  Manages source removal (--rm) and file permissions transfer.
  *  note : ress.srcFile must be != NULL,
@@ -1591,8 +1600,9 @@ static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx,
     int result;
     stat_t statbuf;
     int transferMTime = 0;
-    assert(ress.srcFile != NULL);
-    if (ress.dstFile == NULL) {
+    FILE *dstFile;
+    assert(AIO_ReadPool_getFile(ress.readCtx) != NULL);
+    if (AIO_WritePool_getFile(ress.writeCtx) == NULL) {
         int dstFilePermissions = DEFAULT_FILE_PERMISSIONS;
         if ( strcmp (srcFileName, stdinmark)
           && strcmp (dstFileName, stdoutmark)
@@ -1604,8 +1614,9 @@ static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx,
 
         closeDstFile = 1;
         DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s \n", dstFileName);
-        ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions);
-        if (ress.dstFile==NULL) return 1;  /* could not open dstFileName */
+        dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions);
+        if (dstFile==NULL) return 1;  /* could not open dstFileName */
+        AIO_WritePool_setFile(ress.writeCtx, dstFile);
         /* Must only be added after FIO_openDstFile() succeeds.
          * Otherwise we may delete the destination file if it already exists,
          * and the user presses Ctrl-C when asked if they wish to overwrite.
@@ -1616,13 +1627,10 @@ static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx,
     result = FIO_compressFilename_internal(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
 
     if (closeDstFile) {
-        FILE* const dstFile = ress.dstFile;
-        ress.dstFile = NULL;
-
         clearHandler();
 
         DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: closing dst: %s \n", dstFileName);
-        if (fclose(dstFile)) { /* error closing dstFile */
+        if (AIO_WritePool_closeFile(ress.writeCtx)) { /* error closing file */
             DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
             result=1;
         }
@@ -1668,6 +1676,7 @@ FIO_compressFilename_srcFile(FIO_ctx_t* const fCtx,
                              int compressionLevel)
 {
     int result;
+    FILE* srcFile;
     DISPLAYLEVEL(6, "FIO_compressFilename_srcFile: %s \n", srcFileName);
 
     /* ensure src is not a directory */
@@ -1691,13 +1700,13 @@ FIO_compressFilename_srcFile(FIO_ctx_t* const fCtx,
         return 0;
     }
 
-    ress.srcFile = FIO_openSrcFile(prefs, srcFileName);
-    if (ress.srcFile == NULL) return 1;   /* srcFile could not be opened */
+    srcFile = FIO_openSrcFile(prefs, srcFileName);
+    if (srcFile == NULL) return 1;   /* srcFile could not be opened */
 
+    AIO_ReadPool_setFile(ress.readCtx, srcFile);
     result = FIO_compressFilename_dstFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
+    AIO_ReadPool_closeFile(ress.readCtx);
 
-    fclose(ress.srcFile);
-    ress.srcFile = NULL;
     if ( prefs->removeSrcFile   /* --rm */
       && result == 0       /* success */
       && strcmp(srcFileName, stdinmark)   /* exception : don't erase stdin */
@@ -1844,23 +1853,24 @@ int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx,
     /* init */
     assert(outFileName != NULL || suffix != NULL);
     if (outFileName != NULL) {   /* output into a single destination (stdout typically) */
+        FILE *dstFile;
         if (FIO_removeMultiFilesWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) {
             FIO_freeCResources(&ress);
             return 1;
         }
-        ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS);
-        if (ress.dstFile == NULL) {  /* could not open outFileName */
+        dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS);
+        if (dstFile == NULL) {  /* could not open outFileName */
             error = 1;
         } else {
+            AIO_WritePool_setFile(ress.writeCtx, dstFile);
             for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) {
                 status = FIO_compressFilename_srcFile(fCtx, prefs, ress, outFileName, inFileNamesTable[fCtx->currFileIdx], compressionLevel);
                 if (!status) fCtx->nbFilesProcessed++;
                 error |= status;
             }
-            if (fclose(ress.dstFile))
+            if (AIO_WritePool_closeFile(ress.writeCtx))
                 EXM_THROW(29, "Write error (%s) : cannot properly close %s",
                             strerror(errno), outFileName);
-            ress.dstFile = NULL;
         }
     } else {
         if (outMirroredRootDirName)
@@ -1916,13 +1926,10 @@ int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx,
 /* **************************************************************************
  *  Decompression
  ***************************************************************************/
-
 typedef struct {
-    void*  srcBuffer;
-    size_t srcBufferSize;
-    size_t srcBufferLoaded;
     ZSTD_DStream* dctx;
     WritePoolCtx_t *writeCtx;
+    ReadPoolCtx_t *readCtx;
 } dRess_t;
 
 static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFileName)
@@ -1940,11 +1947,6 @@ static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFi
     CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) );
     CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, !prefs->checksumFlag));
 
-    ress.srcBufferSize = ZSTD_DStreamInSize();
-    ress.srcBuffer = malloc(ress.srcBufferSize);
-    if (!ress.srcBuffer)
-        EXM_THROW(61, "Allocation error : not enough memory");
-
     /* dictionary */
     {   void* dictBuffer;
         size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName, prefs);
@@ -1953,6 +1955,7 @@ static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFi
     }
 
     ress.writeCtx = AIO_WritePool_create(prefs, ZSTD_DStreamOutSize());
+    ress.readCtx = AIO_ReadPool_create(prefs, ZSTD_DStreamInSize());
 
     return ress;
 }
@@ -1960,47 +1963,31 @@ static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFi
 static void FIO_freeDResources(dRess_t ress)
 {
     CHECK( ZSTD_freeDStream(ress.dctx) );
-    free(ress.srcBuffer);
     AIO_WritePool_free(ress.writeCtx);
-}
-
-/* FIO_consumeDSrcBuffer:
- * Consumes len bytes from srcBuffer's start and moves the remaining data and srcBufferLoaded accordingly. */
-static void FIO_consumeDSrcBuffer(dRess_t *ress, size_t len) {
-    assert(ress->srcBufferLoaded >= len);
-    ress->srcBufferLoaded -= len;
-    memmove(ress->srcBuffer, (char *)ress->srcBuffer + len, ress->srcBufferLoaded);
+    AIO_ReadPool_free(ress.readCtx);
 }
 
 /** FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode
     @return : 0 (no error) */
-static int FIO_passThrough(const FIO_prefs_t* const prefs,
-                           FILE* foutput, FILE* finput,
-                           void* buffer, size_t bufferSize,
-                           size_t alreadyLoaded)
+static int FIO_passThrough(dRess_t *ress)
 {
-    size_t const blockSize = MIN(64 KB, bufferSize);
-    size_t readFromInput;
-    unsigned storedSkips = 0;
+    size_t const blockSize = MIN(MIN(64 KB, ZSTD_DStreamInSize()), ZSTD_DStreamOutSize());
+    IOJob_t *writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
+    AIO_ReadPool_fillBuffer(ress->readCtx, blockSize);
 
-    /* assumption : ress->srcBufferLoaded bytes already loaded and stored within buffer */
-    {   size_t const sizeCheck = fwrite(buffer, 1, alreadyLoaded, foutput);
-        if (sizeCheck != alreadyLoaded) {
-            DISPLAYLEVEL(1, "Pass-through write error : %s\n", strerror(errno));
-            return 1;
-    }   }
-
-    do {
-        readFromInput = fread(buffer, 1, blockSize, finput);
-        storedSkips = AIO_fwriteSparse(foutput, buffer, readFromInput, prefs, storedSkips);
-    } while (readFromInput == blockSize);
-    if (ferror(finput)) {
-        DISPLAYLEVEL(1, "Pass-through read error : %s\n", strerror(errno));
-        return 1;
+    while(ress->readCtx->srcBufferLoaded) {
+        size_t writeSize;
+        writeSize = MIN(blockSize, ress->readCtx->srcBufferLoaded);
+        assert(writeSize <= writeJob->bufferSize);
+        memcpy(writeJob->buffer, ress->readCtx->srcBuffer, writeSize);
+        writeJob->usedBufferSize = writeSize;
+        AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
+        AIO_ReadPool_consumeBytes(ress->readCtx, writeSize);
+        AIO_ReadPool_fillBuffer(ress->readCtx, blockSize);
     }
-    assert(feof(finput));
-
-    AIO_fwriteSparseEnd(prefs, foutput, storedSkips);
+    assert(ress->readCtx->reachedEof);
+    AIO_WritePool_releaseIoJob(writeJob);
+    AIO_WritePool_sparseWriteEnd(ress->writeCtx);
     return 0;
 }
 
@@ -2018,7 +2005,7 @@ FIO_zstdErrorHelp(const FIO_prefs_t* const prefs,
         return;
 
     /* Try to decode the frame header */
-    err = ZSTD_getFrameHeader(&header, ress->srcBuffer, ress->srcBufferLoaded);
+    err = ZSTD_getFrameHeader(&header, ress->readCtx->srcBuffer, ress->readCtx->srcBufferLoaded);
     if (err == 0) {
         unsigned long long const windowSize = header.windowSize;
         unsigned const windowLog = FIO_highbit64(windowSize) + ((windowSize & (windowSize - 1)) != 0);
@@ -2041,7 +2028,7 @@ FIO_zstdErrorHelp(const FIO_prefs_t* const prefs,
  */
 #define FIO_ERROR_FRAME_DECODING   ((unsigned long long)(-2))
 static unsigned long long
-FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, FILE* finput,
+FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress,
                         const FIO_prefs_t* const prefs,
                         const char* srcFileName,
                         U64 alreadyDecoded)  /* for multi-frames streams */
@@ -2057,16 +2044,11 @@ FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, FILE* finput,
     ZSTD_DCtx_reset(ress->dctx, ZSTD_reset_session_only);
 
     /* Header loading : ensures ZSTD_getFrameHeader() will succeed */
-    {   size_t const toDecode = ZSTD_FRAMEHEADERSIZE_MAX;
-        if (ress->srcBufferLoaded < toDecode) {
-            size_t const toRead = toDecode - ress->srcBufferLoaded;
-            void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded;
-            ress->srcBufferLoaded += fread(startPosition, 1, toRead, finput);
-    }   }
+    AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_FRAMEHEADERSIZE_MAX);
 
     /* Main decompression Loop */
     while (1) {
-        ZSTD_inBuffer  inBuff = { ress->srcBuffer, ress->srcBufferLoaded, 0 };
+        ZSTD_inBuffer  inBuff = { ress->readCtx->srcBuffer, ress->readCtx->srcBufferLoaded, 0 };
         ZSTD_outBuffer outBuff= { writeJob->buffer, writeJob->bufferSize, 0 };
         size_t const readSizeHint = ZSTD_decompressStream(ress->dctx, &outBuff, &inBuff);
         const int displayLevel = (g_display_prefs.progressSetting == FIO_ps_always) ? 1 : 2;
@@ -2088,7 +2070,7 @@ FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, FILE* finput,
             if (srcFileNameSize > 18) {
                 const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15;
                 DISPLAYUPDATE(displayLevel, "\rDecompress: %2u/%2u files. Current: ...%s : %.*f%s...    ",
-                                fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, hrs.precision, hrs.value, hrs.suffix);
+                              fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, hrs.precision, hrs.value, hrs.suffix);
             } else {
                 DISPLAYUPDATE(displayLevel, "\rDecompress: %2u/%2u files. Current: %s : %.*f%s...    ",
                             fCtx->currFileIdx+1, fCtx->nbFilesTotal, srcFileName, hrs.precision, hrs.value, hrs.suffix);
@@ -2098,23 +2080,21 @@ FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, FILE* finput,
                             srcFileName, hrs.precision, hrs.value, hrs.suffix);
         }
 
-        FIO_consumeDSrcBuffer(ress, inBuff.pos);
+        AIO_ReadPool_consumeBytes(ress->readCtx, inBuff.pos);
 
         if (readSizeHint == 0) break;   /* end of frame */
 
         /* Fill input buffer */
-        {   size_t const toDecode = MIN(readSizeHint, ress->srcBufferSize);  /* support large skippable frames */
-            if (ress->srcBufferLoaded < toDecode) {
-                size_t const toRead = toDecode - ress->srcBufferLoaded;   /* > 0 */
-                void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded;
-                size_t const readSize = fread(startPosition, 1, toRead, finput);
+        {   size_t const toDecode = MIN(readSizeHint, ZSTD_DStreamInSize());  /* support large skippable frames */
+            if (ress->readCtx->srcBufferLoaded < toDecode) {
+                size_t const readSize = AIO_ReadPool_fillBuffer(ress->readCtx, toDecode);
                 if (readSize==0) {
                     DISPLAYLEVEL(1, "%s : Read error (39) : premature end \n",
-                                    srcFileName);
+                                 srcFileName);
+                    AIO_WritePool_releaseIoJob(writeJob);
                     return FIO_ERROR_FRAME_DECODING;
                 }
-                ress->srcBufferLoaded += readSize;
-    }   }   }
+            }   }   }
 
     AIO_WritePool_releaseIoJob(writeJob);
     AIO_WritePool_sparseWriteEnd(ress->writeCtx);
@@ -2125,7 +2105,7 @@ FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, FILE* finput,
 
 #ifdef ZSTD_GZDECOMPRESS
 static unsigned long long
-FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, const char* srcFileName)
+FIO_decompressGzFrame(dRess_t* ress, const char* srcFileName)
 {
     unsigned long long outFileSize = 0;
     z_stream strm;
@@ -2145,16 +2125,16 @@ FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, const char* srcFileName)
     writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
     strm.next_out = (Bytef*)writeJob->buffer;
     strm.avail_out = (uInt)writeJob->bufferSize;
-    strm.avail_in = (uInt)ress->srcBufferLoaded;
-    strm.next_in = (z_const unsigned char*)ress->srcBuffer;
+    strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded;
+    strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer;
 
     for ( ; ; ) {
         int ret;
         if (strm.avail_in == 0) {
-            ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile);
-            if (ress->srcBufferLoaded == 0) flush = Z_FINISH;
-            strm.next_in = (z_const unsigned char*)ress->srcBuffer;
-            strm.avail_in = (uInt)ress->srcBufferLoaded;
+            AIO_ReadPool_consumeAndRefill(ress->readCtx);
+            if (ress->readCtx->srcBufferLoaded == 0) flush = Z_FINISH;
+            strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer;
+            strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded;
         }
         ret = inflate(&strm, flush);
         if (ret == Z_BUF_ERROR) {
@@ -2177,7 +2157,7 @@ FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, const char* srcFileName)
         if (ret == Z_STREAM_END) break;
     }
 
-    FIO_consumeDSrcBuffer(ress, ress->srcBufferLoaded - strm.avail_in);
+    AIO_ReadPool_consumeBytes(ress->readCtx, ress->readCtx->srcBufferLoaded - strm.avail_in);
 
     if ( (inflateEnd(&strm) != Z_OK)  /* release resources ; error detected */
       && (decodingError==0) ) {
@@ -2192,7 +2172,7 @@ FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, const char* srcFileName)
 
 #ifdef ZSTD_LZMADECOMPRESS
 static unsigned long long
-FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile,
+FIO_decompressLzmaFrame(dRess_t* ress,
                         const char* srcFileName, int plain_lzma)
 {
     unsigned long long outFileSize = 0;
@@ -2220,16 +2200,16 @@ FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile,
     writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
     strm.next_out = (Bytef*)writeJob->buffer;
     strm.avail_out = (uInt)writeJob->bufferSize;
-    strm.next_in = (BYTE const*)ress->srcBuffer;
-    strm.avail_in = ress->srcBufferLoaded;
+    strm.next_in = (BYTE const*)ress->readCtx->srcBuffer;
+    strm.avail_in = ress->readCtx->srcBufferLoaded;
 
     for ( ; ; ) {
         lzma_ret ret;
         if (strm.avail_in == 0) {
-            ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile);
-            if (ress->srcBufferLoaded == 0) action = LZMA_FINISH;
-            strm.next_in = (BYTE const*)ress->srcBuffer;
-            strm.avail_in = ress->srcBufferLoaded;
+            AIO_ReadPool_consumeAndRefill(ress->readCtx);
+            if (ress->readCtx->srcBufferLoaded == 0) action = LZMA_FINISH;
+            strm.next_in = (BYTE const*)ress->readCtx->srcBuffer;
+            strm.avail_in = ress->readCtx->srcBufferLoaded;
         }
         ret = lzma_code(&strm, action);
 
@@ -2253,7 +2233,7 @@ FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile,
         if (ret == LZMA_STREAM_END) break;
     }
 
-    FIO_consumeDSrcBuffer(ress, ress->srcBufferLoaded - strm.avail_in);
+    AIO_ReadPool_consumeBytes(ress->readCtx, ress->readCtx->srcBufferLoaded - strm.avail_in);
     lzma_end(&strm);
     AIO_WritePool_releaseIoJob(writeJob);
     AIO_WritePool_sparseWriteEnd(ress->writeCtx);
@@ -2263,8 +2243,7 @@ FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile,
 
 #ifdef ZSTD_LZ4DECOMPRESS
 static unsigned long long
-FIO_decompressLz4Frame(dRess_t* ress, FILE* srcFile,
-                       const char* srcFileName)
+FIO_decompressLz4Frame(dRess_t* ress, const char* srcFileName)
 {
     unsigned long long filesize = 0;
     LZ4F_errorCode_t nextToLoad = 4;
@@ -2282,34 +2261,27 @@ FIO_decompressLz4Frame(dRess_t* ress, FILE* srcFile,
 
     /* Main Loop */
     for (;nextToLoad;) {
-        size_t readSize;
         size_t pos = 0;
         size_t decodedBytes = writeJob->bufferSize;
         int fullBufferDecoded = 0;
 
         /* Read input */
-        nextToLoad = MIN(nextToLoad, ress->srcBufferSize-ress->srcBufferLoaded);
-        readSize = fread((char *)ress->srcBuffer + ress->srcBufferLoaded, 1, nextToLoad, srcFile);
-        if(!readSize && ferror(srcFile)) {
-            DISPLAYLEVEL(1, "zstd: %s: read error \n", srcFileName);
-            decodingError=1;
-            break;
-        }
-        if(!readSize && !ress->srcBufferLoaded) break; /* reached end of file */
-        ress->srcBufferLoaded += readSize;
+        AIO_ReadPool_fillBuffer(ress->readCtx, nextToLoad);
+        if(!ress->readCtx->srcBufferLoaded) break; /* reached end of file */
 
-        while ((pos < ress->srcBufferLoaded) || fullBufferDecoded) {  /* still to read, or still to flush */
+        while ((pos < ress->readCtx->srcBufferLoaded) || fullBufferDecoded) {  /* still to read, or still to flush */
             /* Decode Input (at least partially) */
-            size_t remaining = ress->srcBufferLoaded - pos;
+            size_t remaining = ress->readCtx->srcBufferLoaded - pos;
             decodedBytes = writeJob->bufferSize;
-            nextToLoad = LZ4F_decompress(dCtx, writeJob->buffer, &decodedBytes, (char*)(ress->srcBuffer)+pos, &remaining, NULL);
+            nextToLoad = LZ4F_decompress(dCtx, writeJob->buffer, &decodedBytes, (char*)(ress->readCtx->srcBuffer)+pos,
+                                         &remaining, NULL);
             if (LZ4F_isError(nextToLoad)) {
                 DISPLAYLEVEL(1, "zstd: %s: lz4 decompression error : %s \n",
                                 srcFileName, LZ4F_getErrorName(nextToLoad));
                 decodingError = 1; nextToLoad = 0; break;
             }
             pos += remaining;
-            assert(pos <= ress->srcBufferLoaded);
+            assert(pos <= ress->readCtx->srcBufferLoaded);
             fullBufferDecoded = decodedBytes == writeJob->bufferSize;
 
             /* Write Block */
@@ -2324,7 +2296,7 @@ FIO_decompressLz4Frame(dRess_t* ress, FILE* srcFile,
 
             if (!nextToLoad) break;
         }
-        FIO_consumeDSrcBuffer(ress, pos);
+        AIO_ReadPool_consumeBytes(ress->readCtx, pos);
     }
     if (nextToLoad!=0) {
         DISPLAYLEVEL(1, "zstd: %s: unfinished lz4 stream \n", srcFileName);
@@ -2348,23 +2320,20 @@ FIO_decompressLz4Frame(dRess_t* ress, FILE* srcFile,
  *           1 : error
  */
 static int FIO_decompressFrames(FIO_ctx_t* const fCtx,
-                          dRess_t ress, FILE* srcFile,
-                          const FIO_prefs_t* const prefs,
-                          const char* dstFileName, const char* srcFileName)
+                                dRess_t ress, const FIO_prefs_t* const prefs,
+                                const char* dstFileName, const char* srcFileName)
 {
     unsigned readSomething = 0;
     unsigned long long filesize = 0;
-    assert(srcFile != NULL);
 
     /* for each frame */
     for ( ; ; ) {
         /* check magic number -> version */
         size_t const toRead = 4;
-        const BYTE* const buf = (const BYTE*)ress.srcBuffer;
-        if (ress.srcBufferLoaded < toRead)  /* load up to 4 bytes for header */
-            ress.srcBufferLoaded += fread((char*)ress.srcBuffer + ress.srcBufferLoaded,
-                                          (size_t)1, toRead - ress.srcBufferLoaded, srcFile);
-        if (ress.srcBufferLoaded==0) {
+        const BYTE* buf;
+        AIO_ReadPool_fillBuffer(ress.readCtx, toRead);
+        buf = (const BYTE*)ress.readCtx->srcBuffer;
+        if (ress.readCtx->srcBufferLoaded==0) {
             if (readSomething==0) {  /* srcFile is empty (which is invalid) */
                 DISPLAYLEVEL(1, "zstd: %s: unexpected end of file \n", srcFileName);
                 return 1;
@@ -2372,17 +2341,17 @@ static int FIO_decompressFrames(FIO_ctx_t* const fCtx,
             break;   /* no more input */
         }
         readSomething = 1;   /* there is at least 1 byte in srcFile */
-        if (ress.srcBufferLoaded < toRead) {
+        if (ress.readCtx->srcBufferLoaded < toRead) {
             DISPLAYLEVEL(1, "zstd: %s: unknown header \n", srcFileName);
             return 1;
         }
-        if (ZSTD_isFrame(buf, ress.srcBufferLoaded)) {
-            unsigned long long const frameSize = FIO_decompressZstdFrame(fCtx, &ress, srcFile, prefs, srcFileName, filesize);
+        if (ZSTD_isFrame(buf, ress.readCtx->srcBufferLoaded)) {
+            unsigned long long const frameSize = FIO_decompressZstdFrame(fCtx, &ress, prefs, srcFileName, filesize);
             if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
             filesize += frameSize;
         } else if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */
 #ifdef ZSTD_GZDECOMPRESS
-            unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFile, srcFileName);
+            unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFileName);
             if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
             filesize += frameSize;
 #else
@@ -2392,7 +2361,7 @@ static int FIO_decompressFrames(FIO_ctx_t* const fCtx,
         } else if ((buf[0] == 0xFD && buf[1] == 0x37)  /* xz magic number */
                 || (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */
 #ifdef ZSTD_LZMADECOMPRESS
-            unsigned long long const frameSize = FIO_decompressLzmaFrame(&ress, srcFile, srcFileName, buf[0] != 0xFD);
+            unsigned long long const frameSize = FIO_decompressLzmaFrame(&ress, srcFileName, buf[0] != 0xFD);
             if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
             filesize += frameSize;
 #else
@@ -2401,7 +2370,7 @@ static int FIO_decompressFrames(FIO_ctx_t* const fCtx,
 #endif
         } else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) {
 #ifdef ZSTD_LZ4DECOMPRESS
-            unsigned long long const frameSize = FIO_decompressLz4Frame(&ress, srcFile, srcFileName);
+            unsigned long long const frameSize = FIO_decompressLz4Frame(&ress, srcFileName);
             if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
             filesize += frameSize;
 #else
@@ -2409,10 +2378,7 @@ static int FIO_decompressFrames(FIO_ctx_t* const fCtx,
             return 1;
 #endif
         } else if ((prefs->overwrite) && !strcmp (dstFileName, stdoutmark)) {  /* pass-through mode */
-            return FIO_passThrough(prefs,
-                                   AIO_WritePool_getFile(ress.writeCtx), srcFile,
-                                   ress.srcBuffer, ress.srcBufferSize,
-                                   ress.srcBufferLoaded);
+            return FIO_passThrough(&ress);
         } else {
             DISPLAYLEVEL(1, "zstd: %s: unsupported format \n", srcFileName);
             return 1;
@@ -2432,15 +2398,14 @@ static int FIO_decompressFrames(FIO_ctx_t* const fCtx,
 }
 
 /** FIO_decompressDstFile() :
-    open `dstFileName`,
-    or path-through if ress.dstFile is already != 0,
+    open `dstFileName`, or pass-through if writeCtx's file is already != 0,
     then start decompression process (FIO_decompressFrames()).
     @return : 0 : OK
               1 : operation aborted
 */
 static int FIO_decompressDstFile(FIO_ctx_t* const fCtx,
                                  FIO_prefs_t* const prefs,
-                                 dRess_t ress, FILE* srcFile,
+                                 dRess_t ress,
                                  const char* dstFileName, const char* srcFileName)
 {
     int result;
@@ -2472,7 +2437,7 @@ static int FIO_decompressDstFile(FIO_ctx_t* const fCtx,
         addHandler(dstFileName);
     }
 
-    result = FIO_decompressFrames(fCtx, ress, srcFile, prefs, dstFileName, srcFileName);
+    result = FIO_decompressFrames(fCtx, ress, prefs, dstFileName, srcFileName);
 
     if (releaseDstFile) {
         clearHandler();
@@ -2513,9 +2478,11 @@ static int FIO_decompressSrcFile(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs
 
     srcFile = FIO_openSrcFile(prefs, srcFileName);
     if (srcFile==NULL) return 1;
-    ress.srcBufferLoaded = 0;
+    AIO_ReadPool_setFile(ress.readCtx, srcFile);
 
-    result = FIO_decompressDstFile(fCtx, prefs, ress, srcFile, dstFileName, srcFileName);
+    result = FIO_decompressDstFile(fCtx, prefs, ress, dstFileName, srcFileName);
+
+    AIO_ReadPool_setFile(ress.readCtx, NULL);
 
     /* Close file */
     if (fclose(srcFile)) {
diff --git a/programs/fileio_asyncio.c b/programs/fileio_asyncio.c
index 868720a1..332292bb 100644
--- a/programs/fileio_asyncio.c
+++ b/programs/fileio_asyncio.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -29,7 +29,8 @@
 /** AIO_fwriteSparse() :
 *  @return : storedSkips,
 *            argument for next call to AIO_fwriteSparse() or AIO_fwriteSparseEnd() */
-unsigned AIO_fwriteSparse(FILE* file,
+static unsigned
+AIO_fwriteSparse(FILE* file,
                  const void* buffer, size_t bufferSize,
                  const FIO_prefs_t* const prefs,
                  unsigned storedSkips)
@@ -45,7 +46,7 @@ unsigned AIO_fwriteSparse(FILE* file,
     if (!prefs->sparseFileSupport) {  /* normal write */
         size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file);
         if (sizeCheck != bufferSize)
-            EXM_THROW(70, "Write error : cannot write decoded block : %s",
+            EXM_THROW(70, "Write error : cannot write block : %s",
                       strerror(errno));
         return 0;
     }
@@ -77,7 +78,7 @@ unsigned AIO_fwriteSparse(FILE* file,
             storedSkips = 0;
             /* write the rest */
             if (fwrite(ptrT + nb0T, sizeof(size_t), nbNon0ST, file) != nbNon0ST)
-                EXM_THROW(93, "Write error : cannot write decoded block : %s",
+                EXM_THROW(93, "Write error : cannot write block : %s",
                           strerror(errno));
         }
         ptrT += seg0SizeT;
@@ -106,7 +107,8 @@ unsigned AIO_fwriteSparse(FILE* file,
     return storedSkips;
 }
 
-void AIO_fwriteSparseEnd(const FIO_prefs_t* const prefs, FILE* file, unsigned storedSkips)
+static void
+AIO_fwriteSparseEnd(const FIO_prefs_t* const prefs, FILE* file, unsigned storedSkips)
 {
     if (prefs->testMode) assert(storedSkips == 0);
     if (storedSkips>0) {
@@ -127,17 +129,25 @@ void AIO_fwriteSparseEnd(const FIO_prefs_t* const prefs, FILE* file, unsigned st
  *  AsyncIO functionality
  ************************************************************************/
 
+/* AIO_supported:
+ * Returns 1 if AsyncIO is supported on the system, 0 otherwise. */
+int AIO_supported(void) {
+#ifdef ZSTD_MULTITHREAD
+    return 1;
+#else
+    return 0;
+#endif
+}
+
 /* ***********************************
  *  General IoPool implementation
  *************************************/
 
 static IOJob_t *AIO_IOPool_createIoJob(IOPoolCtx_t *ctx, size_t bufferSize) {
-    void *buffer;
-    IOJob_t *job;
-    job = (IOJob_t*) malloc(sizeof(IOJob_t));
-    buffer = malloc(bufferSize);
+    IOJob_t* const job  = (IOJob_t*) malloc(sizeof(IOJob_t));
+    void* const buffer = malloc(bufferSize);
     if(!job || !buffer)
-    EXM_THROW(101, "Allocation error : not enough memory");
+        EXM_THROW(101, "Allocation error : not enough memory");
     job->buffer = buffer;
     job->bufferSize = bufferSize;
     job->usedBufferSize = 0;
@@ -151,49 +161,47 @@ static IOJob_t *AIO_IOPool_createIoJob(IOPoolCtx_t *ctx, size_t bufferSize) {
 /* AIO_IOPool_createThreadPool:
  * Creates a thread pool and a mutex for threaded IO pool.
  * Displays warning if asyncio is requested but MT isn't available. */
-static void AIO_IOPool_createThreadPool(IOPoolCtx_t *ctx, const FIO_prefs_t *prefs) {
+static void AIO_IOPool_createThreadPool(IOPoolCtx_t* ctx, const FIO_prefs_t* prefs) {
     ctx->threadPool = NULL;
     if(prefs->asyncIO) {
         if (ZSTD_pthread_mutex_init(&ctx->ioJobsMutex, NULL))
-        EXM_THROW(102,"Failed creating write availableJobs mutex");
+            EXM_THROW(102,"Failed creating write availableJobs mutex");
         /* We want MAX_IO_JOBS-2 queue items because we need to always have 1 free buffer to
          * decompress into and 1 buffer that's actively written to disk and owned by the writing thread. */
         assert(MAX_IO_JOBS >= 2);
         ctx->threadPool = POOL_create(1, MAX_IO_JOBS - 2);
         if (!ctx->threadPool)
-        EXM_THROW(104, "Failed creating writer thread pool");
+            EXM_THROW(104, "Failed creating writer thread pool");
     }
 }
 
 /* AIO_IOPool_init:
  * Allocates and sets and a new write pool including its included availableJobs. */
-static void AIO_IOPool_init(IOPoolCtx_t *ctx, FIO_prefs_t* const prefs, POOL_function poolFunction, size_t bufferSize) {
+static void AIO_IOPool_init(IOPoolCtx_t* ctx, const FIO_prefs_t* prefs, POOL_function poolFunction, size_t bufferSize) {
     int i;
     AIO_IOPool_createThreadPool(ctx, prefs);
     ctx->prefs = prefs;
     ctx->poolFunction = poolFunction;
-    ctx->totalIoJobs = ctx->threadPool ? MAX_IO_JOBS : 1;
+    ctx->totalIoJobs = ctx->threadPool ? MAX_IO_JOBS : 2;
     ctx->availableJobsCount = ctx->totalIoJobs;
     for(i=0; i < ctx->availableJobsCount; i++) {
         ctx->availableJobs[i] = AIO_IOPool_createIoJob(ctx, bufferSize);
     }
+    ctx->jobBufferSize = bufferSize;
     ctx->file = NULL;
 }
 
 
 /* AIO_IOPool_releaseIoJob:
  * Releases an acquired job back to the pool. Doesn't execute the job. */
-static void AIO_IOPool_releaseIoJob(IOJob_t *job) {
-    IOPoolCtx_t *ctx = (IOPoolCtx_t *) job->ctx;
-    if(ctx->threadPool) {
+static void AIO_IOPool_releaseIoJob(IOJob_t* job) {
+    IOPoolCtx_t* const ctx = (IOPoolCtx_t *) job->ctx;
+    if(ctx->threadPool)
         ZSTD_pthread_mutex_lock(&ctx->ioJobsMutex);
-        assert(ctx->availableJobsCount < MAX_IO_JOBS);
-        ctx->availableJobs[ctx->availableJobsCount++] = job;
+    assert(ctx->availableJobsCount < ctx->totalIoJobs);
+    ctx->availableJobs[ctx->availableJobsCount++] = job;
+    if(ctx->threadPool)
         ZSTD_pthread_mutex_unlock(&ctx->ioJobsMutex);
-    } else {
-        assert(ctx->availableJobsCount == 0);
-        ctx->availableJobsCount++;
-    }
 }
 
 /* AIO_IOPool_join:
@@ -225,19 +233,15 @@ static void AIO_IOPool_destroy(IOPoolCtx_t* ctx) {
 
 /* AIO_IOPool_acquireJob:
  * Returns an available io job to be used for a future io. */
-static IOJob_t* AIO_IOPool_acquireJob(IOPoolCtx_t *ctx) {
+static IOJob_t* AIO_IOPool_acquireJob(IOPoolCtx_t* ctx) {
     IOJob_t *job;
     assert(ctx->file != NULL || ctx->prefs->testMode);
-    if(ctx->threadPool) {
+    if(ctx->threadPool)
         ZSTD_pthread_mutex_lock(&ctx->ioJobsMutex);
-        assert(ctx->availableJobsCount > 0);
-        job = (IOJob_t*) ctx->availableJobs[--ctx->availableJobsCount];
+    assert(ctx->availableJobsCount > 0);
+    job = (IOJob_t*) ctx->availableJobs[--ctx->availableJobsCount];
+    if(ctx->threadPool)
         ZSTD_pthread_mutex_unlock(&ctx->ioJobsMutex);
-    } else {
-        assert(ctx->availableJobsCount == 1);
-        ctx->availableJobsCount--;
-        job = (IOJob_t*)ctx->availableJobs[0];
-    }
     job->usedBufferSize = 0;
     job->file = ctx->file;
     job->offset = 0;
@@ -249,22 +253,22 @@ static IOJob_t* AIO_IOPool_acquireJob(IOPoolCtx_t *ctx) {
  * Sets the destination file for future files in the pool.
  * Requires completion of all queues write jobs and release of all otherwise acquired jobs.
  * Also requires ending of sparse write if a previous file was used in sparse mode. */
-static void AIO_IOPool_setFile(IOPoolCtx_t *ctx, FILE* file) {
+static void AIO_IOPool_setFile(IOPoolCtx_t* ctx, FILE* file) {
     assert(ctx!=NULL);
     AIO_IOPool_join(ctx);
     assert(ctx->availableJobsCount == ctx->totalIoJobs);
     ctx->file = file;
 }
 
-static FILE* AIO_IOPool_getFile(IOPoolCtx_t *ctx) {
+static FILE* AIO_IOPool_getFile(const IOPoolCtx_t* ctx) {
     return ctx->file;
 }
 
 /* AIO_IOPool_enqueueJob:
  * Enqueues an io job for execution.
  * The queued job shouldn't be used directly after queueing it. */
-static void AIO_IOPool_enqueueJob(IOJob_t *job) {
-    IOPoolCtx_t* ctx = (IOPoolCtx_t *)job->ctx;
+static void AIO_IOPool_enqueueJob(IOJob_t* job) {
+    IOPoolCtx_t* const ctx = (IOPoolCtx_t *)job->ctx;
     if(ctx->threadPool)
         POOL_add(ctx->threadPool, ctx->poolFunction, job);
     else
@@ -277,7 +281,7 @@ static void AIO_IOPool_enqueueJob(IOJob_t *job) {
 
 /* AIO_WritePool_acquireJob:
  * Returns an available write job to be used for a future write. */
-IOJob_t* AIO_WritePool_acquireJob(WritePoolCtx_t *ctx) {
+IOJob_t* AIO_WritePool_acquireJob(WritePoolCtx_t* ctx) {
     return AIO_IOPool_acquireJob(&ctx->base);
 }
 
@@ -294,7 +298,7 @@ void AIO_WritePool_enqueueAndReacquireWriteJob(IOJob_t **job) {
 /* AIO_WritePool_sparseWriteEnd:
  * Ends sparse writes to the current file.
  * Blocks on completion of all current write jobs before executing. */
-void AIO_WritePool_sparseWriteEnd(WritePoolCtx_t *ctx) {
+void AIO_WritePool_sparseWriteEnd(WritePoolCtx_t* ctx) {
     assert(ctx != NULL);
     if(ctx->base.threadPool)
         POOL_joinJobs(ctx->base.threadPool);
@@ -306,28 +310,28 @@ void AIO_WritePool_sparseWriteEnd(WritePoolCtx_t *ctx) {
  * Sets the destination file for future writes in the pool.
  * Requires completion of all queues write jobs and release of all otherwise acquired jobs.
  * Also requires ending of sparse write if a previous file was used in sparse mode. */
-void AIO_WritePool_setFile(WritePoolCtx_t *ctx, FILE* file) {
+void AIO_WritePool_setFile(WritePoolCtx_t* ctx, FILE* file) {
     AIO_IOPool_setFile(&ctx->base, file);
     assert(ctx->storedSkips == 0);
 }
 
 /* AIO_WritePool_getFile:
  * Returns the file the writePool is currently set to write to. */
-FILE* AIO_WritePool_getFile(WritePoolCtx_t *ctx) {
+FILE* AIO_WritePool_getFile(const WritePoolCtx_t* ctx) {
     return AIO_IOPool_getFile(&ctx->base);
 }
 
 /* AIO_WritePool_releaseIoJob:
  * Releases an acquired job back to the pool. Doesn't execute the job. */
-void AIO_WritePool_releaseIoJob(IOJob_t *job) {
+void AIO_WritePool_releaseIoJob(IOJob_t* job) {
     AIO_IOPool_releaseIoJob(job);
 }
 
 /* AIO_WritePool_closeFile:
  * Ends sparse write and closes the writePool's current file and sets the file to NULL.
  * Requires completion of all queues write jobs and release of all otherwise acquired jobs.  */
-int AIO_WritePool_closeFile(WritePoolCtx_t *ctx) {
-    FILE *dstFile = ctx->base.file;
+int AIO_WritePool_closeFile(WritePoolCtx_t* ctx) {
+    FILE* const dstFile = ctx->base.file;
     assert(dstFile!=NULL || ctx->base.prefs->testMode!=0);
     AIO_WritePool_sparseWriteEnd(ctx);
     AIO_IOPool_setFile(&ctx->base, NULL);
@@ -337,16 +341,16 @@ int AIO_WritePool_closeFile(WritePoolCtx_t *ctx) {
 /* AIO_WritePool_executeWriteJob:
  * Executes a write job synchronously. Can be used as a function for a thread pool. */
 static void AIO_WritePool_executeWriteJob(void* opaque){
-    IOJob_t* job = (IOJob_t*) opaque;
-    WritePoolCtx_t* ctx = (WritePoolCtx_t*) job->ctx;
+    IOJob_t* const job = (IOJob_t*) opaque;
+    WritePoolCtx_t* const ctx = (WritePoolCtx_t*) job->ctx;
     ctx->storedSkips = AIO_fwriteSparse(job->file, job->buffer, job->usedBufferSize, ctx->base.prefs, ctx->storedSkips);
     AIO_IOPool_releaseIoJob(job);
 }
 
 /* AIO_WritePool_create:
  * Allocates and sets and a new write pool including its included jobs. */
-WritePoolCtx_t* AIO_WritePool_create(FIO_prefs_t* const prefs, size_t bufferSize) {
-    WritePoolCtx_t* ctx = (WritePoolCtx_t*) malloc(sizeof(WritePoolCtx_t));
+WritePoolCtx_t* AIO_WritePool_create(const FIO_prefs_t* prefs, size_t bufferSize) {
+    WritePoolCtx_t* const ctx = (WritePoolCtx_t*) malloc(sizeof(WritePoolCtx_t));
     if(!ctx) EXM_THROW(100, "Allocation error : not enough memory");
     AIO_IOPool_init(&ctx->base, prefs, AIO_WritePool_executeWriteJob, bufferSize);
     ctx->storedSkips = 0;
@@ -363,3 +367,256 @@ void AIO_WritePool_free(WritePoolCtx_t* ctx) {
     assert(ctx->storedSkips==0);
     free(ctx);
 }
+
+
+/* ***********************************
+ *  ReadPool implementation
+ *************************************/
+static void AIO_ReadPool_releaseAllCompletedJobs(ReadPoolCtx_t* ctx) {
+    int i;
+    for(i=0; i<ctx->completedJobsCount; i++) {
+        IOJob_t* job = (IOJob_t*) ctx->completedJobs[i];
+        AIO_IOPool_releaseIoJob(job);
+    }
+    ctx->completedJobsCount = 0;
+}
+
+static void AIO_ReadPool_addJobToCompleted(IOJob_t* job) {
+    ReadPoolCtx_t* const ctx = (ReadPoolCtx_t *)job->ctx;
+    if(ctx->base.threadPool)
+        ZSTD_pthread_mutex_lock(&ctx->base.ioJobsMutex);
+    assert(ctx->completedJobsCount < MAX_IO_JOBS);
+    ctx->completedJobs[ctx->completedJobsCount++] = job;
+    if(ctx->base.threadPool) {
+        ZSTD_pthread_cond_signal(&ctx->jobCompletedCond);
+        ZSTD_pthread_mutex_unlock(&ctx->base.ioJobsMutex);
+    }
+}
+
+/* AIO_ReadPool_findNextWaitingOffsetCompletedJob_locked:
+ * Looks through the completed jobs for a job matching the waitingOnOffset and returns it,
+ * if job wasn't found returns NULL.
+ * IMPORTANT: assumes ioJobsMutex is locked. */
+static IOJob_t* AIO_ReadPool_findNextWaitingOffsetCompletedJob_locked(ReadPoolCtx_t* ctx) {
+    IOJob_t *job = NULL;
+    int i;
+    /* This implementation goes through all completed jobs and looks for the one matching the next offset.
+     * While not strictly needed for a single threaded reader implementation (as in such a case we could expect
+     * reads to be completed in order) this implementation was chosen as it better fits other asyncio
+     * interfaces (such as io_uring) that do not provide promises regarding order of completion. */
+    for (i=0; i<ctx->completedJobsCount; i++) {
+        job = (IOJob_t *) ctx->completedJobs[i];
+        if (job->offset == ctx->waitingOnOffset) {
+            ctx->completedJobs[i] = ctx->completedJobs[--ctx->completedJobsCount];
+            return job;
+        }
+    }
+    return NULL;
+}
+
+/* AIO_ReadPool_numReadsInFlight:
+ * Returns the number of IO read jobs currrently in flight. */
+static size_t AIO_ReadPool_numReadsInFlight(ReadPoolCtx_t* ctx) {
+    const size_t jobsHeld = (ctx->currentJobHeld==NULL ? 0 : 1);
+    return ctx->base.totalIoJobs - (ctx->base.availableJobsCount + ctx->completedJobsCount + jobsHeld);
+}
+
+/* AIO_ReadPool_getNextCompletedJob:
+ * Returns a completed IOJob_t for the next read in line based on waitingOnOffset and advances waitingOnOffset.
+ * Would block. */
+static IOJob_t* AIO_ReadPool_getNextCompletedJob(ReadPoolCtx_t* ctx) {
+    IOJob_t *job = NULL;
+    if (ctx->base.threadPool)
+        ZSTD_pthread_mutex_lock(&ctx->base.ioJobsMutex);
+
+    job = AIO_ReadPool_findNextWaitingOffsetCompletedJob_locked(ctx);
+
+    /* As long as we didn't find the job matching the next read, and we have some reads in flight continue waiting */
+    while (!job && (AIO_ReadPool_numReadsInFlight(ctx) > 0)) {
+        assert(ctx->base.threadPool != NULL); /* we shouldn't be here if we work in sync mode */
+        ZSTD_pthread_cond_wait(&ctx->jobCompletedCond, &ctx->base.ioJobsMutex);
+        job = AIO_ReadPool_findNextWaitingOffsetCompletedJob_locked(ctx);
+    }
+
+    if(job) {
+        assert(job->offset == ctx->waitingOnOffset);
+        ctx->waitingOnOffset += job->usedBufferSize;
+    }
+
+    if (ctx->base.threadPool)
+        ZSTD_pthread_mutex_unlock(&ctx->base.ioJobsMutex);
+    return job;
+}
+
+
+/* AIO_ReadPool_executeReadJob:
+ * Executes a read job synchronously. Can be used as a function for a thread pool. */
+static void AIO_ReadPool_executeReadJob(void* opaque){
+    IOJob_t* const job = (IOJob_t*) opaque;
+    ReadPoolCtx_t* const ctx = (ReadPoolCtx_t *)job->ctx;
+    if(ctx->reachedEof) {
+        job->usedBufferSize = 0;
+        AIO_ReadPool_addJobToCompleted(job);
+        return;
+    }
+    job->usedBufferSize = fread(job->buffer, 1, job->bufferSize, job->file);
+    if(job->usedBufferSize < job->bufferSize) {
+        if(ferror(job->file)) {
+            EXM_THROW(37, "Read error");
+        } else if(feof(job->file)) {
+            ctx->reachedEof = 1;
+        } else {
+            EXM_THROW(37, "Unexpected short read");
+        }
+    }
+    AIO_ReadPool_addJobToCompleted(job);
+}
+
+static void AIO_ReadPool_enqueueRead(ReadPoolCtx_t* ctx) {
+    IOJob_t* const job = AIO_IOPool_acquireJob(&ctx->base);
+    job->offset = ctx->nextReadOffset;
+    ctx->nextReadOffset += job->bufferSize;
+    AIO_IOPool_enqueueJob(job);
+}
+
+static void AIO_ReadPool_startReading(ReadPoolCtx_t* ctx) {
+    int i;
+    for (i = 0; i < ctx->base.availableJobsCount; i++) {
+        AIO_ReadPool_enqueueRead(ctx);
+    }
+}
+
+/* AIO_ReadPool_setFile:
+ * Sets the source file for future read in the pool. Initiates reading immediately if file is not NULL.
+ * Waits for all current enqueued tasks to complete if a previous file was set. */
+void AIO_ReadPool_setFile(ReadPoolCtx_t* ctx, FILE* file) {
+    assert(ctx!=NULL);
+    AIO_IOPool_join(&ctx->base);
+    AIO_ReadPool_releaseAllCompletedJobs(ctx);
+    if (ctx->currentJobHeld) {
+        AIO_IOPool_releaseIoJob((IOJob_t *)ctx->currentJobHeld);
+        ctx->currentJobHeld = NULL;
+    }
+    AIO_IOPool_setFile(&ctx->base, file);
+    ctx->nextReadOffset = 0;
+    ctx->waitingOnOffset = 0;
+    ctx->srcBuffer = ctx->coalesceBuffer;
+    ctx->srcBufferLoaded = 0;
+    ctx->reachedEof = 0;
+    if(file != NULL)
+        AIO_ReadPool_startReading(ctx);
+}
+
+/* AIO_ReadPool_create:
+ * Allocates and sets and a new readPool including its included jobs.
+ * bufferSize should be set to the maximal buffer we want to read at a time, will also be used
+ * as our basic read size. */
+ReadPoolCtx_t* AIO_ReadPool_create(const FIO_prefs_t* prefs, size_t bufferSize) {
+    ReadPoolCtx_t* const ctx = (ReadPoolCtx_t*) malloc(sizeof(ReadPoolCtx_t));
+    if(!ctx) EXM_THROW(100, "Allocation error : not enough memory");
+    AIO_IOPool_init(&ctx->base, prefs, AIO_ReadPool_executeReadJob, bufferSize);
+
+    ctx->coalesceBuffer = (U8*) malloc(bufferSize * 2);
+    ctx->srcBuffer = ctx->coalesceBuffer;
+    ctx->srcBufferLoaded = 0;
+    ctx->completedJobsCount = 0;
+    ctx->currentJobHeld = NULL;
+
+    if(ctx->base.threadPool)
+        if (ZSTD_pthread_cond_init(&ctx->jobCompletedCond, NULL))
+            EXM_THROW(103,"Failed creating write jobCompletedCond mutex");
+
+    return ctx;
+}
+
+/* AIO_ReadPool_free:
+ * Frees and releases a readPool and its resources. Closes source file. */
+void AIO_ReadPool_free(ReadPoolCtx_t* ctx) {
+    if(AIO_ReadPool_getFile(ctx))
+        AIO_ReadPool_closeFile(ctx);
+    if(ctx->base.threadPool)
+        ZSTD_pthread_cond_destroy(&ctx->jobCompletedCond);
+    AIO_IOPool_destroy(&ctx->base);
+    free(ctx->coalesceBuffer);
+    free(ctx);
+}
+
+/* AIO_ReadPool_consumeBytes:
+ * Consumes byes from srcBuffer's beginning and updates srcBufferLoaded accordingly. */
+void AIO_ReadPool_consumeBytes(ReadPoolCtx_t* ctx, size_t n) {
+    assert(n <= ctx->srcBufferLoaded);
+    ctx->srcBufferLoaded -= n;
+    ctx->srcBuffer += n;
+}
+
+/* AIO_ReadPool_releaseCurrentlyHeldAndGetNext:
+ * Release the current held job and get the next one, returns NULL if no next job available. */
+static IOJob_t* AIO_ReadPool_releaseCurrentHeldAndGetNext(ReadPoolCtx_t* ctx) {
+    if (ctx->currentJobHeld) {
+        AIO_IOPool_releaseIoJob((IOJob_t *)ctx->currentJobHeld);
+        ctx->currentJobHeld = NULL;
+        AIO_ReadPool_enqueueRead(ctx);
+    }
+    ctx->currentJobHeld = AIO_ReadPool_getNextCompletedJob(ctx);
+    return (IOJob_t*) ctx->currentJobHeld;
+}
+
+/* AIO_ReadPool_fillBuffer:
+ * Tries to fill the buffer with at least n or jobBufferSize bytes (whichever is smaller).
+ * Returns if srcBuffer has at least the expected number of bytes loaded or if we've reached the end of the file.
+ * Return value is the number of bytes added to the buffer.
+ * Note that srcBuffer might have up to 2 times jobBufferSize bytes. */
+size_t AIO_ReadPool_fillBuffer(ReadPoolCtx_t* ctx, size_t n) {
+    IOJob_t *job;
+    int useCoalesce = 0;
+    if(n > ctx->base.jobBufferSize)
+        n = ctx->base.jobBufferSize;
+
+    /* We are good, don't read anything */
+    if (ctx->srcBufferLoaded >= n)
+        return 0;
+
+    /* We still have bytes loaded, but not enough to satisfy caller. We need to get the next job
+     * and coalesce the remaining bytes with the next job's buffer */
+    if (ctx->srcBufferLoaded > 0) {
+        useCoalesce = 1;
+        memcpy(ctx->coalesceBuffer, ctx->srcBuffer, ctx->srcBufferLoaded);
+        ctx->srcBuffer = ctx->coalesceBuffer;
+    }
+
+    /* Read the next chunk */
+    job = AIO_ReadPool_releaseCurrentHeldAndGetNext(ctx);
+    if(!job)
+        return 0;
+    if(useCoalesce) {
+        assert(ctx->srcBufferLoaded + job->usedBufferSize <= 2*ctx->base.jobBufferSize);
+        memcpy(ctx->coalesceBuffer + ctx->srcBufferLoaded, job->buffer, job->usedBufferSize);
+        ctx->srcBufferLoaded += job->usedBufferSize;
+    }
+    else {
+        ctx->srcBuffer = (U8 *) job->buffer;
+        ctx->srcBufferLoaded = job->usedBufferSize;
+    }
+    return job->usedBufferSize;
+}
+
+/* AIO_ReadPool_consumeAndRefill:
+ * Consumes the current buffer and refills it with bufferSize bytes. */
+size_t AIO_ReadPool_consumeAndRefill(ReadPoolCtx_t* ctx) {
+    AIO_ReadPool_consumeBytes(ctx, ctx->srcBufferLoaded);
+    return AIO_ReadPool_fillBuffer(ctx, ctx->base.jobBufferSize);
+}
+
+/* AIO_ReadPool_getFile:
+ * Returns the current file set for the read pool. */
+FILE* AIO_ReadPool_getFile(const ReadPoolCtx_t* ctx) {
+    return AIO_IOPool_getFile(&ctx->base);
+}
+
+/* AIO_ReadPool_closeFile:
+ * Closes the current set file. Waits for all current enqueued tasks to complete and resets state. */
+int AIO_ReadPool_closeFile(ReadPoolCtx_t* ctx) {
+    FILE* const file = AIO_ReadPool_getFile(ctx);
+    AIO_ReadPool_setFile(ctx, NULL);
+    return fclose(file);
+}
diff --git a/programs/fileio_asyncio.h b/programs/fileio_asyncio.h
index 3e91164c..bf07f859 100644
--- a/programs/fileio_asyncio.h
+++ b/programs/fileio_asyncio.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -28,7 +28,7 @@ typedef struct {
     /* These struct fields should be set only on creation and not changed afterwards */
     POOL_ctx* threadPool;
     int totalIoJobs;
-    FIO_prefs_t* prefs;
+    const FIO_prefs_t* prefs;
     POOL_function poolFunction;
 
     /* Controls the file we currently write to, make changes only by using provided utility functions */
@@ -39,8 +39,36 @@ typedef struct {
     ZSTD_pthread_mutex_t ioJobsMutex;
     void* availableJobs[MAX_IO_JOBS];
     int availableJobsCount;
+    size_t jobBufferSize;
 } IOPoolCtx_t;
 
+typedef struct {
+    IOPoolCtx_t base;
+
+    /* State regarding the currently read file */
+    int reachedEof;
+    U64 nextReadOffset;
+    U64 waitingOnOffset;
+
+    /* We may hold an IOJob object as needed if we actively expose its buffer. */
+    void *currentJobHeld;
+
+    /* Coalesce buffer is used to join two buffers in case where we need to read more bytes than left in
+     * the first of them. Shouldn't be accessed from outside ot utility functions. */
+    U8 *coalesceBuffer;
+
+    /* Read buffer can be used by consumer code, take care when copying this pointer aside as it might
+     * change when consuming / refilling buffer. */
+    U8 *srcBuffer;
+    size_t srcBufferLoaded;
+
+    /* We need to know what tasks completed so we can use their buffers when their time comes.
+     * Should only be accessed after locking base.ioJobsMutex . */
+    void* completedJobs[MAX_IO_JOBS];
+    int completedJobsCount;
+    ZSTD_pthread_cond_t jobCompletedCond;
+} ReadPoolCtx_t;
+
 typedef struct {
     IOPoolCtx_t base;
     unsigned storedSkips;
@@ -59,15 +87,10 @@ typedef struct {
     U64 offset;
 } IOJob_t;
 
-/** AIO_fwriteSparse() :
-*  @return : storedSkips,
-*            argument for next call to AIO_fwriteSparse() or AIO_fwriteSparseEnd() */
-unsigned AIO_fwriteSparse(FILE* file,
-                 const void* buffer, size_t bufferSize,
-                 const FIO_prefs_t* const prefs,
-                 unsigned storedSkips);
+/* AIO_supported:
+ * Returns 1 if AsyncIO is supported on the system, 0 otherwise. */
+int AIO_supported(void);
 
-void AIO_fwriteSparseEnd(const FIO_prefs_t* const prefs, FILE* file, unsigned storedSkips);
 
 /* AIO_WritePool_releaseIoJob:
  * Releases an acquired job back to the pool. Doesn't execute the job. */
@@ -97,7 +120,7 @@ void AIO_WritePool_setFile(WritePoolCtx_t *ctx, FILE* file);
 
 /* AIO_WritePool_getFile:
  * Returns the file the writePool is currently set to write to. */
-FILE* AIO_WritePool_getFile(WritePoolCtx_t *ctx);
+FILE* AIO_WritePool_getFile(const WritePoolCtx_t* ctx);
 
 /* AIO_WritePool_closeFile:
  * Ends sparse write and closes the writePool's current file and sets the file to NULL.
@@ -107,12 +130,50 @@ int AIO_WritePool_closeFile(WritePoolCtx_t *ctx);
 /* AIO_WritePool_create:
  * Allocates and sets and a new write pool including its included jobs.
  * bufferSize should be set to the maximal buffer we want to write to at a time. */
-WritePoolCtx_t* AIO_WritePool_create(FIO_prefs_t* const prefs, size_t bufferSize);
+WritePoolCtx_t* AIO_WritePool_create(const FIO_prefs_t* prefs, size_t bufferSize);
 
 /* AIO_WritePool_free:
  * Frees and releases a writePool and its resources. Closes destination file. */
 void AIO_WritePool_free(WritePoolCtx_t* ctx);
 
+/* AIO_ReadPool_create:
+ * Allocates and sets and a new readPool including its included jobs.
+ * bufferSize should be set to the maximal buffer we want to read at a time, will also be used
+ * as our basic read size. */
+ReadPoolCtx_t* AIO_ReadPool_create(const FIO_prefs_t* prefs, size_t bufferSize);
+
+/* AIO_ReadPool_free:
+ * Frees and releases a readPool and its resources. Closes source file. */
+void AIO_ReadPool_free(ReadPoolCtx_t* ctx);
+
+/* AIO_ReadPool_consumeBytes:
+ * Consumes byes from srcBuffer's beginning and updates srcBufferLoaded accordingly. */
+void AIO_ReadPool_consumeBytes(ReadPoolCtx_t *ctx, size_t n);
+
+/* AIO_ReadPool_fillBuffer:
+ * Makes sure buffer has at least n bytes loaded (as long as n is not bigger than the initalized bufferSize).
+ * Returns if srcBuffer has at least n bytes loaded or if we've reached the end of the file.
+ * Return value is the number of bytes added to the buffer.
+ * Note that srcBuffer might have up to 2 times bufferSize bytes. */
+size_t AIO_ReadPool_fillBuffer(ReadPoolCtx_t *ctx, size_t n);
+
+/* AIO_ReadPool_consumeAndRefill:
+ * Consumes the current buffer and refills it with bufferSize bytes. */
+size_t AIO_ReadPool_consumeAndRefill(ReadPoolCtx_t *ctx);
+
+/* AIO_ReadPool_setFile:
+ * Sets the source file for future read in the pool. Initiates reading immediately if file is not NULL.
+ * Waits for all current enqueued tasks to complete if a previous file was set. */
+void AIO_ReadPool_setFile(ReadPoolCtx_t *ctx, FILE* file);
+
+/* AIO_ReadPool_getFile:
+ * Returns the current file set for the read pool. */
+FILE* AIO_ReadPool_getFile(const ReadPoolCtx_t *ctx);
+
+/* AIO_ReadPool_closeFile:
+ * Closes the current set file. Waits for all current enqueued tasks to complete and resets state. */
+int AIO_ReadPool_closeFile(ReadPoolCtx_t *ctx);
+
 #if defined (__cplusplus)
 }
 #endif
diff --git a/programs/fileio_common.h b/programs/fileio_common.h
index d33c19d7..282c2f13 100644
--- a/programs/fileio_common.h
+++ b/programs/fileio_common.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
diff --git a/programs/fileio_types.h b/programs/fileio_types.h
index 1909ab1a..cf566aa2 100644
--- a/programs/fileio_types.h
+++ b/programs/fileio_types.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -70,4 +70,4 @@ typedef struct FIO_prefs_s {
     int allowBlockDevices;
 } FIO_prefs_t;
 
-#endif /* FILEIO_TYPES_HEADER */
\ No newline at end of file
+#endif /* FILEIO_TYPES_HEADER */
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index 2e3f4ddb..29da261d 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -46,6 +46,7 @@
 #  include "zstdcli_trace.h"
 #endif
 #include "../lib/zstd.h"  /* ZSTD_VERSION_STRING, ZSTD_minCLevel, ZSTD_maxCLevel */
+#include "fileio_asyncio.h"
 
 
 /*-************************************
@@ -179,7 +180,8 @@ static void usage_advanced(const char* programName)
 #ifdef UTIL_HAS_MIRRORFILELIST
     DISPLAYOUT( "--output-dir-mirror DIR : processed files are stored into DIR respecting original directory structure \n");
 #endif
-
+    if (AIO_supported())
+        DISPLAYOUT( "--[no-]asyncio : use asynchronous IO (default: enabled) \n");
 
 #ifndef ZSTD_NOCOMPRESS
     DISPLAYOUT( "--[no-]check : during compression, add XXH64 integrity checksum to frame (default: enabled)");
@@ -242,9 +244,6 @@ static void usage_advanced(const char* programName)
     DISPLAYOUT( " -l        : print information about zstd compressed files \n");
     DISPLAYOUT( "--test     : test compressed file integrity \n");
     DISPLAYOUT( " -M#       : Set a memory usage limit for decompression \n");
-#ifdef ZSTD_MULTITHREAD
-    DISPLAYOUT( "--[no-]asyncio  : use threaded asynchronous IO for output (default: disabled) \n");
-#endif
 # if ZSTD_SPARSE_DEFAULT
     DISPLAYOUT( "--[no-]sparse : sparse mode (default: enabled on file, disabled on stdout) \n");
 # else
@@ -1459,6 +1458,7 @@ int main(int argCount, const char* argv[])
         FIO_setTargetCBlockSize(prefs, targetCBlockSize);
         FIO_setSrcSizeHint(prefs, srcSizeHint);
         FIO_setLiteralCompressionMode(prefs, literalCompressionMode);
+        FIO_setSparseWrite(prefs, 0);
         if (adaptMin > cLevel) cLevel = adaptMin;
         if (adaptMax < cLevel) cLevel = adaptMax;
 
diff --git a/tests/playTests.sh b/tests/playTests.sh
index f04f1da7..f97e96e3 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -260,10 +260,13 @@ zstd -dc - < tmp.zst > $INTOVOID
 zstd -d    < tmp.zst > $INTOVOID   # implicit stdout when stdin is used
 zstd -d  - < tmp.zst > $INTOVOID
 println "test : impose memory limitation (must fail)"
-zstd -d -f tmp.zst -M2K -c > $INTOVOID && die "decompression needs more memory than allowed"
-zstd -d -f tmp.zst --memlimit=2K -c > $INTOVOID && die "decompression needs more memory than allowed"  # long command
-zstd -d -f tmp.zst --memory=2K -c > $INTOVOID && die "decompression needs more memory than allowed"  # long command
-zstd -d -f tmp.zst --memlimit-decompress=2K -c > $INTOVOID && die "decompression needs more memory than allowed"  # long command
+datagen -g500K > tmplimit
+zstd -f tmplimit
+zstd -d -f tmplimit.zst -M2K -c > $INTOVOID && die "decompression needs more memory than allowed"
+zstd -d -f tmplimit.zst --memlimit=2K -c > $INTOVOID && die "decompression needs more memory than allowed"  # long command
+zstd -d -f tmplimit.zst --memory=2K -c > $INTOVOID && die "decompression needs more memory than allowed"  # long command
+zstd -d -f tmplimit.zst --memlimit-decompress=2K -c > $INTOVOID && die "decompression needs more memory than allowed"  # long command
+rm -f tmplimit tmplimit.zst
 println "test : overwrite protection"
 zstd -q tmp && die "overwrite check failed!"
 println "test : force overwrite"
@@ -1596,11 +1599,11 @@ elif [ "$longCSize19wlog23" -gt "$optCSize19wlog23" ]; then
     exit 1
 fi
 
-println "\n===>  zstd asyncio decompression tests "
+println "\n===>  zstd asyncio tests "
 
 addFrame() {
     datagen -g2M -s$2 >> tmp_uncompressed
-    datagen -g2M -s$2 | zstd --format=$1 >> tmp_compressed.zst
+    datagen -g2M -s$2 | zstd -1 --format=$1 >> tmp_compressed.zst
 }
 
 addTwoFrames() {

From 4b24ebdcf33cc5c2819272278e2d742b9d0f72fe Mon Sep 17 00:00:00 2001
From: Yonatan Komornik <11005061+yoniko@users.noreply.github.com>
Date: Mon, 31 Jan 2022 16:49:49 -0800
Subject: [PATCH 8/8] Travis CI: fix by installing pip compatible with python
 3.6 (#3041)

Pip install script no longer supports python3.6 by default, switched to a script that does.
---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 6a1295b4..c49f3ee3 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -102,7 +102,7 @@ matrix:
           travis_retry curl -o ~/ninja.zip -L 'https://github.com/ninja-build/ninja/releases/download/v1.9.0/ninja-linux.zip' &&
           unzip ~/ninja.zip -d ~/.local/bin
         - |
-          travis_retry curl -o ~/get-pip.py -L 'https://bootstrap.pypa.io/get-pip.py' &&
+          travis_retry curl -o ~/get-pip.py -L 'https://bootstrap.pypa.io/pip/3.6/get-pip.py' &&
           python3 ~/get-pip.py --user &&
           pip3 install --user meson
       script: