diff --git a/build/VS2008/fullbench/fullbench.vcproj b/build/VS2008/fullbench/fullbench.vcproj
index 996ee26a..2246262d 100644
--- a/build/VS2008/fullbench/fullbench.vcproj
+++ b/build/VS2008/fullbench/fullbench.vcproj
@@ -408,6 +408,10 @@
 				RelativePath="..\..\..\programs\util.c"
 				>
 			</File>
+			<File
+				RelativePath="..\..\..\programs\timefn.c"
+				>
+			</File>
 			<File
 				RelativePath="..\..\..\lib\compress\zstd_fast.c"
 				>
diff --git a/build/VS2008/fuzzer/fuzzer.vcproj b/build/VS2008/fuzzer/fuzzer.vcproj
index 25924c8b..46cc4d4f 100644
--- a/build/VS2008/fuzzer/fuzzer.vcproj
+++ b/build/VS2008/fuzzer/fuzzer.vcproj
@@ -332,6 +332,10 @@
 				RelativePath="..\..\..\programs\util.c"
 				>
 			</File>
+			<File
+				RelativePath="..\..\..\programs\timefn.c"
+				>
+			</File>
 			<File
 				RelativePath="..\..\..\programs\datagen.c"
 				>
diff --git a/build/VS2008/zstd/zstd.vcproj b/build/VS2008/zstd/zstd.vcproj
index d738be56..745f2e87 100644
--- a/build/VS2008/zstd/zstd.vcproj
+++ b/build/VS2008/zstd/zstd.vcproj
@@ -336,6 +336,10 @@
 				RelativePath="..\..\..\programs\util.c"
 				>
 			</File>
+			<File
+				RelativePath="..\..\..\programs\timefn.c"
+				>
+			</File>
 			<File
 				RelativePath="..\..\..\programs\benchfn.c"
 				>
diff --git a/build/VS2010/fullbench-dll/fullbench-dll.vcxproj b/build/VS2010/fullbench-dll/fullbench-dll.vcxproj
index 29762860..befdc044 100644
--- a/build/VS2010/fullbench-dll/fullbench-dll.vcxproj
+++ b/build/VS2010/fullbench-dll/fullbench-dll.vcxproj
@@ -167,6 +167,7 @@
   <ItemGroup>
     <ClCompile Include="..\..\..\lib\common\xxhash.c" />
     <ClCompile Include="..\..\..\programs\util.c" />
+    <ClCompile Include="..\..\..\programs\timefn.c" />
     <ClCompile Include="..\..\..\programs\datagen.c" />
     <ClCompile Include="..\..\..\programs\benchfn.c" />
     <ClCompile Include="..\..\..\tests\fullbench.c" />
diff --git a/build/VS2010/fullbench/fullbench.vcxproj b/build/VS2010/fullbench/fullbench.vcxproj
index a3a884a7..57ee3371 100644
--- a/build/VS2010/fullbench/fullbench.vcxproj
+++ b/build/VS2010/fullbench/fullbench.vcxproj
@@ -156,7 +156,6 @@
   </ItemDefinitionGroup>
   <ItemGroup>
     <ClCompile Include="..\..\..\lib\common\entropy_common.c" />
-    <ClCompile Include="..\..\..\programs\util.c" />
     <ClCompile Include="..\..\..\lib\common\debug.c" />
     <ClCompile Include="..\..\..\lib\common\fse_decompress.c" />
     <ClCompile Include="..\..\..\lib\common\zstd_common.c" />
@@ -178,6 +177,8 @@
     <ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c" />
     <ClCompile Include="..\..\..\lib\decompress\zstd_decompress_block.c" />
     <ClCompile Include="..\..\..\lib\decompress\zstd_ddict.c" />
+    <ClCompile Include="..\..\..\programs\util.c" />
+    <ClCompile Include="..\..\..\programs\timefn.c" />
     <ClCompile Include="..\..\..\programs\datagen.c" />
     <ClCompile Include="..\..\..\programs\benchfn.c" />
     <ClCompile Include="..\..\..\tests\fullbench.c" />
diff --git a/build/VS2010/fuzzer/fuzzer.vcxproj b/build/VS2010/fuzzer/fuzzer.vcxproj
index 106dcf99..53881c19 100644
--- a/build/VS2010/fuzzer/fuzzer.vcxproj
+++ b/build/VS2010/fuzzer/fuzzer.vcxproj
@@ -182,6 +182,7 @@
     <ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
     <ClCompile Include="..\..\..\lib\dictBuilder\zdict.c" />
     <ClCompile Include="..\..\..\programs\util.c" />
+    <ClCompile Include="..\..\..\programs\timefn.c" />
     <ClCompile Include="..\..\..\programs\datagen.c" />
     <ClCompile Include="..\..\..\tests\fuzzer.c" />
   </ItemGroup>
diff --git a/build/VS2010/zstd/zstd.vcxproj b/build/VS2010/zstd/zstd.vcxproj
index 6681e581..6e7ddca1 100644
--- a/build/VS2010/zstd/zstd.vcxproj
+++ b/build/VS2010/zstd/zstd.vcxproj
@@ -53,6 +53,7 @@
     <ClCompile Include="..\..\..\lib\legacy\zstd_v06.c" />
     <ClCompile Include="..\..\..\lib\legacy\zstd_v07.c" />
     <ClCompile Include="..\..\..\programs\util.c" />
+    <ClCompile Include="..\..\..\programs\timefn.c" />
     <ClCompile Include="..\..\..\programs\benchfn.c" />
     <ClCompile Include="..\..\..\programs\benchzstd.c" />
     <ClCompile Include="..\..\..\programs\datagen.c" />
diff --git a/build/cmake/programs/CMakeLists.txt b/build/cmake/programs/CMakeLists.txt
index 4c5146f8..f6f7a361 100644
--- a/build/cmake/programs/CMakeLists.txt
+++ b/build/cmake/programs/CMakeLists.txt
@@ -26,7 +26,7 @@ if (MSVC)
     set(PlatformDependResources ${MSVC_RESOURCE_DIR}/zstd.rc)
 endif ()
 
-add_executable(zstd ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/fileio.c ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/dibio.c ${PlatformDependResources})
+add_executable(zstd ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/fileio.c ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/dibio.c ${PlatformDependResources})
 target_link_libraries(zstd libzstd_static)
 if (CMAKE_SYSTEM_NAME MATCHES "(Solaris|SunOS)")
     target_link_libraries(zstd rt)
@@ -63,7 +63,7 @@ if (UNIX)
     install(FILES ${CMAKE_CURRENT_BINARY_DIR}/zstdgrep.1 DESTINATION "${MAN_INSTALL_DIR}")
     install(FILES ${CMAKE_CURRENT_BINARY_DIR}/zstdless.1 DESTINATION "${MAN_INSTALL_DIR}")
 
-    add_executable(zstd-frugal ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/fileio.c)
+    add_executable(zstd-frugal ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/fileio.c)
     target_link_libraries(zstd-frugal libzstd_static)
     set_property(TARGET zstd-frugal APPEND PROPERTY COMPILE_DEFINITIONS "ZSTD_NOBENCH;ZSTD_NODICT")
 endif ()
diff --git a/build/cmake/tests/CMakeLists.txt b/build/cmake/tests/CMakeLists.txt
index 69316e28..077d824b 100644
--- a/build/cmake/tests/CMakeLists.txt
+++ b/build/cmake/tests/CMakeLists.txt
@@ -43,13 +43,13 @@ include_directories(${TESTS_DIR} ${PROGRAMS_DIR} ${LIBRARY_DIR} ${LIBRARY_DIR}/c
 add_executable(datagen ${PROGRAMS_DIR}/datagen.c ${TESTS_DIR}/datagencli.c)
 target_link_libraries(datagen libzstd_static)
 
-add_executable(fullbench ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${TESTS_DIR}/fullbench.c)
+add_executable(fullbench ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${TESTS_DIR}/fullbench.c)
 target_link_libraries(fullbench libzstd_static)
 
-add_executable(fuzzer ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${TESTS_DIR}/fuzzer.c)
+add_executable(fuzzer ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/fuzzer.c)
 target_link_libraries(fuzzer libzstd_static)
 
 if (UNIX)
-    add_executable(paramgrill ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${TESTS_DIR}/paramgrill.c)
+    add_executable(paramgrill ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/paramgrill.c)
     target_link_libraries(paramgrill libzstd_static m) #m is math library
 endif ()
diff --git a/contrib/adaptive-compression/Makefile b/contrib/adaptive-compression/Makefile
index 2c6867f5..2718e9d6 100644
--- a/contrib/adaptive-compression/Makefile
+++ b/contrib/adaptive-compression/Makefile
@@ -22,10 +22,10 @@ FLAGS     = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MULTITHREAD_LDFLAGS)
 
 all: adapt datagen
 
-adapt: $(ZSTD_FILES) $(PRGDIR)/util.c adapt.c
+adapt: $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c adapt.c
 	$(CC) $(FLAGS) $^ -o $@
 
-adapt-debug: $(ZSTD_FILES) $(PRGDIR)/util.c adapt.c
+adapt-debug: $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c adapt.c
 	$(CC) $(FLAGS) -DDEBUG_MODE=2 $^ -o adapt
 
 datagen : $(PRGDIR)/datagen.c datagencli.c
diff --git a/contrib/adaptive-compression/adapt.c b/contrib/adaptive-compression/adapt.c
index 524d1378..8fb4047e 100644
--- a/contrib/adaptive-compression/adapt.c
+++ b/contrib/adaptive-compression/adapt.c
@@ -13,6 +13,7 @@
 #include <string.h>     /* memset */
 #include "zstd_internal.h"
 #include "util.h"
+#include "timefn.h"     /* UTIL_time_t, UTIL_getTime, UTIL_getSpanTimeMicro */
 
 #define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
 #define PRINT(...) fprintf(stdout, __VA_ARGS__)
diff --git a/contrib/largeNbDicts/Makefile b/contrib/largeNbDicts/Makefile
index 6fc38256..4c055b0e 100644
--- a/contrib/largeNbDicts/Makefile
+++ b/contrib/largeNbDicts/Makefile
@@ -28,14 +28,17 @@ default: largeNbDicts
 
 all : largeNbDicts
 
-largeNbDicts: util.o benchfn.o datagen.o xxhash.o largeNbDicts.c $(LIBZSTD)
+largeNbDicts: util.o timefn.o benchfn.o datagen.o xxhash.o largeNbDicts.c $(LIBZSTD)
 	$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
 
 .PHONY: $(LIBZSTD)
 $(LIBZSTD):
 	$(MAKE) -C $(LIBDIR) libzstd.a CFLAGS="$(CFLAGS)"
 
-benchfn.o  : $(PROGDIR)/benchfn.c
+benchfn.o: $(PROGDIR)/benchfn.c
+	$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
+
+timefn.o: $(PROGDIR)/timefn.c
 	$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
 
 datagen.o: $(PROGDIR)/datagen.c
@@ -48,6 +51,7 @@ util.o: $(PROGDIR)/util.c
 xxhash.o : $(LIBDIR)/common/xxhash.c
 	$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
 
+
 clean:
 	$(RM) *.o
 	$(MAKE) -C $(LIBDIR) clean > /dev/null
diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index c7962e7d..cb101e1c 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -1,37 +1,46 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>zstd 1.3.8 Manual</title>
+<title>zstd 1.4.0 Manual</title>
 </head>
 <body>
-<h1>zstd 1.3.8 Manual</h1>
+<h1>zstd 1.4.0 Manual</h1>
 <hr>
 <a name="Contents"></a><h2>Contents</h2>
 <ol>
 <li><a href="#Chapter1">Introduction</a></li>
 <li><a href="#Chapter2">Version</a></li>
 <li><a href="#Chapter3">Default constant</a></li>
-<li><a href="#Chapter4">Simple API</a></li>
-<li><a href="#Chapter5">Explicit context</a></li>
-<li><a href="#Chapter6">Simple dictionary API</a></li>
-<li><a href="#Chapter7">Bulk processing dictionary API</a></li>
-<li><a href="#Chapter8">Streaming</a></li>
-<li><a href="#Chapter9">Streaming compression - HowTo</a></li>
-<li><a href="#Chapter10">Streaming decompression - HowTo</a></li>
-<li><a href="#Chapter11">ADVANCED AND EXPERIMENTAL FUNCTIONS</a></li>
-<li><a href="#Chapter12">Candidate API for promotion to stable status</a></li>
-<li><a href="#Chapter13">Advanced compression API</a></li>
-<li><a href="#Chapter14">experimental API (static linking only)</a></li>
-<li><a href="#Chapter15">Frame size functions</a></li>
-<li><a href="#Chapter16">Memory management</a></li>
-<li><a href="#Chapter17">Advanced compression functions</a></li>
-<li><a href="#Chapter18">Advanced decompression functions</a></li>
-<li><a href="#Chapter19">Advanced streaming functions</a></li>
-<li><a href="#Chapter20">Buffer-less and synchronous inner streaming functions</a></li>
-<li><a href="#Chapter21">Buffer-less streaming compression (synchronous mode)</a></li>
-<li><a href="#Chapter22">Buffer-less streaming decompression (synchronous mode)</a></li>
-<li><a href="#Chapter23">ZSTD_getFrameHeader() :</a></li>
-<li><a href="#Chapter24">Block level API</a></li>
+<li><a href="#Chapter4">Constants</a></li>
+<li><a href="#Chapter5">Simple API</a></li>
+<li><a href="#Chapter6">Explicit context</a></li>
+<li><a href="#Chapter7">Advanced compression API</a></li>
+<li><a href="#Chapter8">Advanced decompression API</a></li>
+<li><a href="#Chapter9">Streaming</a></li>
+<li><a href="#Chapter10">Streaming compression - HowTo</a></li>
+<li><a href="#Chapter11">This is a legacy streaming API, and can be replaced by ZSTD_CCtx_reset() and</a></li>
+<li><a href="#Chapter12">Equivalent to:</a></li>
+<li><a href="#Chapter13">Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).</a></li>
+<li><a href="#Chapter14">Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush).</a></li>
+<li><a href="#Chapter15">Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end).</a></li>
+<li><a href="#Chapter16">Streaming decompression - HowTo</a></li>
+<li><a href="#Chapter17">Simple dictionary API</a></li>
+<li><a href="#Chapter18">Bulk processing dictionary API</a></li>
+<li><a href="#Chapter19">Dictionary helper functions</a></li>
+<li><a href="#Chapter20">Advanced dictionary and prefix API</a></li>
+<li><a href="#Chapter21">ADVANCED AND EXPERIMENTAL FUNCTIONS</a></li>
+<li><a href="#Chapter22">experimental API (static linking only)</a></li>
+<li><a href="#Chapter23">Frame size functions</a></li>
+<li><a href="#Chapter24">ZSTD_decompressBound() :</a></li>
+<li><a href="#Chapter25">Memory management</a></li>
+<li><a href="#Chapter26">Advanced compression functions</a></li>
+<li><a href="#Chapter27">Advanced decompression functions</a></li>
+<li><a href="#Chapter28">Advanced streaming functions</a></li>
+<li><a href="#Chapter29">Buffer-less and synchronous inner streaming functions</a></li>
+<li><a href="#Chapter30">Buffer-less streaming compression (synchronous mode)</a></li>
+<li><a href="#Chapter31">Buffer-less streaming decompression (synchronous mode)</a></li>
+<li><a href="#Chapter32">ZSTD_getFrameHeader() :</a></li>
+<li><a href="#Chapter33">Block level API</a></li>
 </ol>
 <hr>
 <a name="Chapter1"></a><h2>Introduction</h2><pre>
@@ -71,7 +80,9 @@
 </b></pre><BR>
 <a name="Chapter3"></a><h2>Default constant</h2><pre></pre>
 
-<a name="Chapter4"></a><h2>Simple API</h2><pre></pre>
+<a name="Chapter4"></a><h2>Constants</h2><pre></pre>
+
+<a name="Chapter5"></a><h2>Simple API</h2><pre></pre>
 
 <pre><b>size_t ZSTD_compress( void* dst, size_t dstCapacity,
                 const void* src, size_t srcSize,
@@ -126,13 +137,22 @@ unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
  @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. 
 </p></pre><BR>
 
+<pre><b>size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
+</b><p> `src` should point to the start of a ZSTD frame or skippable frame.
+ `srcSize` must be >= first frame size
+ @return : the compressed size of the first frame starting at `src`,
+           suitable to pass as `srcSize` to `ZSTD_decompress` or similar,
+        or an error code if input is invalid 
+</p></pre><BR>
+
 <h3>Helper functions</h3><pre></pre><b><pre>#define ZSTD_COMPRESSBOUND(srcSize)   ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) </b>/* margin, from 64 to 0 */ : 0))  /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */<b>
 size_t      ZSTD_compressBound(size_t srcSize); </b>/*!< maximum compressed size in worst case single-pass scenario */<b>
 unsigned    ZSTD_isError(size_t code);          </b>/*!< tells if a `size_t` function result is an error code */<b>
 const char* ZSTD_getErrorName(size_t code);     </b>/*!< provides readable string from an error code */<b>
+int         ZSTD_minCLevel(void);               </b>/*!< minimum negative compression level allowed */<b>
 int         ZSTD_maxCLevel(void);               </b>/*!< maximum compression level available */<b>
 </pre></b><BR>
-<a name="Chapter5"></a><h2>Explicit context</h2><pre></pre>
+<a name="Chapter6"></a><h2>Explicit context</h2><pre></pre>
 
 <h3>Compression context</h3><pre>  When compressing many times,
   it is recommended to allocate a context just once, and re-use it for each successive compression operation.
@@ -169,228 +189,7 @@ size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
  
 </p></pre><BR>
 
-<a name="Chapter6"></a><h2>Simple dictionary API</h2><pre></pre>
-
-<pre><b>size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
-                               void* dst, size_t dstCapacity,
-                         const void* src, size_t srcSize,
-                         const void* dict,size_t dictSize,
-                               int compressionLevel);
-</b><p>  Compression at an explicit compression level using a Dictionary.
-  A dictionary can be any arbitrary data segment (also called a prefix),
-  or a buffer with specified information (see dictBuilder/zdict.h).
-  Note : This function loads the dictionary, resulting in significant startup delay.
-         It's intended for a dictionary used only once.
-  Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. 
-</p></pre><BR>
-
-<pre><b>size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
-                                 void* dst, size_t dstCapacity,
-                           const void* src, size_t srcSize,
-                           const void* dict,size_t dictSize);
-</b><p>  Decompression using a known Dictionary.
-  Dictionary must be identical to the one used during compression.
-  Note : This function loads the dictionary, resulting in significant startup delay.
-         It's intended for a dictionary used only once.
-  Note : When `dict == NULL || dictSize < 8` no dictionary is used. 
-</p></pre><BR>
-
-<a name="Chapter7"></a><h2>Bulk processing dictionary API</h2><pre></pre>
-
-<pre><b>ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
-                             int compressionLevel);
-</b><p>  When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once.
-  ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost.
-  ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
- `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict.
-  Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content.
-  Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. 
-</p></pre><BR>
-
-<pre><b>size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
-</b><p>  Function frees memory allocated by ZSTD_createCDict(). 
-</p></pre><BR>
-
-<pre><b>size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
-                                void* dst, size_t dstCapacity,
-                          const void* src, size_t srcSize,
-                          const ZSTD_CDict* cdict);
-</b><p>  Compression using a digested Dictionary.
-  Recommended when same dictionary is used multiple times.
-  Note : compression level is _decided at dictionary creation time_,
-     and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) 
-</p></pre><BR>
-
-<pre><b>ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
-</b><p>  Create a digested dictionary, ready to start decompression operation without startup delay.
-  dictBuffer can be released after DDict creation, as its content is copied inside DDict. 
-</p></pre><BR>
-
-<pre><b>size_t      ZSTD_freeDDict(ZSTD_DDict* ddict);
-</b><p>  Function frees memory allocated with ZSTD_createDDict() 
-</p></pre><BR>
-
-<pre><b>size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
-                                  void* dst, size_t dstCapacity,
-                            const void* src, size_t srcSize,
-                            const ZSTD_DDict* ddict);
-</b><p>  Decompression using a digested Dictionary.
-  Recommended when same dictionary is used multiple times. 
-</p></pre><BR>
-
-<a name="Chapter8"></a><h2>Streaming</h2><pre></pre>
-
-<pre><b>typedef struct ZSTD_inBuffer_s {
-  const void* src;    </b>/**< start of input buffer */<b>
-  size_t size;        </b>/**< size of input buffer */<b>
-  size_t pos;         </b>/**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */<b>
-} ZSTD_inBuffer;
-</b></pre><BR>
-<pre><b>typedef struct ZSTD_outBuffer_s {
-  void*  dst;         </b>/**< start of output buffer */<b>
-  size_t size;        </b>/**< size of output buffer */<b>
-  size_t pos;         </b>/**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */<b>
-} ZSTD_outBuffer;
-</b></pre><BR>
-<a name="Chapter9"></a><h2>Streaming compression - HowTo</h2><pre>
-  A ZSTD_CStream object is required to track streaming operation.
-  Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
-  ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
-  It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
-
-  For parallel execution, use one separate ZSTD_CStream per thread.
-
-  note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing.
-
-  Parameters are sticky : when starting a new compression on the same context,
-  it will re-use the same sticky parameters as previous compression session.
-  When in doubt, it's recommended to fully initialize the context before usage.
-  Use ZSTD_initCStream() to set the parameter to a selected compression level.
-  Use advanced API (ZSTD_CCtx_setParameter(), etc.) to set more specific parameters.
-
-  Use ZSTD_compressStream() as many times as necessary to consume input stream.
-  The function will automatically update both `pos` fields within `input` and `output`.
-  Note that the function may not consume the entire input,
-  for example, because the output buffer is already full,
-  in which case `input.pos < input.size`.
-  The caller must check if input has been entirely consumed.
-  If not, the caller must make some room to receive more compressed data,
-  and then present again remaining input data.
- @return : a size hint, preferred nb of bytes to use as input for next function call
-           or an error code, which can be tested using ZSTD_isError().
-           Note 1 : it's just a hint, to help latency a little, any value will work fine.
-           Note 2 : size hint is guaranteed to be <= ZSTD_CStreamInSize()
-
-  At any moment, it's possible to flush whatever data might remain stuck within internal buffer,
-  using ZSTD_flushStream(). `output->pos` will be updated.
-  Note that, if `output->size` is too small, a single invocation of ZSTD_flushStream() might not be enough (return code > 0).
-  In which case, make some room to receive more compressed data, and call again ZSTD_flushStream().
-  @return : 0 if internal buffers are entirely flushed,
-            >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
-            or an error code, which can be tested using ZSTD_isError().
-
-  ZSTD_endStream() instructs to finish a frame.
-  It will perform a flush and write frame epilogue.
-  The epilogue is required for decoders to consider a frame completed.
-  flush() operation is the same, and follows same rules as ZSTD_flushStream().
-  @return : 0 if frame fully completed and fully flushed,
-            >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
-            or an error code, which can be tested using ZSTD_isError().
-
- 
-<BR></pre>
-
-<pre><b>typedef ZSTD_CCtx ZSTD_CStream;  </b>/**< CCtx and CStream are now effectively same object (>= v1.3.0) */<b>
-</b></pre><BR>
-<h3>ZSTD_CStream management functions</h3><pre></pre><b><pre>ZSTD_CStream* ZSTD_createCStream(void);
-size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
-</pre></b><BR>
-<h3>Streaming compression functions</h3><pre></pre><b><pre>size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
-size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
-size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
-size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
-</pre></b><BR>
-<pre><b>size_t ZSTD_CStreamInSize(void);    </b>/**< recommended size for input buffer */<b>
-</b></pre><BR>
-<pre><b>size_t ZSTD_CStreamOutSize(void);   </b>/**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */<b>
-</b></pre><BR>
-<a name="Chapter10"></a><h2>Streaming decompression - HowTo</h2><pre>
-  A ZSTD_DStream object is required to track streaming operations.
-  Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
-  ZSTD_DStream objects can be re-used multiple times.
-
-  Use ZSTD_initDStream() to start a new decompression operation.
- @return : recommended first input size
-  Alternatively, use advanced API to set specific properties.
-
-  Use ZSTD_decompressStream() repetitively to consume your input.
-  The function will update both `pos` fields.
-  If `input.pos < input.size`, some input has not been consumed.
-  It's up to the caller to present again remaining data.
-  The function tries to flush all data decoded immediately, respecting output buffer size.
-  If `output.pos < output.size`, decoder has flushed everything it could.
-  But if `output.pos == output.size`, there might be some data left within internal buffers.,
-  In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer.
-  Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX.
- @return : 0 when a frame is completely decoded and fully flushed,
-        or an error code, which can be tested using ZSTD_isError(),
-        or any other value > 0, which means there is still some decoding or flushing to do to complete current frame :
-                                the return value is a suggested next input size (just a hint for better latency)
-                                that will never request more than the remaining frame size.
- 
-<BR></pre>
-
-<pre><b>typedef ZSTD_DCtx ZSTD_DStream;  </b>/**< DCtx and DStream are now effectively same object (>= v1.3.0) */<b>
-</b></pre><BR>
-<h3>ZSTD_DStream management functions</h3><pre></pre><b><pre>ZSTD_DStream* ZSTD_createDStream(void);
-size_t ZSTD_freeDStream(ZSTD_DStream* zds);
-</pre></b><BR>
-<h3>Streaming decompression functions</h3><pre></pre><b><pre>size_t ZSTD_initDStream(ZSTD_DStream* zds);
-size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
-</pre></b><BR>
-<pre><b>size_t ZSTD_DStreamInSize(void);    </b>/*!< recommended size for input buffer */<b>
-</b></pre><BR>
-<pre><b>size_t ZSTD_DStreamOutSize(void);   </b>/*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */<b>
-</b></pre><BR>
-<a name="Chapter11"></a><h2>ADVANCED AND EXPERIMENTAL FUNCTIONS</h2><pre>
- The definitions in the following section are considered experimental.
- They are provided for advanced scenarios.
- They should never be used with a dynamic library, as prototypes may change in the future.
- Use them only in association with static linking.
- 
-<BR></pre>
-
-<a name="Chapter12"></a><h2>Candidate API for promotion to stable status</h2><pre>
- The following symbols and constants form the "staging area" :
- they are considered to join "stable API" by v1.4.0.
- The proposal is written so that it can be made stable "as is",
- though it's still possible to suggest improvements.
- Staging is in fact last chance for changes,
- the API is locked once reaching "stable" status.
- 
-<BR></pre>
-
-<pre><b>int ZSTD_minCLevel(void);  </b>/*!< minimum negative compression level allowed */<b>
-</b></pre><BR>
-<pre><b>size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
-</b><p> `src` should point to the start of a ZSTD frame or skippable frame.
- `srcSize` must be >= first frame size
- @return : the compressed size of the first frame starting at `src`,
-           suitable to pass as `srcSize` to `ZSTD_decompress` or similar,
-        or an error code if input is invalid 
-</p></pre><BR>
-
-<pre><b>size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
-size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
-size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
-size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
-size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
-size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
-</b><p>  These functions give the _current_ memory usage of selected object.
-  Note that object memory usage can evolve (increase or decrease) over time. 
-</p></pre><BR>
-
-<a name="Chapter13"></a><h2>Advanced compression API</h2><pre></pre>
+<a name="Chapter7"></a><h2>Advanced compression API</h2><pre></pre>
 
 <pre><b>typedef enum { ZSTD_fast=1,
                ZSTD_dfast=2,
@@ -407,7 +206,10 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
 </b></pre><BR>
 <pre><b>typedef enum {
 
-    </b>/* compression parameters */<b>
+    </b>/* compression parameters<b>
+     * Note: When compressing with a ZSTD_CDict these parameters are superseded
+     * by the parameters used to construct the ZSTD_CDict. See ZSTD_CCtx_refCDict()
+     * for more info (superseded-by-cdict). */
     ZSTD_c_compressionLevel=100, </b>/* Update all compression parameters according to pre-defined cLevel table<b>
                               * Default level is ZSTD_CLEVEL_DEFAULT==3.
                               * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
@@ -529,6 +331,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
      * ZSTD_c_format
      * ZSTD_c_forceMaxWindow
      * ZSTD_c_forceAttachDict
+     * ZSTD_c_literalCompressionMode
      * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
      * note : never ever use experimentalParam? names directly;
      *        also, the enums values themselves are unstable and can still change.
@@ -536,7 +339,8 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
      ZSTD_c_experimentalParam1=500,
      ZSTD_c_experimentalParam2=10,
      ZSTD_c_experimentalParam3=1000,
-     ZSTD_c_experimentalParam4=1001
+     ZSTD_c_experimentalParam4=1001,
+     ZSTD_c_experimentalParam5=1002,
 } ZSTD_cParameter;
 </b></pre><BR>
 <pre><b>typedef struct {
@@ -584,58 +388,6 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
-</b><p>  Create an internal CDict from `dict` buffer.
-  Decompression will have to use same dictionary.
- @result : 0, or an error code (which can be tested with ZSTD_isError()).
-  Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary,
-           meaning "return to no-dictionary mode".
-  Note 1 : Dictionary is sticky, it will be used for all future compressed frames.
-           To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters).
-  Note 2 : Loading a dictionary involves building tables.
-           It's also a CPU consuming operation, with non-negligible impact on latency.
-           Tables are dependent on compression parameters, and for this reason,
-           compression parameters can no longer be changed after loading a dictionary.
-  Note 3 :`dict` content will be copied internally.
-           Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead.
-           In such a case, dictionary buffer must outlive its users.
-  Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
-           to precisely select how dictionary content must be interpreted. 
-</p></pre><BR>
-
-<pre><b>size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
-</b><p>  Reference a prepared dictionary, to be used for all next compressed frames.
-  Note that compression parameters are enforced from within CDict,
-  and supercede any compression parameter previously set within CCtx.
-  The dictionary will remain valid for future compressed frames using same CCtx.
- @result : 0, or an error code (which can be tested with ZSTD_isError()).
-  Special : Referencing a NULL CDict means "return to no-dictionary mode".
-  Note 1 : Currently, only one dictionary can be managed.
-           Referencing a new dictionary effectively "discards" any previous one.
-  Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. 
-</p></pre><BR>
-
-<pre><b>size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
-                     const void* prefix, size_t prefixSize);
-</b><p>  Reference a prefix (single-usage dictionary) for next compressed frame.
-  A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end).
-  Decompression will need same prefix to properly regenerate data.
-  Compressing with a prefix is similar in outcome as performing a diff and compressing it,
-  but performs much faster, especially during decompression (compression speed is tunable with compression level).
- @result : 0, or an error code (which can be tested with ZSTD_isError()).
-  Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
-  Note 1 : Prefix buffer is referenced. It **must** outlive compression.
-           Its content must remain unmodified during compression.
-  Note 2 : If the intention is to diff some large src data blob with some prior version of itself,
-           ensure that the window size is large enough to contain the entire source.
-           See ZSTD_c_windowLog.
-  Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters.
-           It's a CPU consuming operation, with non-negligible impact on latency.
-           If there is a need to use the same prefix multiple times, consider loadDictionary instead.
-  Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dm_rawContent).
-           Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. 
-</p></pre><BR>
-
 <pre><b>typedef enum {
     ZSTD_reset_session_only = 1,
     ZSTD_reset_parameters = 2,
@@ -672,42 +424,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
  
 </p></pre><BR>
 
-<pre><b>typedef enum {
-    ZSTD_e_continue=0, </b>/* collect more data, encoder decides when to output compressed result, for optimal compression ratio */<b>
-    ZSTD_e_flush=1,    </b>/* flush any data provided so far,<b>
-                        * it creates (at least) one new block, that can be decoded immediately on reception;
-                        * frame will continue: any future data can still reference previously compressed data, improving compression. */
-    ZSTD_e_end=2       </b>/* flush any remaining data _and_ close current frame.<b>
-                        * note that frame is only closed after compressed data is fully flushed (return value == 0).
-                        * After that point, any additional data starts a new frame.
-                        * note : each frame is independent (does not reference any content from previous frame). */
-} ZSTD_EndDirective;
-</b></pre><BR>
-<pre><b>size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
-                             ZSTD_outBuffer* output,
-                             ZSTD_inBuffer* input,
-                             ZSTD_EndDirective endOp);
-</b><p>  Behaves about the same as ZSTD_compressStream, with additional control on end directive.
-  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
-  - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode)
-  - outpot->pos must be <= dstCapacity, input->pos must be <= srcSize
-  - outpot->pos and input->pos will be updated. They are guaranteed to remain below their respective limit.
-  - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller.
-  - When nbWorkers>=1, function is non-blocking : it just acquires a copy of input, and distributes jobs to internal worker threads, flush whatever is available,
-                                                  and then immediately returns, just indicating that there is some data remaining to be flushed.
-                                                  The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte.
-  - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking.
-  - @return provides a minimum amount of data remaining to be flushed from internal buffers
-            or an error code, which can be tested using ZSTD_isError().
-            if @return != 0, flush is not fully completed, there is still some data left within internal buffers.
-            This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers.
-            For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed.
-  - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0),
-            only ZSTD_e_end or ZSTD_e_flush operations are allowed.
-            Before starting a new compression job, or changing compression parameters,
-            it is required to fully flush internal buffers.
- 
-</p></pre><BR>
+<a name="Chapter8"></a><h2>Advanced decompression API</h2><pre></pre>
 
 <pre><b>typedef enum {
 
@@ -715,7 +432,8 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
                               * the streaming API will refuse to allocate memory buffer
                               * in order to protect the host from unreasonable memory requirements.
                               * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
-                              * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) */
+                              * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT).
+                              * Special: value 0 means "use default maximum windowLog". */
 
     </b>/* note : additional experimental parameters are also available<b>
      * within the experimental section of the API.
@@ -746,6 +464,352 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
  
 </p></pre><BR>
 
+<pre><b>size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset);
+</b><p>  Return a DCtx to clean state.
+  Session and parameters can be reset jointly or separately.
+  Parameters can only be reset when no active frame is being decompressed.
+ @return : 0, or an error code, which can be tested with ZSTD_isError()
+ 
+</p></pre><BR>
+
+<a name="Chapter9"></a><h2>Streaming</h2><pre></pre>
+
+<pre><b>typedef struct ZSTD_inBuffer_s {
+  const void* src;    </b>/**< start of input buffer */<b>
+  size_t size;        </b>/**< size of input buffer */<b>
+  size_t pos;         </b>/**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */<b>
+} ZSTD_inBuffer;
+</b></pre><BR>
+<pre><b>typedef struct ZSTD_outBuffer_s {
+  void*  dst;         </b>/**< start of output buffer */<b>
+  size_t size;        </b>/**< size of output buffer */<b>
+  size_t pos;         </b>/**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */<b>
+} ZSTD_outBuffer;
+</b></pre><BR>
+<a name="Chapter10"></a><h2>Streaming compression - HowTo</h2><pre>
+  A ZSTD_CStream object is required to track streaming operation.
+  Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
+  ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
+  It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
+
+  For parallel execution, use one separate ZSTD_CStream per thread.
+
+  note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing.
+
+  Parameters are sticky : when starting a new compression on the same context,
+  it will re-use the same sticky parameters as previous compression session.
+  When in doubt, it's recommended to fully initialize the context before usage.
+  Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(),
+  ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to
+  set more specific parameters, the pledged source size, or load a dictionary.
+
+  Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to
+  consume input stream. The function will automatically update both `pos`
+  fields within `input` and `output`.
+  Note that the function may not consume the entire input, for example, because
+  the output buffer is already full, in which case `input.pos < input.size`.
+  The caller must check if input has been entirely consumed.
+  If not, the caller must make some room to receive more compressed data,
+  and then present again remaining input data.
+  note: ZSTD_e_continue is guaranteed to make some forward progress when called,
+        but doesn't guarantee maximal forward progress. This is especially relevant
+        when compressing with multiple threads. The call won't block if it can
+        consume some input, but if it can't it will wait for some, but not all,
+        output to be flushed.
+ @return : provides a minimum amount of data remaining to be flushed from internal buffers
+           or an error code, which can be tested using ZSTD_isError().
+
+  At any moment, it's possible to flush whatever data might remain stuck within internal buffer,
+  using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated.
+  Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0).
+  In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush.
+  You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the
+  operation.
+  note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will
+        block until the flush is complete or the output buffer is full.
+  @return : 0 if internal buffers are entirely flushed,
+            >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
+            or an error code, which can be tested using ZSTD_isError().
+
+  Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame.
+  It will perform a flush and write frame epilogue.
+  The epilogue is required for decoders to consider a frame completed.
+  flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush.
+  You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to
+  start a new frame.
+  note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will
+        block until the flush is complete or the output buffer is full.
+  @return : 0 if frame fully completed and fully flushed,
+            >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
+            or an error code, which can be tested using ZSTD_isError().
+
+ 
+<BR></pre>
+
+<pre><b>typedef ZSTD_CCtx ZSTD_CStream;  </b>/**< CCtx and CStream are now effectively same object (>= v1.3.0) */<b>
+</b></pre><BR>
+<h3>ZSTD_CStream management functions</h3><pre></pre><b><pre>ZSTD_CStream* ZSTD_createCStream(void);
+size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
+</pre></b><BR>
+<h3>Streaming compression functions</h3><pre></pre><b><pre>typedef enum {
+    ZSTD_e_continue=0, </b>/* collect more data, encoder decides when to output compressed result, for optimal compression ratio */<b>
+    ZSTD_e_flush=1,    </b>/* flush any data provided so far,<b>
+                        * it creates (at least) one new block, that can be decoded immediately on reception;
+                        * frame will continue: any future data can still reference previously compressed data, improving compression.
+                        * note : multithreaded compression will block to flush as much output as possible. */
+    ZSTD_e_end=2       </b>/* flush any remaining data _and_ close current frame.<b>
+                        * note that frame is only closed after compressed data is fully flushed (return value == 0).
+                        * After that point, any additional data starts a new frame.
+                        * note : each frame is independent (does not reference any content from previous frame).
+                        * note : multithreaded compression will block to flush as much output as possible. */
+} ZSTD_EndDirective;
+</pre></b><BR>
+<pre><b>size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
+                             ZSTD_outBuffer* output,
+                             ZSTD_inBuffer* input,
+                             ZSTD_EndDirective endOp);
+</b><p>  Behaves about the same as ZSTD_compressStream, with additional control on end directive.
+  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
+  - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode)
+  - output->pos must be <= dstCapacity, input->pos must be <= srcSize
+  - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit.
+  - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller.
+  - When nbWorkers>=1, function is non-blocking : it just acquires a copy of input, and distributes jobs to internal worker threads, flush whatever is available,
+                                                  and then immediately returns, just indicating that there is some data remaining to be flushed.
+                                                  The function nonetheless guarantees forward progress : it will return only after it reads or writes at least 1 byte.
+  - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking.
+  - @return provides a minimum amount of data remaining to be flushed from internal buffers
+            or an error code, which can be tested using ZSTD_isError().
+            if @return != 0, flush is not fully completed, there is still some data left within internal buffers.
+            This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers.
+            For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed.
+  - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0),
+            only ZSTD_e_end or ZSTD_e_flush operations are allowed.
+            Before starting a new compression job, or changing compression parameters,
+            it is required to fully flush internal buffers.
+ 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_CStreamInSize(void);    </b>/**< recommended size for input buffer */<b>
+</b></pre><BR>
+<pre><b>size_t ZSTD_CStreamOutSize(void);   </b>/**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */<b>
+</b></pre><BR>
+<a name="Chapter11"></a><h2>This is a legacy streaming API, and can be replaced by ZSTD_CCtx_reset() and</h2><pre> ZSTD_compressStream2(). It is redundant, but is still fully supported.
+ Advanced parameters and dictionary compression can only be used through the
+ new API.
+<BR></pre>
+
+<a name="Chapter12"></a><h2>Equivalent to:</h2><pre>
+     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+     ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
+     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ 
+<BR></pre>
+
+<a name="Chapter13"></a><h2>Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).</h2><pre> NOTE: The return value is different. ZSTD_compressStream() returns a hint for
+ the next read size (if non-zero and not an error). ZSTD_compressStream2()
+ returns the number of bytes left to flush (if non-zero and not an error).
+ 
+<BR></pre>
+
+<a name="Chapter14"></a><h2>Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush).</h2><pre></pre>
+
+<a name="Chapter15"></a><h2>Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end).</h2><pre></pre>
+
+<a name="Chapter16"></a><h2>Streaming decompression - HowTo</h2><pre>
+  A ZSTD_DStream object is required to track streaming operations.
+  Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
+  ZSTD_DStream objects can be re-used multiple times.
+
+  Use ZSTD_initDStream() to start a new decompression operation.
+ @return : recommended first input size
+  Alternatively, use advanced API to set specific properties.
+
+  Use ZSTD_decompressStream() repetitively to consume your input.
+  The function will update both `pos` fields.
+  If `input.pos < input.size`, some input has not been consumed.
+  It's up to the caller to present again remaining data.
+  The function tries to flush all data decoded immediately, respecting output buffer size.
+  If `output.pos < output.size`, decoder has flushed everything it could.
+  But if `output.pos == output.size`, there might be some data left within internal buffers.
+  In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer.
+  Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX.
+ @return : 0 when a frame is completely decoded and fully flushed,
+        or an error code, which can be tested using ZSTD_isError(),
+        or any other value > 0, which means there is still some decoding or flushing to do to complete current frame :
+                                the return value is a suggested next input size (just a hint for better latency)
+                                that will never request more than the remaining frame size.
+ 
+<BR></pre>
+
+<pre><b>typedef ZSTD_DCtx ZSTD_DStream;  </b>/**< DCtx and DStream are now effectively same object (>= v1.3.0) */<b>
+</b></pre><BR>
+<h3>ZSTD_DStream management functions</h3><pre></pre><b><pre>ZSTD_DStream* ZSTD_createDStream(void);
+size_t ZSTD_freeDStream(ZSTD_DStream* zds);
+</pre></b><BR>
+<h3>Streaming decompression functions</h3><pre></pre><b><pre>size_t ZSTD_initDStream(ZSTD_DStream* zds);
+size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+</pre></b><BR>
+<pre><b>size_t ZSTD_DStreamInSize(void);    </b>/*!< recommended size for input buffer */<b>
+</b></pre><BR>
+<pre><b>size_t ZSTD_DStreamOutSize(void);   </b>/*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */<b>
+</b></pre><BR>
+<a name="Chapter17"></a><h2>Simple dictionary API</h2><pre></pre>
+
+<pre><b>size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
+                               void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize,
+                         const void* dict,size_t dictSize,
+                               int compressionLevel);
+</b><p>  Compression at an explicit compression level using a Dictionary.
+  A dictionary can be any arbitrary data segment (also called a prefix),
+  or a buffer with specified information (see dictBuilder/zdict.h).
+  Note : This function loads the dictionary, resulting in significant startup delay.
+         It's intended for a dictionary used only once.
+  Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
+                                 void* dst, size_t dstCapacity,
+                           const void* src, size_t srcSize,
+                           const void* dict,size_t dictSize);
+</b><p>  Decompression using a known Dictionary.
+  Dictionary must be identical to the one used during compression.
+  Note : This function loads the dictionary, resulting in significant startup delay.
+         It's intended for a dictionary used only once.
+  Note : When `dict == NULL || dictSize < 8` no dictionary is used. 
+</p></pre><BR>
+
+<a name="Chapter18"></a><h2>Bulk processing dictionary API</h2><pre></pre>
+
+<pre><b>ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
+                             int compressionLevel);
+</b><p>  When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once.
+  ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost.
+  ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
+ `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict.
+  Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content.
+  Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. 
+</p></pre><BR>
+
+<pre><b>size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
+</b><p>  Function frees memory allocated by ZSTD_createCDict(). 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
+                                void* dst, size_t dstCapacity,
+                          const void* src, size_t srcSize,
+                          const ZSTD_CDict* cdict);
+</b><p>  Compression using a digested Dictionary.
+  Recommended when same dictionary is used multiple times.
+  Note : compression level is _decided at dictionary creation time_,
+     and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) 
+</p></pre><BR>
+
+<pre><b>ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
+</b><p>  Create a digested dictionary, ready to start decompression operation without startup delay.
+  dictBuffer can be released after DDict creation, as its content is copied inside DDict. 
+</p></pre><BR>
+
+<pre><b>size_t      ZSTD_freeDDict(ZSTD_DDict* ddict);
+</b><p>  Function frees memory allocated with ZSTD_createDDict() 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
+                                  void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize,
+                            const ZSTD_DDict* ddict);
+</b><p>  Decompression using a digested Dictionary.
+  Recommended when same dictionary is used multiple times. 
+</p></pre><BR>
+
+<a name="Chapter19"></a><h2>Dictionary helper functions</h2><pre></pre>
+
+<pre><b>unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
+</b><p>  Provides the dictID stored within dictionary.
+  if @return == 0, the dictionary is not conformant with Zstandard specification.
+  It can still be loaded, but as a content-only dictionary. 
+</p></pre><BR>
+
+<pre><b>unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
+</b><p>  Provides the dictID of the dictionary loaded into `ddict`.
+  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. 
+</p></pre><BR>
+
+<pre><b>unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
+</b><p>  Provides the dictID required to decompress the frame stored within `src`.
+  If @return == 0, the dictID could not be decoded.
+  This could be for one of the following reasons :
+  - The frame does not require a dictionary to be decoded (most common case).
+  - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information.
+    Note : this use case also happens when using a non-conformant dictionary.
+  - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
+  - This is not a Zstandard frame.
+  When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. 
+</p></pre><BR>
+
+<a name="Chapter20"></a><h2>Advanced dictionary and prefix API</h2><pre>
+ This API allows dictionaries to be used with ZSTD_compress2(),
+ ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and
+ only reset when the context is reset with ZSTD_reset_parameters or
+ ZSTD_reset_session_and_parameters. Prefixes are single-use.
+<BR></pre>
+
+<pre><b>size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
+</b><p>  Create an internal CDict from `dict` buffer.
+  Decompression will have to use same dictionary.
+ @result : 0, or an error code (which can be tested with ZSTD_isError()).
+  Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary,
+           meaning "return to no-dictionary mode".
+  Note 1 : Dictionary is sticky, it will be used for all future compressed frames.
+           To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters).
+  Note 2 : Loading a dictionary involves building tables.
+           It's also a CPU consuming operation, with non-negligible impact on latency.
+           Tables are dependent on compression parameters, and for this reason,
+           compression parameters can no longer be changed after loading a dictionary.
+  Note 3 :`dict` content will be copied internally.
+           Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead.
+           In such a case, dictionary buffer must outlive its users.
+  Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
+           to precisely select how dictionary content must be interpreted. 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
+</b><p>  Reference a prepared dictionary, to be used for all next compressed frames.
+  Note that compression parameters are enforced from within CDict,
+  and supersede any compression parameter previously set within CCtx.
+  The parameters ignored are labeled as "superseded-by-cdict" in the ZSTD_cParameter enum docs.
+  The ignored parameters will be used again if the CCtx is returned to no-dictionary mode.
+  The dictionary will remain valid for future compressed frames using same CCtx.
+ @result : 0, or an error code (which can be tested with ZSTD_isError()).
+  Special : Referencing a NULL CDict means "return to no-dictionary mode".
+  Note 1 : Currently, only one dictionary can be managed.
+           Referencing a new dictionary effectively "discards" any previous one.
+  Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
+                     const void* prefix, size_t prefixSize);
+</b><p>  Reference a prefix (single-usage dictionary) for next compressed frame.
+  A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end).
+  Decompression will need same prefix to properly regenerate data.
+  Compressing with a prefix is similar in outcome as performing a diff and compressing it,
+  but performs much faster, especially during decompression (compression speed is tunable with compression level).
+ @result : 0, or an error code (which can be tested with ZSTD_isError()).
+  Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
+  Note 1 : Prefix buffer is referenced. It **must** outlive compression.
+           Its content must remain unmodified during compression.
+  Note 2 : If the intention is to diff some large src data blob with some prior version of itself,
+           ensure that the window size is large enough to contain the entire source.
+           See ZSTD_c_windowLog.
+  Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters.
+           It's a CPU consuming operation, with non-negligible impact on latency.
+           If there is a need to use the same prefix multiple times, consider loadDictionary instead.
+  Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dm_rawContent).
+           Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. 
+</p></pre><BR>
+
 <pre><b>size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
 </b><p>  Create an internal DDict from dict buffer,
   to be used to decompress next frames.
@@ -793,15 +857,25 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset);
-</b><p>  Return a DCtx to clean state.
-  Session and parameters can be reset jointly or separately.
-  Parameters can only be reset when no active frame is being decompressed.
- @return : 0, or an error code, which can be tested with ZSTD_isError()
- 
+<pre><b>size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
+size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
+size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
+size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
+size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
+size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
+</b><p>  These functions give the _current_ memory usage of selected object.
+  Note that object memory usage can evolve (increase or decrease) over time. 
 </p></pre><BR>
 
-<a name="Chapter14"></a><h2>experimental API (static linking only)</h2><pre>
+<a name="Chapter21"></a><h2>ADVANCED AND EXPERIMENTAL FUNCTIONS</h2><pre>
+ The definitions in the following section are considered experimental.
+ They are provided for advanced scenarios.
+ They should never be used with a dynamic library, as prototypes may change in the future.
+ Use them only in association with static linking.
+ 
+<BR></pre>
+
+<a name="Chapter22"></a><h2>experimental API (static linking only)</h2><pre>
  The following symbols and constants
  are not planned to join "stable API" status in the near future.
  They can still change in future versions.
@@ -890,12 +964,21 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
     ZSTD_dictForceCopy     = 2, </b>/* Always copy the dictionary. */<b>
 } ZSTD_dictAttachPref_e;
 </b></pre><BR>
-<a name="Chapter15"></a><h2>Frame size functions</h2><pre></pre>
+<pre><b>typedef enum {
+  ZSTD_lcm_auto = 0,          </b>/**< Automatically determine the compression mode based on the compression level.<b>
+                               *   Negative compression levels will be uncompressed, and positive compression
+                               *   levels will be compressed. */
+  ZSTD_lcm_huffman = 1,       </b>/**< Always attempt Huffman compression. Uncompressed literals will still be<b>
+                               *   emitted if Huffman compression is not profitable. */
+  ZSTD_lcm_uncompressed = 2,  </b>/**< Always emit uncompressed literals. */<b>
+} ZSTD_literalCompressionMode_e;
+</b></pre><BR>
+<a name="Chapter23"></a><h2>Frame size functions</h2><pre></pre>
 
 <pre><b>unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
-</b><p>  `src` should point the start of a series of ZSTD encoded and/or skippable frames
+</b><p>  `src` should point to the start of a series of ZSTD encoded and/or skippable frames
   `srcSize` must be the _exact_ size of this series
-       (i.e. there should be a frame boundary exactly at `srcSize` bytes after `src`)
+       (i.e. there should be a frame boundary at `src + srcSize`)
   @return : - decompressed size of all data in all successive frames
             - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN
             - if an error occurred: ZSTD_CONTENTSIZE_ERROR
@@ -915,13 +998,27 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
             however it does mean that all frame data must be present and valid. 
 </p></pre><BR>
 
+<a name="Chapter24"></a><h2>ZSTD_decompressBound() :</h2><pre>  `src` should point to the start of a series of ZSTD encoded and/or skippable frames
+  `srcSize` must be the _exact_ size of this series
+       (i.e. there should be a frame boundary at `src + srcSize`)
+  @return : - upper-bound for the decompressed size of all data in all successive frames
+            - if an error occurred: ZSTD_CONTENTSIZE_ERROR
+
+  note 1  : an error can occur if `src` contains an invalid or incorrectly formatted frame.
+  note 2  : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`.
+            in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value.
+  note 3  : when the decompressed size field isn't available, the upper-bound for that frame is calculated by:
+              upper-bound = # blocks * min(128 KB, Window_Size)
+ 
+<BR></pre>
+
 <pre><b>size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
 </b><p>  srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX.
  @return : size of the Frame Header,
            or an error code (if srcSize is too small) 
 </p></pre><BR>
 
-<a name="Chapter16"></a><h2>Memory management</h2><pre></pre>
+<a name="Chapter25"></a><h2>Memory management</h2><pre></pre>
 
 <pre><b>size_t ZSTD_estimateCCtxSize(int compressionLevel);
 size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
@@ -933,7 +1030,7 @@ size_t ZSTD_estimateDCtxSize(void);
   It will also consider src size to be arbitrarily "large", which is worst case.
   If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
   ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
-  ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
+  ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
   Note : CCtx size estimation is only correct for single-threaded compression. 
 </p></pre><BR>
 
@@ -946,7 +1043,7 @@ size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
   It will also consider src size to be arbitrarily "large", which is worst case.
   If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation.
   ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
-  ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
+  ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
   Note : CStream size estimation is only correct for single-threaded compression.
   ZSTD_DStream memory budget depends on window Size.
   This information can be passed manually, using ZSTD_estimateDStreamSize,
@@ -1001,7 +1098,7 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL };  </b>/**< t
  
 </p></pre><BR>
 
-<a name="Chapter17"></a><h2>Advanced compression functions</h2><pre></pre>
+<a name="Chapter26"></a><h2>Advanced compression functions</h2><pre></pre>
 
 <pre><b>ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
 </b><p>  Create a digested dictionary for compression
@@ -1076,10 +1173,10 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL };  </b>/**< t
 size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
 </b><p>  Quick howto :
   - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure
-  - ZSTD_CCtxParam_setParameter() : Push parameters one by one into
-                                    an existing ZSTD_CCtx_params structure.
-                                    This is similar to
-                                    ZSTD_CCtx_setParameter().
+  - ZSTD_CCtxParams_setParameter() : Push parameters one by one into
+                                     an existing ZSTD_CCtx_params structure.
+                                     This is similar to
+                                     ZSTD_CCtx_setParameter().
   - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to
                                     an existing CCtx.
                                     These parameters will be applied to
@@ -1109,7 +1206,7 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
+<pre><b>size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
 </b><p>  Similar to ZSTD_CCtx_setParameter.
   Set one compression parameter, selected by enum ZSTD_cParameter.
   Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams().
@@ -1117,7 +1214,7 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_CCtxParam_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value);
+<pre><b>size_t ZSTD_CCtxParams_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value);
 </b><p> Similar to ZSTD_CCtx_getParameter.
  Get the requested value of one compression parameter, selected by enum ZSTD_cParameter.
  @result : 0, or an error code (which can be tested with ZSTD_isError()).
@@ -1146,7 +1243,7 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
  
 </p></pre><BR>
 
-<a name="Chapter18"></a><h2>Advanced decompression functions</h2><pre></pre>
+<a name="Chapter27"></a><h2>Advanced decompression functions</h2><pre></pre>
 
 <pre><b>unsigned ZSTD_isFrame(const void* buffer, size_t size);
 </b><p>  Tells if the content of `buffer` starts with a valid Frame Identifier.
@@ -1162,30 +1259,6 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
   it must remain read accessible throughout the lifetime of DDict 
 </p></pre><BR>
 
-<pre><b>unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
-</b><p>  Provides the dictID stored within dictionary.
-  if @return == 0, the dictionary is not conformant with Zstandard specification.
-  It can still be loaded, but as a content-only dictionary. 
-</p></pre><BR>
-
-<pre><b>unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
-</b><p>  Provides the dictID of the dictionary loaded into `ddict`.
-  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
-  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. 
-</p></pre><BR>
-
-<pre><b>unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
-</b><p>  Provides the dictID required to decompressed the frame stored within `src`.
-  If @return == 0, the dictID could not be decoded.
-  This could for one of the following reasons :
-  - The frame does not require a dictionary to be decoded (most common case).
-  - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information.
-    Note : this use case also happens when using a non-conformant dictionary.
-  - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
-  - This is not a Zstandard frame.
-  When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. 
-</p></pre><BR>
-
 <pre><b>size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
 </b><p>  Same as ZSTD_DCtx_loadDictionary(),
   but references `dict` content instead of copying it into `dctx`.
@@ -1232,20 +1305,74 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
  
 </p></pre><BR>
 
-<a name="Chapter19"></a><h2>Advanced streaming functions</h2><pre>  Warning : most of these functions are now redundant with the Advanced API.
+<a name="Chapter28"></a><h2>Advanced streaming functions</h2><pre>  Warning : most of these functions are now redundant with the Advanced API.
   Once Advanced API reaches "stable" status,
   redundant functions will be deprecated, and then at some point removed.
 <BR></pre>
 
-<h3>Advanced Streaming compression functions</h3><pre></pre><b><pre>size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize);   </b>/**< pledgedSrcSize must be correct. If it is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, "0" also disables frame content size field. It may be enabled in the future. */<b>
-size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); </b>/**< creates of an internal CDict (incompatible with static CCtx), except if dict == NULL or dictSize < 8, in which case no dict is used. Note: dict is loaded with ZSTD_dm_auto (treated as a full zstd dictionary if it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.*/<b>
+<h3>Advanced Streaming compression functions</h3><pre></pre><b><pre></b>/**! ZSTD_initCStream_srcSize() :<b>
+ * This function is deprecated, and equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
+ *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *
+ * pledgedSrcSize must be correct. If it is not known at init time, use
+ * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs,
+ * "0" also disables frame content size field. It may be enabled in the future.
+ */
+size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize);
+</b>/**! ZSTD_initCStream_usingDict() :<b>
+ * This function is deprecated, and is equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
+ *
+ * Creates an internal CDict (incompatible with static CCtx), except if
+ * dict == NULL or dictSize < 8, in which case no dict is used.
+ * Note: dict is loaded with ZSTD_dm_auto (treated as a full zstd dictionary if
+ * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.
+ */
+size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
+</b>/**! ZSTD_initCStream_advanced() :<b>
+ * This function is deprecated, and is approximately equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_setZstdParams(zcs, params); // Set the zstd params and leave the rest as-is
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
+ *
+ * pledgedSrcSize must be correct. If srcSize is not known at init time, use
+ * value ZSTD_CONTENTSIZE_UNKNOWN. dict is loaded with ZSTD_dm_auto and ZSTD_dlm_byCopy.
+ */
 size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
-                                             ZSTD_parameters params, unsigned long long pledgedSrcSize);  </b>/**< pledgedSrcSize must be correct. If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. dict is loaded with ZSTD_dm_auto and ZSTD_dlm_byCopy. */<b>
-size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);  </b>/**< note : cdict will just be referenced, and must outlive compression session */<b>
-size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize);  </b>/**< same as ZSTD_initCStream_usingCDict(), with control over frame parameters. pledgedSrcSize must be correct. If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. */<b>
+                                             ZSTD_parameters params, unsigned long long pledgedSrcSize);
+</b>/**! ZSTD_initCStream_usingCDict() :<b>
+ * This function is deprecated, and equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_refCDict(zcs, cdict);
+ *
+ * note : cdict will just be referenced, and must outlive compression session
+ */
+size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
+</b>/**! ZSTD_initCStream_usingCDict_advanced() :<b>
+ * This function is deprecated, and is approximately equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_setZstdFrameParams(zcs, fParams); // Set the zstd frame params and leave the rest as-is
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *     ZSTD_CCtx_refCDict(zcs, cdict);
+ *
+ * same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
+ * pledgedSrcSize must be correct. If srcSize is not known at init time, use
+ * value ZSTD_CONTENTSIZE_UNKNOWN.
+ */
+size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize);
 </pre></b><BR>
 <pre><b>size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
-</b><p>  start a new frame, using same parameters from previous frame.
+</b><p> This function is deprecated, and is equivalent to:
+     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+
+  start a new frame, using same parameters from previous frame.
   This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
   Note that zcs must be init at least once before using ZSTD_resetCStream().
   If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
@@ -1284,14 +1411,14 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict*
 size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);  </b>/**< note : ddict is referenced, it must outlive decompression session */<b>
 size_t ZSTD_resetDStream(ZSTD_DStream* zds);  </b>/**< re-use decompression parameters from previous init; saves dictionary loading */<b>
 </pre></b><BR>
-<a name="Chapter20"></a><h2>Buffer-less and synchronous inner streaming functions</h2><pre>
+<a name="Chapter29"></a><h2>Buffer-less and synchronous inner streaming functions</h2><pre>
   This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
   But it's also a complex one, with several restrictions, documented below.
   Prefer normal streaming API for an easier experience.
  
 <BR></pre>
 
-<a name="Chapter21"></a><h2>Buffer-less streaming compression (synchronous mode)</h2><pre>
+<a name="Chapter30"></a><h2>Buffer-less streaming compression (synchronous mode)</h2><pre>
   A ZSTD_CCtx object is required to track streaming operations.
   Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
   ZSTD_CCtx object can be re-used multiple times within successive compression operations.
@@ -1327,7 +1454,7 @@ size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
 size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize);   </b>/* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */<b>
 size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); </b>/**<  note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */<b>
 </pre></b><BR>
-<a name="Chapter22"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre>
+<a name="Chapter31"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre>
   A ZSTD_DCtx object is required to track streaming operations.
   Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
   A ZSTD_DCtx object can be re-used multiple times.
@@ -1409,7 +1536,7 @@ typedef struct {
     unsigned checksumFlag;
 } ZSTD_frameHeader;
 </pre></b><BR>
-<a name="Chapter23"></a><h2>ZSTD_getFrameHeader() :</h2><pre>  decode Frame Header, or requires larger `srcSize`.
+<a name="Chapter32"></a><h2>ZSTD_getFrameHeader() :</h2><pre>  decode Frame Header, or requires larger `srcSize`.
  @return : 0, `zfhPtr` is correctly filled,
           >0, `srcSize` is too small, value is wanted `srcSize` amount,
            or an error code, which can be tested using ZSTD_isError() 
@@ -1425,7 +1552,7 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
 
 <pre><b>typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
 </b></pre><BR>
-<a name="Chapter24"></a><h2>Block level API</h2><pre></pre>
+<a name="Chapter33"></a><h2>Block level API</h2><pre></pre>
 
 <pre><b></b><p>    Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
     User will have to take in charge required information to regenerate data, such as compressed and content sizes.
diff --git a/programs/Makefile b/programs/Makefile
index 692980e4..64dcae00 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -165,7 +165,7 @@ $(ZSTDDECOMP_O): CFLAGS += $(ALIGN_LOOP)
 zstd : CPPFLAGS += $(THREAD_CPP) $(ZLIBCPP) $(LZMACPP) $(LZ4CPP)
 zstd : LDFLAGS += $(THREAD_LD) $(ZLIBLD) $(LZMALD) $(LZ4LD) $(DEBUGFLAGS_LD)
 zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
-zstd : $(ZSTDLIB_FILES) zstdcli.o util.o fileio.o benchfn.o benchzstd.o datagen.o dibio.o
+zstd : $(ZSTDLIB_FILES) zstdcli.o util.o timefn.o fileio.o benchfn.o benchzstd.o datagen.o dibio.o
 	@echo "$(THREAD_MSG)"
 	@echo "$(ZLIB_MSG)"
 	@echo "$(LZMA_MSG)"
@@ -183,13 +183,13 @@ zstd-release: zstd
 zstd32 : CPPFLAGS += $(THREAD_CPP)
 zstd32 : LDFLAGS  += $(THREAD_LD)
 zstd32 : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
-zstd32 : $(ZSTDLIB_FILES) zstdcli.c util.c fileio.c benchfn.c benchzstd.c datagen.c dibio.c
+zstd32 : $(ZSTDLIB_FILES) zstdcli.c util.c timefn.c fileio.c benchfn.c benchzstd.c datagen.c dibio.c
 ifneq (,$(filter Windows%,$(OS)))
 	windres/generate_res.bat
 endif
 	$(CC) -m32 $(FLAGS) $^ $(RES32_FILE) -o $@$(EXT)
 
-zstd-nolegacy : $(ZSTD_FILES) $(ZDICT_FILES) zstdcli.o util.o fileio.c benchfn.o benchzstd.o datagen.o dibio.o
+zstd-nolegacy : $(ZSTD_FILES) $(ZDICT_FILES) zstdcli.o util.o fileio.c benchfn.o benchzstd.o timefn.o datagen.o dibio.o
 	$(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS)
 
 zstd-nomt : THREAD_CPP :=
@@ -222,13 +222,13 @@ zstd-pgo :
 
 # minimal target, with only zstd compression and decompression. no bench. no legacy.
 zstd-small: CFLAGS = -Os -s
-zstd-frugal zstd-small: $(ZSTD_FILES) zstdcli.c util.c fileio.c
+zstd-frugal zstd-small: $(ZSTD_FILES) zstdcli.c util.c timefn.c fileio.c
 	$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT $^ -o $@$(EXT)
 
-zstd-decompress: $(ZSTDCOMMON_FILES) $(ZSTDDECOMP_FILES) zstdcli.c util.c fileio.c
+zstd-decompress: $(ZSTDCOMMON_FILES) $(ZSTDDECOMP_FILES) zstdcli.c util.c timefn.c fileio.c
 	$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NOCOMPRESS $^ -o $@$(EXT)
 
-zstd-compress: $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) zstdcli.c util.c fileio.c
+zstd-compress: $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) zstdcli.c util.c timefn.c fileio.c
 	$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NODECOMPRESS $^ -o $@$(EXT)
 
 zstdmt: zstd
@@ -265,9 +265,9 @@ man: zstd.1 zstdgrep.1 zstdless.1
 
 .PHONY: clean-man
 clean-man:
-	rm zstd.1
-	rm zstdgrep.1
-	rm zstdless.1
+	$(RM) zstd.1
+	$(RM) zstdgrep.1
+	$(RM) zstdless.1
 
 .PHONY: preview-man
 preview-man: clean-man man
diff --git a/programs/benchfn.c b/programs/benchfn.c
index f5118d08..0932d155 100644
--- a/programs/benchfn.c
+++ b/programs/benchfn.c
@@ -13,25 +13,20 @@
 /* *************************************
 *  Includes
 ***************************************/
-#include "platform.h"    /* Large Files support */
-#include "util.h"        /* UTIL_getFileSize, UTIL_sleep */
 #include <stdlib.h>      /* malloc, free */
 #include <string.h>      /* memset */
-#include <stdio.h>       /* fprintf, fopen */
 #undef NDEBUG            /* assert must not be disabled */
 #include <assert.h>      /* assert */
 
-#include "mem.h"
+#include "timefn.h"        /* UTIL_time_t, UTIL_getTime */
 #include "benchfn.h"
 
 
 /* *************************************
 *  Constants
 ***************************************/
-#define TIMELOOP_MICROSEC     (1*1000000ULL) /* 1 second */
+#define TIMELOOP_MICROSEC     SEC_TO_MICRO      /* 1 second */
 #define TIMELOOP_NANOSEC      (1*1000000000ULL) /* 1 second */
-#define ACTIVEPERIOD_MICROSEC (70*TIMELOOP_MICROSEC) /* 70 seconds */
-#define COOLPERIOD_SEC        10
 
 #define KB *(1 <<10)
 #define MB *(1 <<20)
@@ -39,14 +34,16 @@
 
 
 /* *************************************
-*  Errors
+*  Debug errors
 ***************************************/
-#ifndef DEBUG
-#  define DEBUG 0
+#if defined(DEBUG) && (DEBUG >= 1)
+#  include <stdio.h>       /* fprintf */
+#  define DISPLAY(...)       fprintf(stderr, __VA_ARGS__)
+#  define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
+#else
+#  define DEBUGOUTPUT(...)
 #endif
 
-#define DISPLAY(...)       fprintf(stderr, __VA_ARGS__)
-#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
 
 /* error without displaying */
 #define RETURN_QUIET_ERROR(retValue, ...) {           \
@@ -116,15 +113,7 @@ BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
     {   size_t i;
         for(i = 0; i < p.blockCount; i++) {
             memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]);  /* warm up and erase result buffer */
-        }
-#if 0
-        /* based on testing these seem to lower accuracy of multiple calls of 1 nbLoops vs 1 call of multiple nbLoops
-         * (Makes former slower)
-         */
-        UTIL_sleepMilli(5);  /* give processor time to other processes */
-        UTIL_waitForNextTick();
-#endif
-    }
+    }   }
 
     /* benchmark */
     {   UTIL_time_t const clockStart = UTIL_getTime();
@@ -146,9 +135,9 @@ BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
             }   }
         }  /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
 
-        {   U64 const totalTime = UTIL_clockSpanNano(clockStart);
+        {   PTime const totalTime = UTIL_clockSpanNano(clockStart);
             BMK_runTime_t rt;
-            rt.nanoSecPerRun = totalTime / nbLoops;
+            rt.nanoSecPerRun = (double)totalTime / nbLoops;
             rt.sumOfReturn = dstSize;
             return BMK_setValid_runTime(rt);
     }   }
@@ -158,9 +147,9 @@ BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
 /* ====  Benchmarking any function, providing intermediate results  ==== */
 
 struct BMK_timedFnState_s {
-    U64 timeSpent_ns;
-    U64 timeBudget_ns;
-    U64 runBudget_ns;
+    PTime timeSpent_ns;
+    PTime timeBudget_ns;
+    PTime runBudget_ns;
     BMK_runTime_t fastestRun;
     unsigned nbLoops;
     UTIL_time_t coolTime;
@@ -174,8 +163,20 @@ BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
     return r;
 }
 
-void BMK_freeTimedFnState(BMK_timedFnState_t* state) {
-    free(state);
+void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); }
+
+BMK_timedFnState_t*
+BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms)
+{
+    typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1];  /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */
+    typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align;  /* force tfs to be aligned at its next best position */
+    size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */
+    BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer;
+    if (buffer == NULL) return NULL;
+    if (size < sizeof(struct BMK_timedFnState_s)) return NULL;
+    if ((size_t)buffer % tfs_alignment) return NULL;  /* buffer must be properly aligned */
+    BMK_resetTimedFnState(r, total_ms, run_ms);
+    return r;
 }
 
 void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
@@ -184,9 +185,9 @@ void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms,
     if (!run_ms) run_ms = 1;
     if (run_ms > total_ms) run_ms = total_ms;
     timedFnState->timeSpent_ns = 0;
-    timedFnState->timeBudget_ns = (U64)total_ms * TIMELOOP_NANOSEC / 1000;
-    timedFnState->runBudget_ns = (U64)run_ms * TIMELOOP_NANOSEC / 1000;
-    timedFnState->fastestRun.nanoSecPerRun = (U64)(-1LL);
+    timedFnState->timeBudget_ns = (PTime)total_ms * TIMELOOP_NANOSEC / 1000;
+    timedFnState->runBudget_ns = (PTime)run_ms * TIMELOOP_NANOSEC / 1000;
+    timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000;  /* hopefully large enough : must be larger than any potential measurement */
     timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
     timedFnState->nbLoops = 1;
     timedFnState->coolTime = UTIL_getTime();
@@ -208,37 +209,27 @@ int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
 BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont,
                                   BMK_benchParams_t p)
 {
-    U64 const runBudget_ns = cont->runBudget_ns;
-    U64 const runTimeMin_ns = runBudget_ns / 2;
+    PTime const runBudget_ns = cont->runBudget_ns;
+    PTime const runTimeMin_ns = runBudget_ns / 2;
     int completed = 0;
     BMK_runTime_t bestRunTime = cont->fastestRun;
 
     while (!completed) {
-        BMK_runOutcome_t runResult;
-
-        /* Overheat protection */
-        if (UTIL_clockSpanMicro(cont->coolTime) > ACTIVEPERIOD_MICROSEC) {
-            DEBUGOUTPUT("\rcooling down ...    \r");
-            UTIL_sleep(COOLPERIOD_SEC);
-            cont->coolTime = UTIL_getTime();
-        }
-
-        /* reinitialize capacity */
-        runResult = BMK_benchFunction(p, cont->nbLoops);
+        BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops);
 
         if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
             return runResult;
         }
 
         {   BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
-            U64 const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
+            double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
 
-            cont->timeSpent_ns += loopDuration_ns;
+            cont->timeSpent_ns += (unsigned long long)loopDuration_ns;
 
             /* estimate nbLoops for next run to last approximately 1 second */
             if (loopDuration_ns > (runBudget_ns / 50)) {
-                U64 const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
-                cont->nbLoops = (U32)(runBudget_ns / fastestRun_ns) + 1;
+                double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
+                cont->nbLoops = (unsigned)(runBudget_ns / fastestRun_ns) + 1;
             } else {
                 /* previous run was too short : blindly increase workload by x multiplier */
                 const unsigned multiplier = 10;
diff --git a/programs/benchfn.h b/programs/benchfn.h
index 3ca36e36..19e05658 100644
--- a/programs/benchfn.h
+++ b/programs/benchfn.h
@@ -31,7 +31,7 @@ extern "C" {
 /* BMK_runTime_t: valid result return type */
 
 typedef struct {
-    unsigned long long nanoSecPerRun;  /* time per iteration (over all blocks) */
+    double nanoSecPerRun;  /* time per iteration (over all blocks) */
     size_t sumOfReturn;         /* sum of return values */
 } BMK_runTime_t;
 
@@ -58,30 +58,31 @@ typedef size_t (*BMK_initFn_t)(void* initPayload);
 typedef unsigned (*BMK_errorFn_t)(size_t);
 
 
-/* BMK_benchFunction() parameters are provided through following structure.
- * This is preferable for readability,
- * as the number of parameters required is pretty large.
+/* BMK_benchFunction() parameters are provided via the following structure.
+ * A structure is preferable for readability,
+ * as the number of parameters required is fairly large.
  * No initializer is provided, because it doesn't make sense to provide some "default" :
- * all parameters should be specified by the caller */
+ * all parameters must be specified by the caller.
+ * optional parameters are labelled explicitly, and accept value NULL when not used */
 typedef struct {
-    BMK_benchFn_t benchFn;   /* the function to benchmark, over the set of blocks */
-    void* benchPayload;      /* pass custom parameters to benchFn  :
-                              * (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload) */
-    BMK_initFn_t initFn;     /* (*initFn)(initPayload) is run once per run, at the beginning. */
-    void* initPayload;       /* Both arguments can be NULL, in which case nothing is run. */
-    BMK_errorFn_t errorFn;   /* errorFn will check each return value of benchFn over each block, to determine if it failed or not.
-                              * errorFn can be NULL, in which case no check is performed.
-                              * errorFn must return 0 when benchFn was successful, and >= 1 if it detects an error.
-                              * Execution is stopped as soon as an error is detected.
-                              * the triggering return value can be retrieved using BMK_extract_errorResult(). */
-    size_t blockCount;       /* number of blocks to operate benchFn on.
-                              * It's also the size of all array parameters :
-                              * srcBuffers, srcSizes, dstBuffers, dstCapacities, blockResults */
-    const void *const * srcBuffers; /* array of buffers to be operated on by benchFn */
-    const size_t* srcSizes;  /* array of the sizes of srcBuffers buffers */
-    void *const * dstBuffers;/* array of buffers to be written into by benchFn */
-    const size_t* dstCapacities; /* array of the capacities of dstBuffers buffers */
-    size_t* blockResults;    /* Optional: store the return value of benchFn for each block. Use NULL if this result is not requested. */
+    BMK_benchFn_t benchFn;    /* the function to benchmark, over the set of blocks */
+    void* benchPayload;       /* pass custom parameters to benchFn  :
+                               * (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload) */
+    BMK_initFn_t initFn;      /* (*initFn)(initPayload) is run once per run, at the beginning. */
+    void* initPayload;        /* Both arguments can be NULL, in which case nothing is run. */
+    BMK_errorFn_t errorFn;    /* errorFn will check each return value of benchFn over each block, to determine if it failed or not.
+                               * errorFn can be NULL, in which case no check is performed.
+                               * errorFn must return 0 when benchFn was successful, and >= 1 if it detects an error.
+                               * Execution is stopped as soon as an error is detected.
+                               * the triggering return value can be retrieved using BMK_extract_errorResult(). */
+    size_t blockCount;        /* number of blocks to operate benchFn on.
+                               * It's also the size of all array parameters :
+                               * srcBuffers, srcSizes, dstBuffers, dstCapacities, blockResults */
+    const void *const * srcBuffers; /* read-only array of buffers to be operated on by benchFn */
+    const size_t* srcSizes;   /* read-only array containing sizes of srcBuffers */
+    void *const * dstBuffers; /* array of buffers to be written into by benchFn. This array is not optional, it must be provided even if unused by benchFn. */
+    const size_t* dstCapacities; /* read-only array containing capacities of dstBuffers. This array must be present. */
+    size_t* blockResults;     /* Optional: store the return value of benchFn for each block. Use NULL if this result is not requested. */
 } BMK_benchParams_t;
 
 
@@ -159,6 +160,21 @@ void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms,
 void BMK_freeTimedFnState(BMK_timedFnState_t* state);
 
 
+/* BMK_timedFnState_shell and BMK_initStatic_timedFnState() :
+ * Makes it possible to statically allocate a BMK_timedFnState_t on stack.
+ * BMK_timedFnState_shell is only there to allocate space,
+ * never ever access its members.
+ * BMK_initStatic_timedFnState() actually accepts any buffer.
+ * It will check if provided buffer is large enough and is correctly aligned,
+ * and will return NULL if conditions are not respected.
+ */
+#define BMK_TIMEDFNSTATE_SIZE 64
+typedef union {
+    char never_access_space[BMK_TIMEDFNSTATE_SIZE];
+    long long alignment_enforcer;  /* must be aligned on 8-bytes boundaries */
+} BMK_timedFnState_shell;
+BMK_timedFnState_t* BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms);
+
 
 #endif   /* BENCH_FN_H_23876 */
 
diff --git a/programs/benchzstd.c b/programs/benchzstd.c
index 4bd42cfe..94ec5f25 100644
--- a/programs/benchzstd.c
+++ b/programs/benchzstd.c
@@ -28,6 +28,7 @@
 #include <errno.h>
 #include <assert.h>      /* assert */
 
+#include "timefn.h"      /* UTIL_time_t */
 #include "benchfn.h"
 #include "mem.h"
 #define ZSTD_STATIC_LINKING_ONLY
@@ -160,9 +161,13 @@ typedef struct {
 #define MIN(a,b)    ((a) < (b) ? (a) : (b))
 #define MAX(a,b)    ((a) > (b) ? (a) : (b))
 
-static void BMK_initCCtx(ZSTD_CCtx* ctx,
-    const void* dictBuffer, size_t dictBufferSize, int cLevel,
-    const ZSTD_compressionParameters* comprParams, const BMK_advancedParams_t* adv) {
+static void
+BMK_initCCtx(ZSTD_CCtx* ctx,
+            const void* dictBuffer, size_t dictBufferSize,
+            int cLevel,
+            const ZSTD_compressionParameters* comprParams,
+            const BMK_advancedParams_t* adv)
+{
     ZSTD_CCtx_reset(ctx, ZSTD_reset_session_and_parameters);
     if (adv->nbWorkers==1) {
         CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, 0));
@@ -175,13 +180,13 @@ static void BMK_initCCtx(ZSTD_CCtx* ctx,
     CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashLog, adv->ldmHashLog));
     CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmBucketSizeLog, adv->ldmBucketSizeLog));
     CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashRateLog, adv->ldmHashRateLog));
+    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_windowLog, (int)comprParams->windowLog));
+    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_hashLog, (int)comprParams->hashLog));
+    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_chainLog, (int)comprParams->chainLog));
+    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_searchLog, (int)comprParams->searchLog));
+    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_minMatch, (int)comprParams->minMatch));
+    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_targetLength, (int)comprParams->targetLength));
     CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_literalCompressionMode, (int)adv->literalCompressionMode));
-    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_windowLog, comprParams->windowLog));
-    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_hashLog, comprParams->hashLog));
-    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_chainLog, comprParams->chainLog));
-    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_searchLog, comprParams->searchLog));
-    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_minMatch, comprParams->minMatch));
-    CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_targetLength, comprParams->targetLength));
     CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_strategy, comprParams->strategy));
     CHECK_Z(ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize));
 }
@@ -446,7 +451,7 @@ BMK_benchMemAdvancedNoAlloc(
                     cSize = cResult.sumOfReturn;
                     ratio = (double)srcSize / cSize;
                     {   BMK_benchResult_t newResult;
-                        newResult.cSpeed = ((U64)srcSize * TIMELOOP_NANOSEC / cResult.nanoSecPerRun);
+                        newResult.cSpeed = (U64)((double)srcSize * TIMELOOP_NANOSEC / cResult.nanoSecPerRun);
                         benchResult.cSize = cSize;
                         if (newResult.cSpeed > benchResult.cSpeed)
                             benchResult.cSpeed = newResult.cSpeed;
@@ -470,7 +475,7 @@ BMK_benchMemAdvancedNoAlloc(
                 }
 
                 {   BMK_runTime_t const dResult = BMK_extract_runTime(dOutcome);
-                    U64 const newDSpeed = (srcSize * TIMELOOP_NANOSEC / dResult.nanoSecPerRun);
+                    U64 const newDSpeed = (U64)((double)srcSize * TIMELOOP_NANOSEC / dResult.nanoSecPerRun);
                     if (newDSpeed > benchResult.dSpeed)
                         benchResult.dSpeed = newDSpeed;
                 }
@@ -507,17 +512,21 @@ BMK_benchMemAdvancedNoAlloc(
                         pos = (U32)(u - bacc);
                         bNb = pos / (128 KB);
                         DISPLAY("(sample %u, block %u, pos %u) \n", segNb, bNb, pos);
-                        if (u>5) {
-                            int n;
+                        {   size_t const lowest = (u>5) ? 5 : u;   /* nb of bytes displayed before position u : at most 5, never more than u */
+                            size_t n;
                             DISPLAY("origin: ");
-                            for (n=-5; n<0; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]);
+                            for (n=lowest; n>0; n--)
+                                DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u-n]);
                             DISPLAY(" :%02X:  ", ((const BYTE*)srcBuffer)[u]);
-                            for (n=1; n<3; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]);
+                            for (n=1; n<3; n++)
+                                DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]);
                             DISPLAY(" \n");
                             DISPLAY("decode: ");
-                            for (n=-5; n<0; n++) DISPLAY("%02X ", resultBuffer[u+n]);
+                            for (n=lowest; n>0; n--)   /* n is unsigned : must count down to 0, same as the "origin" loop */
+                                DISPLAY("%02X ", resultBuffer[u-n]);
                             DISPLAY(" :%02X:  ", resultBuffer[u]);
-                            for (n=1; n<3; n++) DISPLAY("%02X ", resultBuffer[u+n]);
+                            for (n=1; n<3; n++)
+                                DISPLAY("%02X ", resultBuffer[u+n]);
                             DISPLAY(" \n");
                         }
                         break;
diff --git a/programs/benchzstd.h b/programs/benchzstd.h
index d6f5486a..376a80a9 100644
--- a/programs/benchzstd.h
+++ b/programs/benchzstd.h
@@ -105,17 +105,17 @@ typedef enum {
 } BMK_mode_t;
 
 typedef struct {
-    BMK_mode_t mode;            /* 0: all, 1: compress only 2: decode only */
-    unsigned nbSeconds;         /* default timing is in nbSeconds */
-    size_t blockSize;           /* Maximum size of each block*/
-    unsigned nbWorkers;         /* multithreading */
-    unsigned realTime;          /* real time priority */
-    int additionalParam;        /* used by python speed benchmark */
-    unsigned ldmFlag;           /* enables long distance matching */
-    unsigned ldmMinMatch;       /* below: parameters for long distance matching, see zstd.1.md */
-    unsigned ldmHashLog;
-    unsigned ldmBucketSizeLog;
-    unsigned ldmHashRateLog;
+    BMK_mode_t mode;        /* 0: all, 1: compress only 2: decode only */
+    unsigned nbSeconds;     /* default timing is in nbSeconds */
+    size_t blockSize;       /* Maximum size of each block*/
+    int nbWorkers;          /* multithreading */
+    unsigned realTime;      /* real time priority */
+    int additionalParam;    /* used by python speed benchmark */
+    int ldmFlag;            /* enables long distance matching */
+    int ldmMinMatch;        /* below: parameters for long distance matching, see zstd.1.md */
+    int ldmHashLog;
+    int ldmBucketSizeLog;
+    int ldmHashRateLog;
     ZSTD_literalCompressionMode_e literalCompressionMode;
 } BMK_advancedParams_t;
 
diff --git a/programs/dibio.c b/programs/dibio.c
index c9d214e7..12eb3268 100644
--- a/programs/dibio.c
+++ b/programs/dibio.c
@@ -29,6 +29,7 @@
 #include <errno.h>          /* errno */
 #include <assert.h>
 
+#include "timefn.h"         /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */
 #include "mem.h"            /* read */
 #include "error_private.h"
 #include "dibio.h"
diff --git a/programs/fileio.c b/programs/fileio.c
index 412ef476..30514d41 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -31,6 +31,7 @@
 #include <assert.h>
 #include <errno.h>      /* errno */
 #include <signal.h>
+#include "timefn.h"     /* UTIL_getTime, UTIL_clockSpanMicro */
 
 #if defined (_MSC_VER)
 #  include <sys/stat.h>
@@ -1546,10 +1547,12 @@ static unsigned FIO_fwriteSparse(FIO_prefs_t* const prefs, FILE* file, const voi
     return storedSkips;
 }
 
-static void FIO_fwriteSparseEnd(FIO_prefs_t* const prefs, FILE* file, unsigned storedSkips)
+static void
+FIO_fwriteSparseEnd(FIO_prefs_t* const prefs, FILE* file, unsigned storedSkips)
 {
     if (storedSkips>0) {
         assert(prefs->sparseFileSupport > 0);  /* storedSkips>0 implies sparse support is enabled */
+        (void)prefs;   /* assert can be disabled, in which case prefs becomes unused */
         if (LONG_SEEK(file, storedSkips-1, SEEK_CUR) != 0)
             EXM_THROW(69, "Final skip error (sparse file support)");
         /* last zero must be explicitly written,
diff --git a/programs/platform.h b/programs/platform.h
index 1a8f97bc..38ded872 100644
--- a/programs/platform.h
+++ b/programs/platform.h
@@ -87,8 +87,8 @@ extern "C" {
  * The following list of build macros tries to "guess" if target OS is likely unix-like, and therefore can #include <unistd.h>
  */
 #  elif !defined(_WIN32) \
-     && (defined(__unix__) || defined(__unix) \
-     || defined(__midipix__) || defined(__VMS) || defined(__HAIKU__))
+     && ( defined(__unix__) || defined(__unix) \
+       || defined(__midipix__) || defined(__VMS) || defined(__HAIKU__) )
 
 #    if defined(__linux__) || defined(__linux)
 #      ifndef _POSIX_C_SOURCE
@@ -108,6 +108,7 @@ extern "C" {
 
 #endif   /* PLATFORM_POSIX_VERSION */
 
+
 /*-*********************************************
 *  Detect if isatty() and fileno() are available
 ************************************************/
diff --git a/programs/timefn.c b/programs/timefn.c
new file mode 100644
index 00000000..096e1910
--- /dev/null
+++ b/programs/timefn.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2019-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/* ===  Dependencies  === */
+
+#include "timefn.h"
+
+
+/*-****************************************
+*  Time functions
+******************************************/
+
+#if defined(_WIN32)   /* Windows */
+
+#include <stdlib.h>   /* abort */
+#include <stdio.h>    /* perror */
+
+UTIL_time_t UTIL_getTime(void) { UTIL_time_t x; QueryPerformanceCounter(&x); return x; }
+
+PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd)
+{
+    static LARGE_INTEGER ticksPerSecond;
+    static int init = 0;
+    if (!init) {
+        if (!QueryPerformanceFrequency(&ticksPerSecond)) {
+            perror("timefn::QueryPerformanceFrequency");
+            abort();
+        }
+        init = 1;
+    }
+    return 1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart;
+}
+
+PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd)
+{
+    static LARGE_INTEGER ticksPerSecond;
+    static int init = 0;
+    if (!init) {
+        if (!QueryPerformanceFrequency(&ticksPerSecond)) {
+            perror("timefn::QueryPerformanceFrequency");
+            abort();
+        }
+        init = 1;
+    }
+    return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart;
+}
+
+
+
+#elif defined(__APPLE__) && defined(__MACH__)
+
+UTIL_time_t UTIL_getTime(void) { return mach_absolute_time(); }
+
+PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd)
+{
+    static mach_timebase_info_data_t rate;
+    static int init = 0;
+    if (!init) {
+        mach_timebase_info(&rate);
+        init = 1;
+    }
+    return (((clockEnd - clockStart) * (PTime)rate.numer) / ((PTime)rate.denom))/1000ULL;
+}
+
+PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd)
+{
+    static mach_timebase_info_data_t rate;
+    static int init = 0;
+    if (!init) {
+        mach_timebase_info(&rate);
+        init = 1;
+    }
+    return ((clockEnd - clockStart) * (PTime)rate.numer) / ((PTime)rate.denom);
+}
+
+
+
+#elif (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */) \
+    && defined(TIME_UTC) /* C11 requires timespec_get, but FreeBSD 11 lacks it, while still claiming C11 compliance */
+
+#include <stdlib.h>   /* abort */
+#include <stdio.h>    /* perror */
+
+UTIL_time_t UTIL_getTime(void)
+{
+    /* time must be initialized, otherwise it may fail msan test.
+     * No good reason, likely a limitation of timespec_get() for some target */
+    UTIL_time_t time = UTIL_TIME_INITIALIZER;
+    if (timespec_get(&time, TIME_UTC) != TIME_UTC) {
+        perror("timefn::timespec_get");
+        abort();
+    }
+    return time;
+}
+
+static UTIL_time_t UTIL_getSpanTime(UTIL_time_t begin, UTIL_time_t end)
+{
+    UTIL_time_t diff;
+    if (end.tv_nsec < begin.tv_nsec) {
+        diff.tv_sec = (end.tv_sec - 1) - begin.tv_sec;
+        diff.tv_nsec = (end.tv_nsec + 1000000000ULL) - begin.tv_nsec;
+    } else {
+        diff.tv_sec = end.tv_sec - begin.tv_sec;
+        diff.tv_nsec = end.tv_nsec - begin.tv_nsec;
+    }
+    return diff;
+}
+
+PTime UTIL_getSpanTimeMicro(UTIL_time_t begin, UTIL_time_t end)
+{
+    UTIL_time_t const diff = UTIL_getSpanTime(begin, end);
+    PTime micro = 0;
+    micro += 1000000ULL * diff.tv_sec;
+    micro += diff.tv_nsec / 1000ULL;
+    return micro;
+}
+
+PTime UTIL_getSpanTimeNano(UTIL_time_t begin, UTIL_time_t end)
+{
+    UTIL_time_t const diff = UTIL_getSpanTime(begin, end);
+    PTime nano = 0;
+    nano += 1000000000ULL * diff.tv_sec;
+    nano += diff.tv_nsec;
+    return nano;
+}
+
+
+
+#else   /* relies on standard C90 (note : clock_t measurements can be wrong when using multi-threading) */
+
+UTIL_time_t UTIL_getTime(void) { return clock(); }
+PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
+PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
+
+#endif
+
+
+
+/* returns time span in microseconds */
+PTime UTIL_clockSpanMicro(UTIL_time_t clockStart )
+{
+    UTIL_time_t const clockEnd = UTIL_getTime();
+    return UTIL_getSpanTimeMicro(clockStart, clockEnd);
+}
+
+/* returns time span in nanoseconds */
+PTime UTIL_clockSpanNano(UTIL_time_t clockStart )
+{
+    UTIL_time_t const clockEnd = UTIL_getTime();
+    return UTIL_getSpanTimeNano(clockStart, clockEnd);
+}
+
+void UTIL_waitForNextTick(void)
+{
+    UTIL_time_t const clockStart = UTIL_getTime();
+    UTIL_time_t clockEnd;
+    do {
+        clockEnd = UTIL_getTime();
+    } while (UTIL_getSpanTimeNano(clockStart, clockEnd) == 0);
+}
diff --git a/programs/timefn.h b/programs/timefn.h
new file mode 100644
index 00000000..d1ddd31b
--- /dev/null
+++ b/programs/timefn.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef TIME_FN_H_MODULE_287987
+#define TIME_FN_H_MODULE_287987
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*-****************************************
+*  Dependencies
+******************************************/
+#include <sys/types.h>    /* utime */
+#if defined(_MSC_VER)
+#  include <sys/utime.h>  /* utime */
+#else
+#  include <utime.h>      /* utime */
+#endif
+#include <time.h>         /* clock_t, clock, CLOCKS_PER_SEC */
+
+
+
+/*-****************************************
+*  Local Types
+******************************************/
+
+#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+  typedef uint64_t           PTime;  /* Precise Time */
+#else
+  typedef unsigned long long PTime;  /* does not support compilers without long long support */
+#endif
+
+
+
+/*-****************************************
+*  Time functions
+******************************************/
+#if defined(_WIN32)   /* Windows */
+
+    #include <Windows.h>   /* LARGE_INTEGER */
+    typedef LARGE_INTEGER UTIL_time_t;
+    #define UTIL_TIME_INITIALIZER { { 0, 0 } }
+
+#elif defined(__APPLE__) && defined(__MACH__)
+
+    #include <mach/mach_time.h>
+    typedef PTime UTIL_time_t;
+    #define UTIL_TIME_INITIALIZER 0
+
+#elif (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */) \
+    && defined(TIME_UTC) /* C11 requires timespec_get, but FreeBSD 11 lacks it, while still claiming C11 compliance */
+
+    typedef struct timespec UTIL_time_t;
+    #define UTIL_TIME_INITIALIZER { 0, 0 }
+
+#else   /* relies on standard C90 (note : clock_t measurements can be wrong when using multi-threading) */
+
+    typedef clock_t UTIL_time_t;
+    #define UTIL_TIME_INITIALIZER 0
+
+#endif
+
+
+UTIL_time_t UTIL_getTime(void);
+PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd);
+PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd);
+
+#define SEC_TO_MICRO ((PTime)1000000)
+PTime UTIL_clockSpanMicro(UTIL_time_t clockStart);
+PTime UTIL_clockSpanNano(UTIL_time_t clockStart);
+
+void UTIL_waitForNextTick(void);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* TIME_FN_H_MODULE_287987 */
diff --git a/programs/util.c b/programs/util.c
index 622e5025..7b827d45 100644
--- a/programs/util.c
+++ b/programs/util.c
@@ -352,146 +352,18 @@ UTIL_createFileList(const char **inputNames, unsigned inputNamesNb,
     return fileTable;
 }
 
+
 /*-****************************************
 *  Console log
 ******************************************/
 int g_utilDisplayLevel;
 
 
+
 /*-****************************************
-*  Time functions
+*  count the number of physical cores
 ******************************************/
-#if defined(_WIN32)   /* Windows */
 
-UTIL_time_t UTIL_getTime(void) { UTIL_time_t x; QueryPerformanceCounter(&x); return x; }
-
-U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd)
-{
-    static LARGE_INTEGER ticksPerSecond;
-    static int init = 0;
-    if (!init) {
-        if (!QueryPerformanceFrequency(&ticksPerSecond))
-            UTIL_DISPLAYLEVEL(1, "ERROR: QueryPerformanceFrequency() failure\n");
-        init = 1;
-    }
-    return 1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart;
-}
-
-U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd)
-{
-    static LARGE_INTEGER ticksPerSecond;
-    static int init = 0;
-    if (!init) {
-        if (!QueryPerformanceFrequency(&ticksPerSecond))
-            UTIL_DISPLAYLEVEL(1, "ERROR: QueryPerformanceFrequency() failure\n");
-        init = 1;
-    }
-    return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart;
-}
-
-#elif defined(__APPLE__) && defined(__MACH__)
-
-UTIL_time_t UTIL_getTime(void) { return mach_absolute_time(); }
-
-U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd)
-{
-    static mach_timebase_info_data_t rate;
-    static int init = 0;
-    if (!init) {
-        mach_timebase_info(&rate);
-        init = 1;
-    }
-    return (((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom))/1000ULL;
-}
-
-U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd)
-{
-    static mach_timebase_info_data_t rate;
-    static int init = 0;
-    if (!init) {
-        mach_timebase_info(&rate);
-        init = 1;
-    }
-    return ((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom);
-}
-
-#elif (PLATFORM_POSIX_VERSION >= 200112L) \
-   && (defined(__UCLIBC__)                \
-      || (defined(__GLIBC__)              \
-          && ((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 17) \
-             || (__GLIBC__ > 2))))
-
-UTIL_time_t UTIL_getTime(void)
-{
-    UTIL_time_t time;
-    if (clock_gettime(CLOCK_MONOTONIC, &time))
-        UTIL_DISPLAYLEVEL(1, "ERROR: Failed to get time\n");   /* we could also exit() */
-    return time;
-}
-
-UTIL_time_t UTIL_getSpanTime(UTIL_time_t begin, UTIL_time_t end)
-{
-    UTIL_time_t diff;
-    if (end.tv_nsec < begin.tv_nsec) {
-        diff.tv_sec = (end.tv_sec - 1) - begin.tv_sec;
-        diff.tv_nsec = (end.tv_nsec + 1000000000ULL) - begin.tv_nsec;
-    } else {
-        diff.tv_sec = end.tv_sec - begin.tv_sec;
-        diff.tv_nsec = end.tv_nsec - begin.tv_nsec;
-    }
-    return diff;
-}
-
-U64 UTIL_getSpanTimeMicro(UTIL_time_t begin, UTIL_time_t end)
-{
-    UTIL_time_t const diff = UTIL_getSpanTime(begin, end);
-    U64 micro = 0;
-    micro += 1000000ULL * diff.tv_sec;
-    micro += diff.tv_nsec / 1000ULL;
-    return micro;
-}
-
-U64 UTIL_getSpanTimeNano(UTIL_time_t begin, UTIL_time_t end)
-{
-    UTIL_time_t const diff = UTIL_getSpanTime(begin, end);
-    U64 nano = 0;
-    nano += 1000000000ULL * diff.tv_sec;
-    nano += diff.tv_nsec;
-    return nano;
-}
-
-#else   /* relies on standard C (note : clock_t measurements can be wrong when using multi-threading) */
-
-UTIL_time_t UTIL_getTime(void) { return clock(); }
-U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
-U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
-
-#endif
-
-/* returns time span in microseconds */
-U64 UTIL_clockSpanMicro(UTIL_time_t clockStart )
-{
-    UTIL_time_t const clockEnd = UTIL_getTime();
-    return UTIL_getSpanTimeMicro(clockStart, clockEnd);
-}
-
-/* returns time span in microseconds */
-U64 UTIL_clockSpanNano(UTIL_time_t clockStart )
-{
-    UTIL_time_t const clockEnd = UTIL_getTime();
-    return UTIL_getSpanTimeNano(clockStart, clockEnd);
-}
-
-void UTIL_waitForNextTick(void)
-{
-    UTIL_time_t const clockStart = UTIL_getTime();
-    UTIL_time_t clockEnd;
-    do {
-        clockEnd = UTIL_getTime();
-    } while (UTIL_getSpanTimeNano(clockStart, clockEnd) == 0);
-}
-
-/* count the number of physical cores */
 #if defined(_WIN32) || defined(WIN32)
 
 #include <windows.h>
diff --git a/programs/util.h b/programs/util.h
index eee7ebfc..d6e5bb55 100644
--- a/programs/util.h
+++ b/programs/util.h
@@ -112,52 +112,6 @@ extern int g_utilDisplayLevel;
 #define UTIL_DISPLAYLEVEL(l, ...) { if (g_utilDisplayLevel>=l) { UTIL_DISPLAY(__VA_ARGS__); } }
 
 
-/*-****************************************
-*  Time functions
-******************************************/
-#if defined(_WIN32)   /* Windows */
-
-    #define UTIL_TIME_INITIALIZER { { 0, 0 } }
-    typedef LARGE_INTEGER UTIL_time_t;
-
-#elif defined(__APPLE__) && defined(__MACH__)
-
-    #include <mach/mach_time.h>
-    #define UTIL_TIME_INITIALIZER 0
-    typedef U64 UTIL_time_t;
-
-#elif (PLATFORM_POSIX_VERSION >= 200112L) \
-   && (defined(__UCLIBC__)                \
-      || (defined(__GLIBC__)              \
-          && ((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 17) \
-             || (__GLIBC__ > 2))))
-
-    #define UTIL_TIME_INITIALIZER { 0, 0 }
-    typedef struct timespec UTIL_freq_t;
-    typedef struct timespec UTIL_time_t;
-
-    UTIL_time_t UTIL_getSpanTime(UTIL_time_t begin, UTIL_time_t end);
-
-#else   /* relies on standard C (note : clock_t measurements can be wrong when using multi-threading) */
-
-    typedef clock_t UTIL_time_t;
-    #define UTIL_TIME_INITIALIZER 0
-
-#endif
-
-UTIL_time_t UTIL_getTime(void);
-U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd);
-U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd);
-
-#define SEC_TO_MICRO 1000000
-
-/* returns time span in microseconds */
-U64 UTIL_clockSpanMicro(UTIL_time_t clockStart);
-
-/* returns time span in microseconds */
-U64 UTIL_clockSpanNano(UTIL_time_t clockStart);
-void UTIL_waitForNextTick(void);
-
 /*-****************************************
 *  File functions
 ******************************************/
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index f5782174..904bcdf8 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -242,18 +242,20 @@ static void errorOut(const char* msg)
  * @return 1 if an overflow error occurs */
 static int readU32FromCharChecked(const char** stringPtr, unsigned* value)
 {
-    static unsigned const max = (((unsigned)(-1)) / 10) - 1;
     unsigned result = 0;
     while ((**stringPtr >='0') && (**stringPtr <='9')) {
-        if (result > max) return 1; // overflow error
-        result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
+        unsigned const max = (((unsigned)(-1)) / 10) - 1;
+        if (result > max) return 1; /* overflow error */
+        result *= 10;
+        result += (unsigned)(**stringPtr - '0');
+        (*stringPtr)++ ;
     }
     if ((**stringPtr=='K') || (**stringPtr=='M')) {
         unsigned const maxK = ((unsigned)(-1)) >> 10;
-        if (result > maxK) return 1; // overflow error
+        if (result > maxK) return 1; /* overflow error */
         result <<= 10;
         if (**stringPtr=='M') {
-            if (result > maxK) return 1; // overflow error
+            if (result > maxK) return 1; /* overflow error */
             result <<= 10;
         }
         (*stringPtr)++;  /* skip `K` or `M` */
diff --git a/tests/Makefile b/tests/Makefile
index 2a9cd3d8..f11b7318 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -82,7 +82,7 @@ default: fullbench
 	@echo $(ZSTDMT_OBJECTS)
 
 all: fullbench fuzzer zstreamtest paramgrill datagen decodecorpus roundTripCrash \
-     fullbench-lib
+     fullbench-lib poolTests
 
 all32: fullbench32 fuzzer32 zstreamtest32
 
@@ -132,18 +132,18 @@ fullbench fullbench32 : CPPFLAGS += $(MULTITHREAD_CPP)
 fullbench fullbench32 : LDFLAGS += $(MULTITHREAD_LD)
 fullbench fullbench32 : DEBUGFLAGS = -DNDEBUG  # turn off assert() for speed measurements
 fullbench fullbench32 : $(ZSTD_FILES)
-fullbench fullbench32 : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/benchfn.c fullbench.c
+fullbench fullbench32 : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/benchfn.c fullbench.c
 	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
 fullbench-lib : CPPFLAGS += -DXXH_NAMESPACE=ZSTD_
 fullbench-lib : zstd-staticLib
-fullbench-lib : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/benchfn.c fullbench.c
+fullbench-lib : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/benchfn.c fullbench.c
 	$(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) $(ZSTDDIR)/libzstd.a
 
 # note : broken : requires unavailable symbols
 fullbench-dll : zstd-dll
 fullbench-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd
-fullbench-dll: $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/benchfn.c fullbench.c
+fullbench-dll: $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/benchfn.c $(PRGDIR)/timefn.c fullbench.c
 #	$(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) -DZSTD_DLL_IMPORT=1 $(ZSTDDIR)/dll/libzstd.dll
 	$(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT)
 
@@ -152,32 +152,32 @@ fuzzer  : LDFLAGS += $(MULTITHREAD_LD)
 fuzzer32: CFLAGS += -m32
 fuzzer  : $(ZSTDMT_OBJECTS)
 fuzzer32: $(ZSTD_FILES)
-fuzzer fuzzer32 : $(ZDICT_FILES) $(PRGDIR)/util.c $(PRGDIR)/datagen.c fuzzer.c
+fuzzer fuzzer32 : $(ZDICT_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c fuzzer.c
 	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
 fuzzer-dll : zstd-dll
 fuzzer-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd
-fuzzer-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/util.c $(PRGDIR)/datagen.c fuzzer.c
+fuzzer-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c fuzzer.c
 	$(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT)
 
 zbufftest : CPPFLAGS += -I$(ZSTDDIR)/deprecated
 zbufftest : CFLAGS += -Wno-deprecated-declarations   # required to silence deprecation warnings
-zbufftest : $(ZSTD_OBJECTS) $(ZBUFF_FILES) $(PRGDIR)/util.c $(PRGDIR)/datagen.c zbufftest.c
+zbufftest : $(ZSTD_OBJECTS) $(ZBUFF_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c zbufftest.c
 	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
 zbufftest32 : CPPFLAGS += -I$(ZSTDDIR)/deprecated
 zbufftest32 : CFLAGS += -Wno-deprecated-declarations -m32
-zbufftest32 : $(ZSTD_FILES) $(ZBUFF_FILES) $(PRGDIR)/util.c $(PRGDIR)/datagen.c zbufftest.c
+zbufftest32 : $(ZSTD_FILES) $(ZBUFF_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c zbufftest.c
 	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
 zbufftest-dll : zstd-dll
 zbufftest-dll : CPPFLAGS += -I$(ZSTDDIR)/deprecated
 zbufftest-dll : CFLAGS += -Wno-deprecated-declarations   # required to silence deprecation warnings
 zbufftest-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd
-zbufftest-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/util.c $(PRGDIR)/datagen.c zbufftest.c
+zbufftest-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/datagen.c zbufftest.c
 	$(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT)
 
-ZSTREAM_LOCAL_FILES := $(PRGDIR)/datagen.c $(PRGDIR)/util.c seqgen.c zstreamtest.c
+ZSTREAM_LOCAL_FILES := $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c seqgen.c zstreamtest.c
 ZSTREAM_PROPER_FILES := $(ZDICT_FILES) $(ZSTREAM_LOCAL_FILES)
 ZSTREAMFILES := $(ZSTD_FILES) $(ZSTREAM_PROPER_FILES)
 zstreamtest32 : CFLAGS += -m32
@@ -203,7 +203,7 @@ zstreamtest-dll : $(ZSTREAM_LOCAL_FILES)
 	$(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT)
 
 paramgrill : DEBUGFLAGS =  # turn off assert() by default for speed measurements
-paramgrill : $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/benchfn.c $(PRGDIR)/benchzstd.c $(PRGDIR)/datagen.c paramgrill.c
+paramgrill : $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/benchfn.c $(PRGDIR)/benchzstd.c $(PRGDIR)/datagen.c paramgrill.c
 	$(CC) $(FLAGS) $^ -lm -o $@$(EXT)
 
 datagen : $(PRGDIR)/datagen.c datagencli.c
@@ -222,7 +222,7 @@ legacy : CPPFLAGS += -I$(ZSTDDIR)/legacy -DZSTD_LEGACY_SUPPORT=4
 legacy : $(ZSTD_FILES) $(wildcard $(ZSTDDIR)/legacy/*.c) legacy.c
 	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
-decodecorpus : $(filter-out zstdc_zstd_compress.o, $(ZSTD_OBJECTS)) $(ZDICT_FILES) $(PRGDIR)/util.c decodecorpus.c
+decodecorpus : $(filter-out zstdc_zstd_compress.o, $(ZSTD_OBJECTS)) $(ZDICT_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c decodecorpus.c
 	$(CC) $(FLAGS) $^ -o $@$(EXT) -lm
 
 symbols  : symbols.c zstd-dll
@@ -233,7 +233,7 @@ else
 	$(CC) $(FLAGS) $< -o $@$(EXT) -Wl,-rpath=$(ZSTDDIR) $(ZSTDDIR)/libzstd.so   # broken on Mac
 endif
 
-poolTests : $(PRGDIR)/util.c poolTests.c $(ZSTDDIR)/common/pool.c $(ZSTDDIR)/common/threading.c $(ZSTDDIR)/common/zstd_common.c $(ZSTDDIR)/common/error_private.c
+poolTests : $(PRGDIR)/util.c $(PRGDIR)/timefn.c poolTests.c $(ZSTDDIR)/common/pool.c $(ZSTDDIR)/common/threading.c $(ZSTDDIR)/common/zstd_common.c $(ZSTDDIR)/common/error_private.c
 	$(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT)
 
 .PHONY: versionsTest
diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c
index b03dc55e..d8b33247 100644
--- a/tests/decodecorpus.c
+++ b/tests/decodecorpus.c
@@ -16,6 +16,7 @@
 #include <string.h>
 
 #include "util.h"
+#include "timefn.h"   /* UTIL_clockSpanMicro, SEC_TO_MICRO, UTIL_TIME_INITIALIZER */
 #include "zstd.h"
 #include "zstd_internal.h"
 #include "mem.h"
diff --git a/tests/fullbench.c b/tests/fullbench.c
index 8644a2e3..6e42d210 100644
--- a/tests/fullbench.c
+++ b/tests/fullbench.c
@@ -17,6 +17,7 @@
 #include <stdio.h>       /* fprintf, fopen, ftello64 */
 #include <assert.h>      /* assert */
 
+#include "timefn.h"      /* UTIL_clockSpanNano, UTIL_getTime */
 #include "mem.h"         /* U32 */
 #ifndef ZSTD_DLL_IMPORT
     #include "zstd_internal.h"   /* ZSTD_decodeSeqHeaders, ZSTD_blockHeaderSize, blockType_e, KB, MB */
@@ -67,12 +68,6 @@ static const size_t g_sampleSize = 10000000;
 static unsigned g_nbIterations = NBLOOPS;
 static double g_compressibility = COMPRESSIBILITY_DEFAULT;
 
-static void BMK_SetNbIterations(unsigned nbLoops)
-{
-    g_nbIterations = nbLoops;
-    DISPLAY("- %i iterations -\n", g_nbIterations);
-}
-
 
 /*_*******************************************************
 *  Private functions
@@ -316,9 +311,9 @@ static size_t local_ZSTD_decompressContinue(const void* src, size_t srcSize,
 /*_*******************************************************
 *  Bench functions
 *********************************************************/
-static size_t benchMem(unsigned benchNb,
-                       const void* src, size_t srcSize,
-                       int cLevel, ZSTD_compressionParameters cparams)
+static int benchMem(unsigned benchNb,
+                    const void* src, size_t srcSize,
+                    int cLevel, ZSTD_compressionParameters cparams)
 {
     size_t dstBuffSize = ZSTD_compressBound(srcSize);
     BYTE*  dstBuff;
@@ -395,22 +390,22 @@ static size_t benchMem(unsigned benchNb,
           cparams->minMatch, cparams->targetLength, cparams->strategy); */
 
     ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_compressionLevel, cLevel);
-    ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_windowLog, cparams.windowLog);
-    ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_hashLog, cparams.hashLog);
-    ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_chainLog, cparams.chainLog);
-    ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_searchLog, cparams.searchLog);
-    ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_minMatch, cparams.minMatch);
-    ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_targetLength, cparams.targetLength);
+    ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_windowLog, (int)cparams.windowLog);
+    ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_hashLog, (int)cparams.hashLog);
+    ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_chainLog, (int)cparams.chainLog);
+    ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_searchLog, (int)cparams.searchLog);
+    ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_minMatch, (int)cparams.minMatch);
+    ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_targetLength, (int)cparams.targetLength);
     ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_strategy, cparams.strategy);
 
 
     ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_compressionLevel, cLevel);
-    ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_windowLog, cparams.windowLog);
-    ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_hashLog, cparams.hashLog);
-    ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_chainLog, cparams.chainLog);
-    ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_searchLog, cparams.searchLog);
-    ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_minMatch, cparams.minMatch);
-    ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_targetLength, cparams.targetLength);
+    ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_windowLog, (int)cparams.windowLog);
+    ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_hashLog, (int)cparams.hashLog);
+    ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_chainLog, (int)cparams.chainLog);
+    ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_searchLog, (int)cparams.searchLog);
+    ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_minMatch, (int)cparams.minMatch);
+    ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_targetLength, (int)cparams.targetLength);
     ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_strategy, cparams.strategy);
 
     /* Preparation */
@@ -469,8 +464,9 @@ static size_t benchMem(unsigned benchNb,
             iend = ip + ZSTD_blockHeaderSize + cBlockSize;   /* End of first block */
             ip += ZSTD_blockHeaderSize;                      /* skip block header */
             ZSTD_decompressBegin(g_zdc);
-            ip += ZSTD_decodeLiteralsBlock(g_zdc, ip, iend-ip);   /* skip literal segment */
-            g_cSize = iend-ip;
+            assert(iend > ip);
+            ip += ZSTD_decodeLiteralsBlock(g_zdc, ip, (size_t)(iend-ip));   /* skip literal segment */
+            g_cSize = (size_t)(iend-ip);
             memcpy(buff2, ip, g_cSize);   /* copy rest of block (it starts by SeqHeader) */
             srcSize = srcSize > 128 KB ? 128 KB : srcSize;   /* speed relative to block */
             break;
@@ -501,7 +497,7 @@ static size_t benchMem(unsigned benchNb,
         BMK_benchParams_t bp;
         BMK_runTime_t bestResult;
         bestResult.sumOfReturn = 0;
-        bestResult.nanoSecPerRun = (unsigned long long)(-1LL);
+        bestResult.nanoSecPerRun = (double)(unsigned long long)(-1LL);   /* "worst possible" sentinel: must be larger than any measured time, so the first real measurement replaces it ((double)(-1) == -1.0 could never be beaten) */
         assert(tfs != NULL);
 
         bp.benchFn = benchFunction;
@@ -654,7 +650,9 @@ static unsigned readU32FromChar(const char** stringPtr)
     while ((**stringPtr >='0') && (**stringPtr <='9')) {
         unsigned const max = (((unsigned)(-1)) / 10) - 1;
         if (result > max) ERROR_OUT(errorMsg);
-        result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
+        result *= 10;
+        result += (unsigned)(**stringPtr - '0');
+        (*stringPtr)++ ;
     }
     if ((**stringPtr=='K') || (**stringPtr=='M')) {
         unsigned const maxK = ((unsigned)(-1)) >> 10;
@@ -671,7 +669,7 @@ static unsigned readU32FromChar(const char** stringPtr)
     return result;
 }
 
-static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
+static int longCommandWArg(const char** stringPtr, const char* longCommand)
 {
     size_t const comSize = strlen(longCommand);
     int const result = !strncmp(*stringPtr, longCommand, comSize);
@@ -772,7 +770,7 @@ int main(int argc, const char** argv)
                     /* Modify Nb Iterations */
                 case 'i':
                     argument++;
-                    BMK_SetNbIterations((int)readU32FromChar(&argument));
+                    g_nbIterations = readU32FromChar(&argument);
                     break;
 
                     /* Select compressibility of synthetic sample */
@@ -782,7 +780,7 @@ int main(int argc, const char** argv)
                     break;
                 case 'l':
                     argument++;
-                    cLevel = readU32FromChar(&argument);
+                    cLevel = (int)readU32FromChar(&argument);
                     cparams = ZSTD_getCParams(cLevel, 0, 0);
                     break;
 
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index 01f87e6e..d5f872d2 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -38,6 +38,7 @@
 #define XXH_STATIC_LINKING_ONLY   /* XXH64_state_t */
 #include "xxhash.h"       /* XXH64 */
 #include "util.h"
+#include "timefn.h"       /* SEC_TO_MICRO, UTIL_time_t, UTIL_TIME_INITIALIZER, UTIL_clockSpanMicro, UTIL_getTime */
 
 
 /*-************************************
diff --git a/tests/paramgrill.c b/tests/paramgrill.c
index 415551b9..fb3c776b 100644
--- a/tests/paramgrill.c
+++ b/tests/paramgrill.c
@@ -19,6 +19,7 @@
 #include <math.h>      /* log */
 #include <assert.h>
 
+#include "timefn.h"    /* SEC_TO_MICRO, UTIL_time_t, UTIL_clockSpanMicro, UTIL_clockSpanNano, UTIL_getTime */
 #include "mem.h"
 #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_parameters, ZSTD_estimateCCtxSize */
 #include "zstd.h"
@@ -141,7 +142,7 @@ static const char* g_shortParamNames[NUM_PARAMS] =
 /* maps value from { 0 to rangetable[param] - 1 } to valid paramvalues */
 static U32 rangeMap(varInds_t param, int ind)
 {
-    ind = MAX(MIN(ind, (int)rangetable[param] - 1), 0);
+    U32 const uind = (U32)MAX(MIN(ind, (int)rangetable[param] - 1), 0);
     switch(param) {
         case wlog_ind: /* using default: triggers -Wswitch-enum */
         case clog_ind:
@@ -149,11 +150,11 @@ static U32 rangeMap(varInds_t param, int ind)
         case slog_ind:
         case mml_ind:
         case strt_ind:
-            return mintable[param] + ind;
+            return mintable[param] + uind;
         case tlen_ind:
-            return tlen_table[ind];
+            return tlen_table[uind];
         case fadt_ind: /* 0, 1, 2 -> -1, 0, 1 */
-            return ind - 1;
+            return uind - 1;   /* unsigned wrap-around is intended: uind==0 yields (U32)-1 */
         case NUM_PARAMS:
         default:;
     }
@@ -173,7 +174,7 @@ static int invRangeMap(varInds_t param, U32 value)
         case slog_ind:
         case mml_ind:
         case strt_ind:
-            return value - mintable[param];
+            return (int)(value - mintable[param]);
         case tlen_ind: /* bin search */
         {
             int lo = 0;
@@ -493,13 +494,15 @@ static void
 paramVariation(paramValues_t* ptr, memoTable_t* mtAll, const U32 nbChanges)
 {
     paramValues_t p;
-    U32 validated = 0;
+    int validated = 0;
     while (!validated) {
         U32 i;
         p = *ptr;
         for (i = 0 ; i < nbChanges ; i++) {
             const U32 changeID = (U32)FUZ_rand(&g_rand) % (mtAll[p.vals[strt_ind]].varLen << 1);
-            paramVaryOnce(mtAll[p.vals[strt_ind]].varArray[changeID >> 1], ((changeID & 1) << 1) - 1, &p);
+            paramVaryOnce(mtAll[p.vals[strt_ind]].varArray[changeID >> 1],
+                          (int)((changeID & 1) << 1) - 1,   /* low bit of changeID: 0 -> -1, 1 -> +1 */
+                          &p);
         }
         validated = paramValid(p);
     }
@@ -511,7 +514,7 @@ static paramValues_t randomParams(void)
 {
     varInds_t v; paramValues_t p;
     for(v = 0; v < NUM_PARAMS; v++) {
-        p.vals[v] = rangeMap(v, FUZ_rand(&g_rand) % rangetable[v]);
+        p.vals[v] = rangeMap(v, (int)(FUZ_rand(&g_rand) % rangetable[v]));
     }
     return p;
 }
@@ -1638,7 +1641,7 @@ BMK_benchMemInvertible( buffers_t buf, contexts_t ctx,
                 return bOut;
             }
             {   BMK_runTime_t const rResult = BMK_extract_runTime(cOutcome);
-                bResult.cSpeed = (srcSize * TIMELOOP_NANOSEC) / rResult.nanoSecPerRun;
+                bResult.cSpeed = (unsigned long long)((double)srcSize * TIMELOOP_NANOSEC / rResult.nanoSecPerRun);
                 bResult.cSize = rResult.sumOfReturn;
             }
             compressionCompleted = BMK_isCompleted_TimedFn(timeStateCompress);
@@ -1656,7 +1659,7 @@ BMK_benchMemInvertible( buffers_t buf, contexts_t ctx,
                 return bOut;
             }
             {   BMK_runTime_t const rResult = BMK_extract_runTime(dOutcome);
-                bResult.dSpeed = (srcSize * TIMELOOP_NANOSEC) / rResult.nanoSecPerRun;
+                bResult.dSpeed = (unsigned long long)((double)srcSize * TIMELOOP_NANOSEC / rResult.nanoSecPerRun);
             }
             decompressionCompleted = BMK_isCompleted_TimedFn(timeStateDecompress);
         }
@@ -2199,7 +2202,9 @@ static winnerInfo_t climbOnce(const constraint_t target,
                 for (offset = -1; offset <= 1; offset += 2) {
                     CHECKTIME(winnerInfo);
                     candidateInfo.params = cparam;
-                    paramVaryOnce(mtAll[cparam.vals[strt_ind]].varArray[i], offset, &candidateInfo.params);
+                    paramVaryOnce(mtAll[cparam.vals[strt_ind]].varArray[i],
+                                  offset,
+                                  &candidateInfo.params);
 
                     if(paramValid(candidateInfo.params)) {
                         int res;
@@ -2351,7 +2356,7 @@ static int nextStrategy(const int currentStrategy, const int bestStrategy)
  * cLevel - compression level to exceed (all solutions must be > lvl in cSpeed + ratio)
  */
 
-static int g_maxTries = 5;
+static unsigned g_maxTries = 5;
 #define TRY_DECAY 1
 
 static int
@@ -2561,7 +2566,7 @@ _cleanUp:
  * @return 0 and doesn't modify *stringPtr otherwise.
  * from zstdcli.c
  */
-static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
+static int longCommandWArg(const char** stringPtr, const char* longCommand)
 {
     size_t const comSize = strlen(longCommand);
     int const result = !strncmp(*stringPtr, longCommand, comSize);
@@ -2588,7 +2593,10 @@ static unsigned readU32FromChar(const char** stringPtr)
     while ((**stringPtr >='0') && (**stringPtr <='9')) {
         unsigned const max = (((unsigned)(-1)) / 10) - 1;
         if (result > max) errorOut(errorMsg);
-        result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
+        result *= 10;
+        assert(**stringPtr >= '0');
+        result += (unsigned)(**stringPtr - '0');
+        (*stringPtr)++ ;
     }
     if ((**stringPtr=='K') || (**stringPtr=='M')) {
         unsigned const maxK = ((unsigned)(-1)) >> 10;
@@ -2726,7 +2734,7 @@ int main(int argc, const char** argv)
                 PARSE_SUB_ARGS("strict=", "stc=", g_strictness);
                 PARSE_SUB_ARGS("maxTries=", "tries=", g_maxTries);
                 PARSE_SUB_ARGS("memoLimitLog=", "memLog=", memoTableLog);
-                if (longCommandWArg(&argument, "level=") || longCommandWArg(&argument, "lvl=")) { cLevelOpt = readU32FromChar(&argument); g_optmode = 1; if (argument[0]==',') { argument++; continue; } else break; }
+                if (longCommandWArg(&argument, "level=") || longCommandWArg(&argument, "lvl=")) { cLevelOpt = (int)readU32FromChar(&argument); g_optmode = 1; if (argument[0]==',') { argument++; continue; } else break; }
                 if (longCommandWArg(&argument, "speedForRatio=") || longCommandWArg(&argument, "speedRatio=")) { g_ratioMultiplier = readDoubleFromChar(&argument); if (argument[0]==',') { argument++; continue; } else break; }
 
                 DISPLAY("invalid optimization parameter \n");
@@ -2743,7 +2751,7 @@ int main(int argc, const char** argv)
             g_singleRun = 1;
             for ( ; ;) {
                 if(parse_params(&argument, &g_params)) { if(argument[0] == ',') { argument++; continue; } else break; }
-                if (longCommandWArg(&argument, "level=") || longCommandWArg(&argument, "lvl=")) { cLevelRun = readU32FromChar(&argument); g_params = emptyParams(); if (argument[0]==',') { argument++; continue; } else break; }
+                if (longCommandWArg(&argument, "level=") || longCommandWArg(&argument, "lvl=")) { cLevelRun = (int)readU32FromChar(&argument); g_params = emptyParams(); if (argument[0]==',') { argument++; continue; } else break; }
 
                 DISPLAY("invalid compression parameter \n");
                 return 1;
@@ -2855,7 +2863,7 @@ int main(int argc, const char** argv)
                             continue;
                         case 'L':
                             {   argument++;
-                                cLevelRun = readU32FromChar(&argument);
+                                cLevelRun = (int)readU32FromChar(&argument);
                                 g_params = emptyParams();
                                 continue;
                             }
@@ -2944,7 +2952,8 @@ int main(int argc, const char** argv)
             }
         } else {
             if (g_optimizer) {
-                result = optimizeForSize(argv+filenamesStart, argc-filenamesStart, dictFileName, target, paramTarget, cLevelOpt, cLevelRun, memoTableLog);
+                assert(filenamesStart < argc);
+                result = optimizeForSize(argv+filenamesStart, (size_t)(argc-filenamesStart), dictFileName, target, paramTarget, cLevelOpt, cLevelRun, memoTableLog);
             } else {
                 result = benchFiles(argv+filenamesStart, argc-filenamesStart, dictFileName, cLevelRun);
             }
diff --git a/tests/poolTests.c b/tests/poolTests.c
index 8b9a4700..272e961d 100644
--- a/tests/poolTests.c
+++ b/tests/poolTests.c
@@ -12,6 +12,7 @@
 #include "pool.h"
 #include "threading.h"
 #include "util.h"
+#include "timefn.h"
 #include <stddef.h>
 #include <stdio.h>
 
@@ -25,25 +26,27 @@
 #define ASSERT_EQ(lhs, rhs) ASSERT_TRUE((lhs) == (rhs))
 
 struct data {
-  pthread_mutex_t mutex;
+  ZSTD_pthread_mutex_t mutex;
   unsigned data[16];
   size_t i;
 };
 
-static void fn(void *opaque) {
+static void fn(void *opaque)
+{
   struct data *data = (struct data *)opaque;
   ZSTD_pthread_mutex_lock(&data->mutex);
-  data->data[data->i] = data->i;
+  data->data[data->i] = (unsigned)(data->i);
   ++data->i;
   ZSTD_pthread_mutex_unlock(&data->mutex);
 }
 
-static int testOrder(size_t numThreads, size_t queueSize) {
+static int testOrder(size_t numThreads, size_t queueSize)
+{
   struct data data;
-  POOL_ctx *ctx = POOL_create(numThreads, queueSize);
+  POOL_ctx* const ctx = POOL_create(numThreads, queueSize);
   ASSERT_TRUE(ctx);
   data.i = 0;
-  ZSTD_pthread_mutex_init(&data.mutex, NULL);
+  (void)ZSTD_pthread_mutex_init(&data.mutex, NULL);
   { size_t i;
     for (i = 0; i < 16; ++i) {
       POOL_add(ctx, &fn, &data);
@@ -71,7 +74,7 @@ static void waitFn(void *opaque) {
 /* Tests for deadlock */
 static int testWait(size_t numThreads, size_t queueSize) {
   struct data data;
-  POOL_ctx *ctx = POOL_create(numThreads, queueSize);
+  POOL_ctx* const ctx = POOL_create(numThreads, queueSize);
   ASSERT_TRUE(ctx);
   { size_t i;
     for (i = 0; i < 16; ++i) {
@@ -93,7 +96,7 @@ typedef struct {
 } poolTest_t;
 
 static void waitLongFn(void *opaque) {
-  poolTest_t* test = (poolTest_t*) opaque;
+  poolTest_t* const test = (poolTest_t*) opaque;
   UTIL_sleepMilli(10);
   ZSTD_pthread_mutex_lock(&test->mut);
   test->val = test->val + 1;
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 51bf236a..e6f69d78 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -37,6 +37,7 @@
 #include "xxhash.h"       /* XXH64_* */
 #include "seqgen.h"
 #include "util.h"
+#include "timefn.h"       /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */
 
 
 /*-************************************
diff --git a/zlibWrapper/Makefile b/zlibWrapper/Makefile
index 0c19107b..d4fc33b5 100644
--- a/zlibWrapper/Makefile
+++ b/zlibWrapper/Makefile
@@ -88,7 +88,7 @@ fitblk: $(EXAMPLE_PATH)/fitblk.o $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.o $(ZSTDLI
 fitblk_zstd: $(EXAMPLE_PATH)/fitblk.o $(ZLIBWRAPPER_PATH)/zstdTurnedOn_zlibwrapper.o $(ZSTDLIBRARY)
 	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZLIB_LIBRARY) -o $@
 
-zwrapbench: $(EXAMPLE_PATH)/zwrapbench.o $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.o $(PROGRAMS_PATH)/util.o $(PROGRAMS_PATH)/datagen.o $(ZSTDLIBRARY)
+zwrapbench: $(EXAMPLE_PATH)/zwrapbench.o $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.o $(PROGRAMS_PATH)/util.o $(PROGRAMS_PATH)/timefn.o $(PROGRAMS_PATH)/datagen.o $(ZSTDLIBRARY)
 	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ $(ZLIB_LIBRARY) -o $@
 
 
diff --git a/zlibWrapper/examples/zwrapbench.c b/zlibWrapper/examples/zwrapbench.c
index e071c309..99f9e11b 100644
--- a/zlibWrapper/examples/zwrapbench.c
+++ b/zlibWrapper/examples/zwrapbench.c
@@ -19,6 +19,7 @@
 #include <ctype.h>       /* toupper */
 #include <errno.h>       /* errno */
 
+#include "timefn.h"      /* UTIL_time_t, UTIL_getTime, UTIL_clockSpanMicro, UTIL_waitForNextTick */
 #include "mem.h"
 #define ZSTD_STATIC_LINKING_ONLY
 #include "zstd.h"