diff --git a/Makefile b/Makefile
index 8ffc9ae9..14d1510a 100644
--- a/Makefile
+++ b/Makefile
@@ -70,7 +70,7 @@ clean:
 	@$(MAKE) -C $(TESTDIR) $@ > $(VOID)
 	@$(MAKE) -C $(ZWRAPDIR) $@ > $(VOID)
 	@$(MAKE) -C examples/ $@ > $(VOID)
-	@$(RM) zstd$(EXT) tmp*
+	@$(RM) zstd$(EXT) zstdmt$(EXT) tmp*
 	@echo Cleaning completed
 
 
diff --git a/NEWS b/NEWS
index 1b132ca9..46bdb25a 100644
--- a/NEWS
+++ b/NEWS
@@ -1,7 +1,9 @@
 v1.1.3
 cli : new : advanced commands for detailed parameters, by Przemyslaw Skibinski
 cli : fix zstdless on Mac OS-X, by Andrew Janke
+dictBuilder : improved dictionary generation quality, thanks to Nick Terrell
 API : fix : all symbols properly exposed in libzstd, by Nick Terrell
+API : fix : ZSTD_initCStream_usingCDict() properly writes dictID into frame header, by Gregory Szorc (#511)
 API : new : ZSTD_create?Dict_byReference(), requested by Bartosz Taudul
 API : new : ZDICT_finalizeDictionary()
 
diff --git a/build/VS2005/fuzzer/fuzzer.vcproj b/build/VS2005/fuzzer/fuzzer.vcproj
index b1ac8136..d6ec14d1 100644
--- a/build/VS2005/fuzzer/fuzzer.vcproj
+++ b/build/VS2005/fuzzer/fuzzer.vcproj
@@ -331,6 +331,10 @@
 				RelativePath="..\..\..\programs\datagen.c"
 				>
 			</File>
+			<File
+				RelativePath="..\..\..\lib\dictBuilder\cover.c"
+				>
+			</File>
 			<File
 				RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
 				>
diff --git a/build/VS2005/zstd/zstd.vcproj b/build/VS2005/zstd/zstd.vcproj
index 9f49e3cb..5ef7a98f 100644
--- a/build/VS2005/zstd/zstd.vcproj
+++ b/build/VS2005/zstd/zstd.vcproj
@@ -343,6 +343,10 @@
 				RelativePath="..\..\..\programs\dibio.c"
 				>
 			</File>
+			<File
+				RelativePath="..\..\..\lib\dictBuilder\cover.c"
+				>
+			</File>
 			<File
 				RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
 				>
diff --git a/build/VS2005/zstdlib/zstdlib.vcproj b/build/VS2005/zstdlib/zstdlib.vcproj
index d95212b3..828cc828 100644
--- a/build/VS2005/zstdlib/zstdlib.vcproj
+++ b/build/VS2005/zstdlib/zstdlib.vcproj
@@ -327,6 +327,10 @@
 			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
 			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
 			>
+			<File
+				RelativePath="..\..\..\lib\dictBuilder\cover.c"
+				>
+			</File>
 			<File
 				RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
 				>
diff --git a/build/VS2008/fuzzer/fuzzer.vcproj b/build/VS2008/fuzzer/fuzzer.vcproj
index 311b7990..f6912b8d 100644
--- a/build/VS2008/fuzzer/fuzzer.vcproj
+++ b/build/VS2008/fuzzer/fuzzer.vcproj
@@ -332,6 +332,10 @@
 				RelativePath="..\..\..\programs\datagen.c"
 				>
 			</File>
+			<File
+				RelativePath="..\..\..\lib\dictBuilder\cover.c"
+				>
+			</File>
 			<File
 				RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
 				>
diff --git a/build/VS2008/zstd/zstd.vcproj b/build/VS2008/zstd/zstd.vcproj
index f5b3f558..0beb59dd 100644
--- a/build/VS2008/zstd/zstd.vcproj
+++ b/build/VS2008/zstd/zstd.vcproj
@@ -344,6 +344,10 @@
 				RelativePath="..\..\..\programs\dibio.c"
 				>
 			</File>
+			<File
+				RelativePath="..\..\..\lib\dictBuilder\cover.c"
+				>
+			</File>
 			<File
 				RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
 				>
diff --git a/build/VS2008/zstdlib/zstdlib.vcproj b/build/VS2008/zstdlib/zstdlib.vcproj
index b1c103e3..69b742d1 100644
--- a/build/VS2008/zstdlib/zstdlib.vcproj
+++ b/build/VS2008/zstdlib/zstdlib.vcproj
@@ -328,6 +328,10 @@
 			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
 			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
 			>
+			<File
+				RelativePath="..\..\..\lib\dictBuilder\cover.c"
+				>
+			</File>
 			<File
 				RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
 				>
diff --git a/build/VS2010/fuzzer/fuzzer.vcxproj b/build/VS2010/fuzzer/fuzzer.vcxproj
index 7227c7ec..623a9ca4 100644
--- a/build/VS2010/fuzzer/fuzzer.vcxproj
+++ b/build/VS2010/fuzzer/fuzzer.vcxproj
@@ -165,6 +165,7 @@
     <ClCompile Include="..\..\..\lib\compress\zstd_compress.c" />
     <ClCompile Include="..\..\..\lib\decompress\huf_decompress.c" />
     <ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c" />
+    <ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
     <ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
     <ClCompile Include="..\..\..\lib\dictBuilder\zdict.c" />
     <ClCompile Include="..\..\..\programs\datagen.c" />
diff --git a/build/VS2010/libzstd-dll/libzstd-dll.vcxproj b/build/VS2010/libzstd-dll/libzstd-dll.vcxproj
index f1ea5c82..f0feecb2 100644
--- a/build/VS2010/libzstd-dll/libzstd-dll.vcxproj
+++ b/build/VS2010/libzstd-dll/libzstd-dll.vcxproj
@@ -32,6 +32,7 @@
     <ClCompile Include="..\..\..\lib\deprecated\zbuff_common.c" />
     <ClCompile Include="..\..\..\lib\deprecated\zbuff_compress.c" />
     <ClCompile Include="..\..\..\lib\deprecated\zbuff_decompress.c" />
+    <ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
     <ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
     <ClCompile Include="..\..\..\lib\dictBuilder\zdict.c" />
     <ClCompile Include="..\..\..\lib\legacy\zstd_v01.c" />
diff --git a/build/VS2010/libzstd/libzstd.vcxproj b/build/VS2010/libzstd/libzstd.vcxproj
index 228e83da..c8d21dd4 100644
--- a/build/VS2010/libzstd/libzstd.vcxproj
+++ b/build/VS2010/libzstd/libzstd.vcxproj
@@ -32,6 +32,7 @@
     <ClCompile Include="..\..\..\lib\deprecated\zbuff_common.c" />
     <ClCompile Include="..\..\..\lib\deprecated\zbuff_compress.c" />
     <ClCompile Include="..\..\..\lib\deprecated\zbuff_decompress.c" />
+    <ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
     <ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
     <ClCompile Include="..\..\..\lib\dictBuilder\zdict.c" />
     <ClCompile Include="..\..\..\lib\legacy\zstd_v01.c" />
diff --git a/build/VS2010/zstd/zstd.vcxproj b/build/VS2010/zstd/zstd.vcxproj
index 9886af0b..3939c554 100644
--- a/build/VS2010/zstd/zstd.vcxproj
+++ b/build/VS2010/zstd/zstd.vcxproj
@@ -32,6 +32,7 @@
     <ClCompile Include="..\..\..\lib\compress\zstd_compress.c" />
     <ClCompile Include="..\..\..\lib\decompress\huf_decompress.c" />
     <ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c" />
+    <ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
     <ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
     <ClCompile Include="..\..\..\lib\dictBuilder\zdict.c" />
     <ClCompile Include="..\..\..\lib\legacy\zstd_v01.c" />
diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt
index dce39aba..db752784 100644
--- a/build/cmake/lib/CMakeLists.txt
+++ b/build/cmake/lib/CMakeLists.txt
@@ -50,6 +50,7 @@ SET(Sources
         ${LIBRARY_DIR}/compress/zstdmt_compress.c
         ${LIBRARY_DIR}/decompress/huf_decompress.c
         ${LIBRARY_DIR}/decompress/zstd_decompress.c
+        ${LIBRARY_DIR}/dictBuilder/cover.c
         ${LIBRARY_DIR}/dictBuilder/divsufsort.c
         ${LIBRARY_DIR}/dictBuilder/zdict.c
         ${LIBRARY_DIR}/deprecated/zbuff_common.c
diff --git a/lib/Makefile b/lib/Makefile
index 34363b7b..c4a5ecb9 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -1,11 +1,13 @@
-# ################################################################
+# ##########################################################################
 # Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 # All rights reserved.
 #
+# This Makefile is validated for Linux, macOS, *BSD, Hurd, Solaris, MSYS2 targets
+#
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree. An additional grant
 # of patent rights can be found in the PATENTS file in the same directory.
-# ################################################################
+# ##########################################################################
 
 # Version numbers
 LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ./zstd.h`
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index d984af1f..e8a37511 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -2961,7 +2961,7 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
         if (zcs->outBuff == NULL) return ERROR(memory_allocation);
     }
 
-    if (dict) {
+    if (dict && dictSize >= 8) {
         ZSTD_freeCDict(zcs->cdictLocal);
         zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 0, params, zcs->customMem);
         if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
@@ -2980,6 +2980,7 @@ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
     ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict);
     size_t const initError =  ZSTD_initCStream_advanced(zcs, NULL, 0, params, 0);
     zcs->cdict = cdict;
+    zcs->cctx->dictID = params.fParams.noDictIDFlag ? 0 : cdict->refContext->dictID;
     return initError;
 }
 
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index 02f3bf45..c53f3c3d 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -1444,7 +1444,7 @@ size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
     if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, dict, dictSize);
 #endif
-    ZSTD_decompressBegin_usingDict(dctx, dict, dictSize);
+    CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
     ZSTD_checkContinuity(dctx, dst);
     return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
 }
@@ -1936,7 +1936,7 @@ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t di
     zds->stage = zdss_loadHeader;
     zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
     ZSTD_freeDDict(zds->ddictLocal);
-    if (dict) {
+    if (dict && dictSize >= 8) {
         zds->ddictLocal = ZSTD_createDDict(dict, dictSize);
         if (zds->ddictLocal == NULL) return ERROR(memory_allocation);
     } else zds->ddictLocal = NULL;
diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c
new file mode 100644
index 00000000..089f077c
--- /dev/null
+++ b/lib/dictBuilder/cover.c
@@ -0,0 +1,1023 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include <stdio.h>  /* fprintf */
+#include <stdlib.h> /* malloc, free, qsort */
+#include <string.h> /* memset */
+#include <time.h>   /* clock */
+#ifdef ZSTD_PTHREAD
+#include "threading.h"
+#endif
+
+#include "mem.h"           /* read */
+#include "zstd_internal.h" /* includes zstd.h */
+#ifndef ZDICT_STATIC_LINKING_ONLY
+#define ZDICT_STATIC_LINKING_ONLY
+#endif
+#include "zdict.h"
+
+/*-*************************************
+*  Constants
+***************************************/
+#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
+
+/*-*************************************
+*  Console display
+***************************************/
+static int g_displayLevel = 2;
+#define DISPLAY(...)                                                           \
+  {                                                                            \
+    fprintf(stderr, __VA_ARGS__);                                              \
+    fflush(stderr);                                                            \
+  }
+#define LOCALDISPLAYLEVEL(displayLevel, l, ...)                                \
+  if (displayLevel >= l) {                                                     \
+    DISPLAY(__VA_ARGS__);                                                      \
+  } /* 0 : no display;   1: errors;   2: default;  3: details;  4: debug */
+#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
+
+#define LOCALDISPLAYUPDATE(displayLevel, l, ...)                               \
+  if (displayLevel >= l) {                                                     \
+    if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) {             \
+      g_time = clock();                                                        \
+      DISPLAY(__VA_ARGS__);                                                    \
+      if (displayLevel >= 4)                                                   \
+        fflush(stdout);                                                        \
+    }                                                                          \
+  }
+#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
+static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
+static clock_t g_time = 0;
+
+/*-*************************************
+* Hash table
+***************************************
+* A small specialized hash map for storing activeDmers.
+* The map does not resize, so if it becomes full it will loop forever.
+* Thus, the map must be large enough to store every value.
+* The map implements linear probing and keeps its load less than 0.5.
+*/
+
+#define MAP_EMPTY_VALUE ((U32)-1)
+typedef struct COVER_map_pair_t_s {
+  U32 key;
+  U32 value;
+} COVER_map_pair_t;
+
+typedef struct COVER_map_s {
+  COVER_map_pair_t *data;
+  U32 sizeLog;
+  U32 size;
+  U32 sizeMask;
+} COVER_map_t;
+
+/**
+ * Clear the map.
+ */
+static void COVER_map_clear(COVER_map_t *map) {
+  memset(map->data, MAP_EMPTY_VALUE, map->size * sizeof(COVER_map_pair_t));
+}
+
+/**
+ * Initializes a map of the given size.
+ * Returns 1 on success and 0 on failure.
+ * The map must be destroyed with COVER_map_destroy().
+ * The map is only guaranteed to be large enough to hold size elements.
+ */
+static int COVER_map_init(COVER_map_t *map, U32 size) {
+  map->sizeLog = ZSTD_highbit32(size) + 2;
+  map->size = (U32)1 << map->sizeLog;
+  map->sizeMask = map->size - 1;
+  map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(COVER_map_pair_t));
+  if (!map->data) {
+    map->sizeLog = 0;
+    map->size = 0;
+    return 0;
+  }
+  COVER_map_clear(map);
+  return 1;
+}
+
+/**
+ * Internal hash function
+ */
+static const U32 prime4bytes = 2654435761U;
+static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
+  return (key * prime4bytes) >> (32 - map->sizeLog);
+}
+
+/**
+ * Helper function that returns the index that a key should be placed into.
+ */
+static U32 COVER_map_index(COVER_map_t *map, U32 key) {
+  const U32 hash = COVER_map_hash(map, key);
+  U32 i;
+  for (i = hash;; i = (i + 1) & map->sizeMask) {
+    COVER_map_pair_t *pos = &map->data[i];
+    if (pos->value == MAP_EMPTY_VALUE) {
+      return i;
+    }
+    if (pos->key == key) {
+      return i;
+    }
+  }
+}
+
+/**
+ * Returns the pointer to the value for key.
+ * If key is not in the map, it is inserted and the value is set to 0.
+ * The map must not be full.
+ */
+static U32 *COVER_map_at(COVER_map_t *map, U32 key) {
+  COVER_map_pair_t *pos = &map->data[COVER_map_index(map, key)];
+  if (pos->value == MAP_EMPTY_VALUE) {
+    pos->key = key;
+    pos->value = 0;
+  }
+  return &pos->value;
+}
+
+/**
+ * Deletes key from the map if present.
+ */
+static void COVER_map_remove(COVER_map_t *map, U32 key) {
+  U32 i = COVER_map_index(map, key);
+  COVER_map_pair_t *del = &map->data[i];
+  U32 shift = 1;
+  if (del->value == MAP_EMPTY_VALUE) {
+    return;
+  }
+  for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) {
+    COVER_map_pair_t *const pos = &map->data[i];
+    /* If the position is empty we are done */
+    if (pos->value == MAP_EMPTY_VALUE) {
+      del->value = MAP_EMPTY_VALUE;
+      return;
+    }
+    /* If pos can be moved to del do so */
+    if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) {
+      del->key = pos->key;
+      del->value = pos->value;
+      del = pos;
+      shift = 1;
+    } else {
+      ++shift;
+    }
+  }
+}
+
+/**
+ * Destroys a map that was initialized with COVER_map_init().
+ */
+static void COVER_map_destroy(COVER_map_t *map) {
+  if (map->data) {
+    free(map->data);
+  }
+  map->data = NULL;
+  map->size = 0;
+}
+
+/*-*************************************
+* Context
+***************************************/
+
+typedef struct {
+  const BYTE *samples;
+  size_t *offsets;
+  const size_t *samplesSizes;
+  size_t nbSamples;
+  U32 *suffix;
+  size_t suffixSize;
+  U32 *freqs;
+  U32 *dmerAt;
+  unsigned d;
+} COVER_ctx_t;
+
+/* We need a global context for qsort... */
+static COVER_ctx_t *g_ctx = NULL;
+
+/*-*************************************
+*  Helper functions
+***************************************/
+
+/**
+ * Returns the sum of the sample sizes.
+ */
+static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
+  size_t sum = 0;
+  size_t i;
+  for (i = 0; i < nbSamples; ++i) {
+    sum += samplesSizes[i];
+  }
+  return sum;
+}
+
+/**
+ * Returns a negative value if the dmer at lp is less than the dmer at rp.
+ * Returns 0 if the dmers at lp and rp are equal.
+ * Returns a positive value if the dmer at lp is greater than the dmer at rp.
+ */
+static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) {
+  const U32 lhs = *(const U32 *)lp;
+  const U32 rhs = *(const U32 *)rp;
+  return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d);
+}
+
+/**
+ * Same as COVER_cmp() except ties are broken by pointer value
+ * NOTE: g_ctx must be set to call this function.  A global is required because
+ * qsort doesn't take an opaque pointer.
+ */
+static int COVER_strict_cmp(const void *lp, const void *rp) {
+  int result = COVER_cmp(g_ctx, lp, rp);
+  if (result == 0) {
+    result = lp < rp ? -1 : 1;
+  }
+  return result;
+}
+
+/**
+ * Returns the first pointer in [first, last) whose element does not compare
+ * less than value.  If no such element exists it returns last.
+ */
+static const size_t *COVER_lower_bound(const size_t *first, const size_t *last,
+                                       size_t value) {
+  size_t count = last - first;
+  while (count != 0) {
+    size_t step = count / 2;
+    const size_t *ptr = first;
+    ptr += step;
+    if (*ptr < value) {
+      first = ++ptr;
+      count -= step + 1;
+    } else {
+      count = step;
+    }
+  }
+  return first;
+}
+
+/**
+ * Generic groupBy function.
+ * Groups an array sorted by cmp into groups with equivalent values.
+ * Calls grp for each group.
+ */
+static void
+COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx,
+              int (*cmp)(COVER_ctx_t *, const void *, const void *),
+              void (*grp)(COVER_ctx_t *, const void *, const void *)) {
+  const BYTE *ptr = (const BYTE *)data;
+  size_t num = 0;
+  while (num < count) {
+    const BYTE *grpEnd = ptr + size;
+    ++num;
+    while (num < count && cmp(ctx, ptr, grpEnd) == 0) {
+      grpEnd += size;
+      ++num;
+    }
+    grp(ctx, ptr, grpEnd);
+    ptr = grpEnd;
+  }
+}
+
+/*-*************************************
+*  Cover functions
+***************************************/
+
+/**
+ * Called on each group of positions with the same dmer.
+ * Counts the frequency of each dmer and saves it in the suffix array.
+ * Fills `ctx->dmerAt`.
+ */
+static void COVER_group(COVER_ctx_t *ctx, const void *group,
+                        const void *groupEnd) {
+  /* The group consists of all the positions with the same first d bytes. */
+  const U32 *grpPtr = (const U32 *)group;
+  const U32 *grpEnd = (const U32 *)groupEnd;
+  /* The dmerId is how we will reference this dmer.
+   * This allows us to map the whole dmer space to a much smaller space, the
+   * size of the suffix array.
+   */
+  const U32 dmerId = (U32)(grpPtr - ctx->suffix);
+  /* Count the number of samples this dmer shows up in */
+  U32 freq = 0;
+  /* Details */
+  const size_t *curOffsetPtr = ctx->offsets;
+  const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples;
+  /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a
+   * different sample than the last.
+   */
+  size_t curSampleEnd = ctx->offsets[0];
+  for (; grpPtr != grpEnd; ++grpPtr) {
+    /* Save the dmerId for this position so we can get back to it. */
+    ctx->dmerAt[*grpPtr] = dmerId;
+    /* Dictionaries only help for the first reference to the dmer.
+     * After that zstd can reference the match from the previous reference.
+     * So only count each dmer once for each sample it is in.
+     */
+    if (*grpPtr < curSampleEnd) {
+      continue;
+    }
+    freq += 1;
+    /* Binary search to find the end of the sample *grpPtr is in.
+     * In the common case that grpPtr + 1 == grpEnd we can skip the binary
+     * search because the loop is over.
+     */
+    if (grpPtr + 1 != grpEnd) {
+      const size_t *sampleEndPtr =
+          COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr);
+      curSampleEnd = *sampleEndPtr;
+      curOffsetPtr = sampleEndPtr + 1;
+    }
+  }
+  /* At this point we are never going to look at this segment of the suffix
+   * array again.  We take advantage of this fact to save memory.
+   * We store the frequency of the dmer in the first position of the group,
+   * which is dmerId.
+   */
+  ctx->suffix[dmerId] = freq;
+}
+
+/**
+ * A segment is a range in the source as well as the score of the segment.
+ */
+typedef struct {
+  U32 begin;
+  U32 end;
+  double score;
+} COVER_segment_t;
+
+/**
+ * Selects the best segment in an epoch.
+ * Segments are scored according to the function:
+ *
+ * Let F(d) be the frequency of dmer d.
+ * Let S_i be the dmer at position i of segment S which has length k.
+ *
+ *     Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
+ *
+ * Once the dmer d is in the dictionary we set F(d) = 0.
+ */
+static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
+                                           COVER_map_t *activeDmers, U32 begin,
+                                           U32 end, COVER_params_t parameters) {
+  /* Constants */
+  const U32 k = parameters.k;
+  const U32 d = parameters.d;
+  const U32 dmersInK = k - d + 1;
+  /* Try each segment (activeSegment) and save the best (bestSegment) */
+  COVER_segment_t bestSegment = {0, 0, 0};
+  COVER_segment_t activeSegment;
+  /* Reset the activeDmers in the segment */
+  COVER_map_clear(activeDmers);
+  /* The activeSegment starts at the beginning of the epoch. */
+  activeSegment.begin = begin;
+  activeSegment.end = begin;
+  activeSegment.score = 0;
+  /* Slide the activeSegment through the whole epoch.
+   * Save the best segment in bestSegment.
+   */
+  while (activeSegment.end < end) {
+    /* The dmerId for the dmer at the next position */
+    U32 newDmer = ctx->dmerAt[activeSegment.end];
+    /* The entry in activeDmers for this dmerId */
+    U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer);
+    /* If the dmer isn't already present in the segment add its score. */
+    if (*newDmerOcc == 0) {
+      /* The paper suggest using the L-0.5 norm, but experiments show that it
+       * doesn't help.
+       */
+      activeSegment.score += freqs[newDmer];
+    }
+    /* Add the dmer to the segment */
+    activeSegment.end += 1;
+    *newDmerOcc += 1;
+
+    /* If the window is now too large, drop the first position */
+    if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
+      U32 delDmer = ctx->dmerAt[activeSegment.begin];
+      U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer);
+      activeSegment.begin += 1;
+      *delDmerOcc -= 1;
+      /* If this is the last occurence of the dmer, subtract its score */
+      if (*delDmerOcc == 0) {
+        COVER_map_remove(activeDmers, delDmer);
+        activeSegment.score -= freqs[delDmer];
+      }
+    }
+
+    /* If this segment is the best so far save it */
+    if (activeSegment.score > bestSegment.score) {
+      bestSegment = activeSegment;
+    }
+  }
+  {
+    /* Trim off the zero frequency head and tail from the segment. */
+    U32 newBegin = bestSegment.end;
+    U32 newEnd = bestSegment.begin;
+    U32 pos;
+    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
+      U32 freq = freqs[ctx->dmerAt[pos]];
+      if (freq != 0) {
+        newBegin = MIN(newBegin, pos);
+        newEnd = pos + 1;
+      }
+    }
+    bestSegment.begin = newBegin;
+    bestSegment.end = newEnd;
+  }
+  {
+    /* Zero out the frequency of each dmer covered by the chosen segment. */
+    U32 pos;
+    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
+      freqs[ctx->dmerAt[pos]] = 0;
+    }
+  }
+  return bestSegment;
+}
+
+/**
+ * Check the validity of the parameters.
+ * Returns non-zero if the parameters are valid and 0 otherwise.
+ */
+static int COVER_checkParameters(COVER_params_t parameters) {
+  /* k and d are required parameters */
+  if (parameters.d == 0 || parameters.k == 0) {
+    return 0;
+  }
+  /* d <= k */
+  if (parameters.d > parameters.k) {
+    return 0;
+  }
+  return 1;
+}
+
+/**
+ * Clean up a context initialized with `COVER_ctx_init()`.
+ */
+static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
+  if (!ctx) {
+    return;
+  }
+  if (ctx->suffix) {
+    free(ctx->suffix);
+    ctx->suffix = NULL;
+  }
+  if (ctx->freqs) {
+    free(ctx->freqs);
+    ctx->freqs = NULL;
+  }
+  if (ctx->dmerAt) {
+    free(ctx->dmerAt);
+    ctx->dmerAt = NULL;
+  }
+  if (ctx->offsets) {
+    free(ctx->offsets);
+    ctx->offsets = NULL;
+  }
+}
+
+/**
+ * Prepare a context for dictionary building.
+ * The context is only dependent on the parameter `d` and can be used multiple
+ * times.
+ * Returns 1 on success or zero on error.
+ * The context must be destroyed with `COVER_ctx_destroy()`.
+ */
+static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
+                          const size_t *samplesSizes, unsigned nbSamples,
+                          unsigned d) {
+  const BYTE *const samples = (const BYTE *)samplesBuffer;
+  const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
+  /* Zero the context first so it is safe to destroy on every error path */
+  memset(ctx, 0, sizeof(*ctx));
+  /* Checks */
+  if (totalSamplesSize < d) {
+    DISPLAYLEVEL(1, "Total samples size is too small, minimum size is %u\n", d);
+    return 0;
+  }
+  if (totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
+    DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
+                 (COVER_MAX_SAMPLES_SIZE >> 20));
+    return 0;
+  }
+  DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbSamples,
+               (U32)totalSamplesSize);
+  ctx->samples = samples;
+  ctx->samplesSizes = samplesSizes;
+  ctx->nbSamples = nbSamples;
+  /* Partial suffix array */
+  ctx->suffixSize = totalSamplesSize - d + 1;
+  ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
+  /* Maps index to the dmerID */
+  ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
+  /* The offsets of each file */
+  ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t));
+  if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
+    DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
+    COVER_ctx_destroy(ctx);
+    return 0;
+  }
+  ctx->freqs = NULL;
+  ctx->d = d;
+
+  /* Fill offsets from the samplesSizes */
+  {
+    U32 i;
+    ctx->offsets[0] = 0;
+    for (i = 1; i <= nbSamples; ++i) {
+      ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
+    }
+  }
+  DISPLAYLEVEL(2, "Constructing partial suffix array\n");
+  {
+    /* suffix is a partial suffix array.
+     * It only sorts suffixes by their first parameters.d bytes.
+     * The sort is stable, so each dmer group is sorted by position in input.
+     */
+    U32 i;
+    for (i = 0; i < ctx->suffixSize; ++i) {
+      ctx->suffix[i] = i;
+    }
+    /* qsort doesn't take an opaque pointer, so pass as a global */
+    g_ctx = ctx;
+    qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), &COVER_strict_cmp);
+  }
+  DISPLAYLEVEL(2, "Computing frequencies\n");
+  /* For each dmer group (positions sharing the same first d bytes):
+   * 1. Set dmerAt[position] = dmerID = (groupBeginPtr - suffix), so that a
+   *    position can be mapped to its entry in freqs.
+   * 2. Count how many samples the dmer occurs in; save it in freqs[dmerId]. */
+  COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, &COVER_cmp,
+                &COVER_group);
+  ctx->freqs = ctx->suffix;
+  ctx->suffix = NULL;
+  return 1;
+}
+
+/**
+ * Build the dictionary at the back of dictBuffer; returns its start offset.
+ */
+static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
+                                    COVER_map_t *activeDmers, void *dictBuffer,
+                                    size_t dictBufferCapacity,
+                                    COVER_params_t parameters) {
+  BYTE *const dict = (BYTE *)dictBuffer;
+  size_t tail = dictBufferCapacity;
+  /* Divide the data up into epochs of equal size, one segment per epoch and
+   * pass.  Use at least one epoch: k > dictBufferCapacity would otherwise give
+   * zero epochs and a division by zero below. */
+  const U32 epochsRaw = (U32)(dictBufferCapacity / parameters.k);
+  const U32 epochs = epochsRaw == 0 ? 1 : epochsRaw;
+  const U32 epochSize = (U32)(ctx->suffixSize / epochs);
+  size_t epoch;
+  DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
+               epochSize);
+  /* Loop through the epochs until there are no more segments or the dictionary
+   * is full. */
+  for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
+    const U32 epochBegin = (U32)(epoch * epochSize);
+    const U32 epochEnd = epochBegin + epochSize;
+    size_t segmentSize;
+    /* Select a segment */
+    COVER_segment_t segment = COVER_selectSegment(
+        ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
+    /* Trim the segment to the remaining space.  An empty segment still
+     * measures d - 1 bytes here, so stop on anything shorter than one dmer. */
+    segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
+    if (segmentSize < parameters.d) {
+      break;
+    }
+    /* We fill the dictionary from the back to allow the best segments to be
+     * referenced with the smallest offsets. */
+    tail -= segmentSize;
+    memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
+    DISPLAYUPDATE(
+        2, "\r%u%%       ",
+        (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
+  }
+  DISPLAYLEVEL(2, "\r%79s\r", "");
+  return tail;
+}
+
+/**
+ * Translate from COVER_params_t to ZDICT_params_t required for finalizing the
+ * dictionary.
+ */
+static ZDICT_params_t COVER_translateParams(COVER_params_t parameters) {
+  ZDICT_params_t zdictParams;
+  memset(&zdictParams, 0, sizeof(zdictParams));
+  zdictParams.notificationLevel = 1;
+  zdictParams.dictID = parameters.dictID;
+  zdictParams.compressionLevel = parameters.compressionLevel;
+  return zdictParams;
+}
+
+/**
+ * Constructs a dictionary using a heuristic based on the following paper:
+ * Liao, Petri, Moffat, Wirth
+ * Effective Construction of Relative Lempel-Ziv Dictionaries
+ * Published in WWW 2016.
+ * Returns the size of the dictionary written to dictBuffer, or an error code.
+ */
+ZDICTLIB_API size_t COVER_trainFromBuffer(
+    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
+    const size_t *samplesSizes, unsigned nbSamples, COVER_params_t parameters) {
+  BYTE *const dict = (BYTE *)dictBuffer;
+  COVER_ctx_t ctx;
+  COVER_map_t activeDmers;
+  /* Set the display level first so the checks below honor the caller's level */
+  g_displayLevel = parameters.notificationLevel;
+  /* Checks */
+  if (!COVER_checkParameters(parameters)) {
+    DISPLAYLEVEL(1, "Cover parameters incorrect\n");
+    return ERROR(GENERIC);
+  }
+  if (nbSamples == 0) {
+    DISPLAYLEVEL(1, "Cover must have at least one input file\n");
+    return ERROR(GENERIC);
+  }
+  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
+    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
+                 ZDICT_DICTSIZE_MIN);
+    return ERROR(dstSize_tooSmall);
+  }
+  /* Initialize context and activeDmers */
+  if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
+                      parameters.d)) {
+    return ERROR(GENERIC);
+  }
+  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
+    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
+    COVER_ctx_destroy(&ctx);
+    return ERROR(GENERIC);
+  }
+
+  DISPLAYLEVEL(2, "Building dictionary\n");
+  {
+    const size_t tail =
+        COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer,
+                              dictBufferCapacity, parameters);
+    ZDICT_params_t zdictParams = COVER_translateParams(parameters);
+    const size_t dictionarySize = ZDICT_finalizeDictionary(
+        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
+        samplesBuffer, samplesSizes, nbSamples, zdictParams);
+    if (!ZSTD_isError(dictionarySize)) {
+      DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
+                   (U32)dictionarySize);
+    }
+    COVER_ctx_destroy(&ctx);
+    COVER_map_destroy(&activeDmers);
+    return dictionarySize;
+  }
+}
+
+/**
+ * COVER_best_t is used for two purposes:
+ * 1. Synchronizing threads.
+ * 2. Saving the best parameters and dictionary.
+ *
+ * All of the methods except COVER_best_init() are thread safe if zstd is
+ * compiled with multithreaded support.
+ */
+typedef struct COVER_best_s {
+#ifdef ZSTD_PTHREAD
+  /* Taken by COVER_best_start(), COVER_best_finish() and COVER_best_wait()
+   * around their accesses to this structure. */
+  pthread_mutex_t mutex;
+  /* Broadcast by COVER_best_finish() once liveJobs has dropped to 0 */
+  pthread_cond_t cond;
+  /* Jobs announced with COVER_best_start() and not yet finished */
+  size_t liveJobs;
+#endif
+  /* malloc'ed copy of the best dictionary so far, NULL when none is saved */
+  void *dict;
+  /* Size in bytes of the dictionary saved in `dict` */
+  size_t dictSize;
+  /* Parameters that produced the saved dictionary */
+  COVER_params_t parameters;
+  /* Compressed size obtained with the saved dictionary; starts at
+   * (size_t)-1 so that any valid result is an improvement */
+  size_t compressedSize;
+} COVER_best_t;
+
+/**
+ * Put a `COVER_best_t` into its initial state: no live jobs, no saved
+ * dictionary, zeroed parameters and the largest possible compressed size.
+ * A NULL `best` is ignored.
+ */
+static void COVER_best_init(COVER_best_t *best) {
+  if (best == NULL) {
+    return;
+  }
+  /* Result fields: nothing has been recorded yet */
+  memset(&best->parameters, 0, sizeof(best->parameters));
+  best->compressedSize = (size_t)-1;
+  best->dictSize = 0;
+  best->dict = NULL;
+#ifdef ZSTD_PTHREAD
+  /* Synchronization state, only present in multithreaded builds */
+  best->liveJobs = 0;
+  pthread_cond_init(&best->cond, NULL);
+  pthread_mutex_init(&best->mutex, NULL);
+#endif
+}
+
+/**
+ * Block the caller until every started job has finished (liveJobs == 0).
+ * Returns immediately for a NULL `best` or in single-threaded builds.
+ */
+static void COVER_best_wait(COVER_best_t *best) {
+  if (best != NULL) {
+#ifdef ZSTD_PTHREAD
+    pthread_mutex_lock(&best->mutex);
+    /* Re-check after every wake-up: the wait may return spuriously */
+    for (;;) {
+      if (best->liveJobs == 0) {
+        break;
+      }
+      pthread_cond_wait(&best->cond, &best->mutex);
+    }
+    pthread_mutex_unlock(&best->mutex);
+#endif
+  }
+}
+
+/**
+ * Wait for all live jobs with COVER_best_wait(), then release the saved
+ * dictionary and, in multithreaded builds, the mutex and condition variable.
+ * A NULL `best` is ignored.
+ */
+static void COVER_best_destroy(COVER_best_t *best) {
+  if (best == NULL) {
+    return;
+  }
+  /* Nothing may be torn down while a job can still report a result */
+  COVER_best_wait(best);
+  free(best->dict); /* free(NULL) is a no-op, so no guard is needed */
+#ifdef ZSTD_PTHREAD
+  pthread_mutex_destroy(&best->mutex);
+  pthread_cond_destroy(&best->cond);
+#endif
+}
+
+/**
+ * Announce that a job is about to be launched by adding one to liveJobs.
+ * Does nothing for a NULL `best` or in single-threaded builds.
+ */
+static void COVER_best_start(COVER_best_t *best) {
+  if (best != NULL) {
+#ifdef ZSTD_PTHREAD
+    pthread_mutex_lock(&best->mutex);
+    best->liveJobs += 1;
+    pthread_mutex_unlock(&best->mutex);
+#endif
+  }
+}
+
+/**
+ * Called when a thread finishes executing, both on error or success.
+ * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
+ * If this dictionary is the best so far save it and its parameters.
+ *
+ * @param best           shared state; a NULL pointer makes the call a no-op
+ * @param compressedSize total compressed size obtained with `dict`
+ *                       (callers pass an error code when the job failed)
+ * @param parameters     parameters that were used to build `dict`
+ * @param dict           dictionary to copy into `best` when it is an
+ *                       improvement; only read in that case
+ * @param dictSize       size of `dict` in bytes
+ *
+ * If the copy of the dictionary cannot be allocated, `best->compressedSize`
+ * is set to an error code and `best->dictSize` to 0.
+ */
+static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
+                              COVER_params_t parameters, void *dict,
+                              size_t dictSize) {
+  if (!best) {
+    return;
+  }
+#ifdef ZSTD_PTHREAD
+  pthread_mutex_lock(&best->mutex);
+  --best->liveJobs;
+#endif
+  /* If the new dictionary is better */
+  if (compressedSize < best->compressedSize) {
+    /* Allocate space if necessary */
+    if (!best->dict || best->dictSize < dictSize) {
+      free(best->dict); /* free(NULL) is a no-op */
+      best->dict = malloc(dictSize);
+    }
+    if (best->dict) {
+      /* Save the dictionary, parameters, and size */
+      memcpy(best->dict, dict, dictSize);
+      best->dictSize = dictSize;
+      best->parameters = parameters;
+      best->compressedSize = compressedSize;
+    } else {
+      /* Out of memory: record the failure, then fall through to the common
+       * exit below. Returning from here would leave the mutex locked and
+       * the waiters asleep forever. */
+      best->compressedSize = ERROR(GENERIC);
+      best->dictSize = 0;
+    }
+  }
+#ifdef ZSTD_PTHREAD
+  /* Wake the waiters while still holding the mutex: once it is released a
+   * waiter may observe liveJobs == 0 and destroy the condition variable. */
+  if (best->liveJobs == 0) {
+    pthread_cond_broadcast(&best->cond);
+  }
+  pthread_mutex_unlock(&best->mutex);
+#endif
+}
+
+/**
+ * Parameters for COVER_tryParameters().
+ * The structure itself is freed by COVER_tryParameters(); `ctx` and `best`
+ * are only referenced.
+ */
+typedef struct COVER_tryParameters_data_s {
+  const COVER_ctx_t *ctx;       /* read-only context for the current d */
+  COVER_best_t *best;           /* where the result is reported */
+  size_t dictBufferCapacity;    /* size of the dictionary buffer to build */
+  COVER_params_t parameters;    /* parameter set to evaluate */
+} COVER_tryParameters_data_t;
+
+/**
+ * Tries a set of parameters and updates the COVER_best_t with the results.
+ * This function is thread safe if zstd is compiled with multithreaded support.
+ * It takes its parameters as an *OWNING* opaque pointer to support threading.
+ *
+ * `opaque` must point to a heap-allocated COVER_tryParameters_data_t; it is
+ * released with free() before the function returns. The outcome is reported
+ * through COVER_best_finish() on every path, with an error code as the
+ * compressed size when any step fails.
+ */
+static void COVER_tryParameters(void *opaque) {
+  /* Save parameters as local variables */
+  COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
+  const COVER_ctx_t *const ctx = data->ctx;
+  const COVER_params_t parameters = data->parameters;
+  size_t dictBufferCapacity = data->dictBufferCapacity;
+  /* Stays an error code unless every sample is compressed successfully */
+  size_t totalCompressedSize = ERROR(GENERIC);
+  /* Allocate space for hash table, dict, and freqs */
+  COVER_map_t activeDmers;
+  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
+  U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
+  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
+    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
+    goto _cleanup;
+  }
+  if (!dict || !freqs) {
+    DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
+    goto _cleanup;
+  }
+  /* Copy the frequencies because we need to modify them */
+  memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32));
+  /* Build the dictionary */
+  {
+    const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
+                                              dictBufferCapacity, parameters);
+    const ZDICT_params_t zdictParams = COVER_translateParams(parameters);
+    /* From here on dictBufferCapacity holds the finalized dictionary size
+     * (or the error code returned by the finalization) */
+    dictBufferCapacity = ZDICT_finalizeDictionary(
+        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
+        ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples, zdictParams);
+    if (ZDICT_isError(dictBufferCapacity)) {
+      DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
+      goto _cleanup;
+    }
+  }
+  /* Check total compressed size */
+  {
+    /* Pointers */
+    ZSTD_CCtx *cctx;
+    ZSTD_CDict *cdict;
+    void *dst;
+    /* Local variables */
+    size_t dstCapacity;
+    size_t i;
+    /* Allocate dst with enough space to compress the maximum sized sample */
+    {
+      size_t maxSampleSize = 0;
+      for (i = 0; i < ctx->nbSamples; ++i) {
+        maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize);
+      }
+      dstCapacity = ZSTD_compressBound(maxSampleSize);
+      dst = malloc(dstCapacity);
+    }
+    /* Create the cctx and cdict */
+    cctx = ZSTD_createCCtx();
+    cdict =
+        ZSTD_createCDict(dict, dictBufferCapacity, parameters.compressionLevel);
+    /* All three are checked together; totalCompressedSize is still the
+     * initial error code at this point */
+    if (!dst || !cctx || !cdict) {
+      goto _compressCleanup;
+    }
+    /* Compress each sample and sum their sizes (or error) */
+    totalCompressedSize = 0;
+    for (i = 0; i < ctx->nbSamples; ++i) {
+      const size_t size = ZSTD_compress_usingCDict(
+          cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
+          ctx->samplesSizes[i], cdict);
+      if (ZSTD_isError(size)) {
+        totalCompressedSize = ERROR(GENERIC);
+        goto _compressCleanup;
+      }
+      totalCompressedSize += size;
+    }
+  _compressCleanup:
+    /* Reached on success and on failure; cctx/cdict may be NULL here */
+    ZSTD_freeCCtx(cctx);
+    ZSTD_freeCDict(cdict);
+    if (dst) {
+      free(dst);
+    }
+  }
+
+_cleanup:
+  /* Report the result (or the error) while dict is still allocated */
+  COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
+                    dictBufferCapacity);
+  /* data was handed over by the caller and is released here */
+  free(data);
+  COVER_map_destroy(&activeDmers);
+  if (dict) {
+    free(dict);
+  }
+  if (freqs) {
+    free(freqs);
+  }
+}
+
+/**
+ * Builds a dictionary for several (k, d) combinations and keeps the one that
+ * gives the smallest total compressed size over the samples.
+ *
+ * d runs from kMinD to kMaxD in steps of 2 and k from kMinK to kMaxK in steps
+ * of kStepSize; a non-zero `parameters->d` or `parameters->k` pins that value
+ * instead. One context is built per value of d and shared by all values of k.
+ *
+ * @return the size of the best dictionary, copied into `dictBuffer`, with
+ *         `*parameters` overwritten by the parameters that produced it; or an
+ *         error code, in which case `dictBuffer` and `*parameters` are left
+ *         untouched by this function.
+ */
+ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
+                                                  size_t dictBufferCapacity,
+                                                  const void *samplesBuffer,
+                                                  const size_t *samplesSizes,
+                                                  unsigned nbSamples,
+                                                  COVER_params_t *parameters) {
+  /* constants */
+  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
+  const unsigned kMaxD = parameters->d == 0 ? 16 : parameters->d;
+  const unsigned kMinK = parameters->k == 0 ? kMaxD : parameters->k;
+  const unsigned kMaxK = parameters->k == 0 ? 2048 : parameters->k;
+  const unsigned kSteps = parameters->steps == 0 ? 256 : parameters->steps;
+  const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
+  const unsigned kIterations =
+      (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
+  /* Local variables */
+  const int displayLevel = parameters->notificationLevel;
+  unsigned iteration = 1;
+  unsigned d;
+  unsigned k;
+  COVER_best_t best;
+  /* Checks.
+   * Messages of this function go through LOCALDISPLAYLEVEL() with the
+   * caller's level: the global level is not set yet at this point, and is
+   * deliberately lowered further down.
+   */
+  if (kMinK < kMaxD || kMaxK < kMinK) {
+    LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
+    return ERROR(GENERIC);
+  }
+  if (nbSamples == 0) {
+    LOCALDISPLAYLEVEL(displayLevel, 1,
+                      "Cover must have at least one input file\n");
+    return ERROR(GENERIC);
+  }
+  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
+    LOCALDISPLAYLEVEL(displayLevel, 1,
+                      "dictBufferCapacity must be at least %u\n",
+                      ZDICT_DICTSIZE_MIN);
+    return ERROR(dstSize_tooSmall);
+  }
+  /* Initialization */
+  COVER_best_init(&best);
+  /* Turn down global display level to clean up display at level 2 and below */
+  g_displayLevel = parameters->notificationLevel - 1;
+  /* Loop through d first because each new value needs a new context */
+  LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
+                    kIterations);
+  for (d = kMinD; d <= kMaxD; d += 2) {
+    /* Initialize the context for this value of d */
+    COVER_ctx_t ctx;
+    LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
+    if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d)) {
+      LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
+      COVER_best_destroy(&best);
+      return ERROR(GENERIC);
+    }
+    /* Loop through k reusing the same context */
+    for (k = kMinK; k <= kMaxK; k += kStepSize) {
+      /* Prepare the arguments */
+      COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc(
+          sizeof(COVER_tryParameters_data_t));
+      LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
+      if (!data) {
+        LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
+        COVER_best_destroy(&best);
+        COVER_ctx_destroy(&ctx);
+        return ERROR(GENERIC);
+      }
+      data->ctx = &ctx;
+      data->best = &best;
+      data->dictBufferCapacity = dictBufferCapacity;
+      data->parameters = *parameters;
+      data->parameters.k = k;
+      data->parameters.d = d;
+      data->parameters.steps = kSteps;
+      /* Check the parameters */
+      if (!COVER_checkParameters(data->parameters)) {
+        LOCALDISPLAYLEVEL(displayLevel, 1, "Cover parameters incorrect\n");
+        /* Ownership was never passed on, so release the arguments here */
+        free(data);
+        continue;
+      }
+      /* Call the function and pass ownership of data to it */
+      COVER_best_start(&best);
+      COVER_tryParameters(data);
+      /* Print status */
+      LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%%       ",
+                         (U32)((iteration * 100) / kIterations));
+      ++iteration;
+    }
+    /* ctx is referenced by the jobs: wait for them before destroying it */
+    COVER_best_wait(&best);
+    COVER_ctx_destroy(&ctx);
+  }
+  LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
+  /* Fill the output buffer and parameters with output of the best parameters */
+  {
+    /* Read everything needed out of `best` before it is destroyed */
+    const size_t dictSize = best.dictSize;
+    const size_t compressedSize = best.compressedSize;
+    if (ZSTD_isError(compressedSize)) {
+      COVER_best_destroy(&best);
+      return compressedSize;
+    }
+    *parameters = best.parameters;
+    memcpy(dictBuffer, best.dict, dictSize);
+    COVER_best_destroy(&best);
+    return dictSize;
+  }
+}
diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h
index 63b8f072..1b3bcb5b 100644
--- a/lib/dictBuilder/zdict.h
+++ b/lib/dictBuilder/zdict.h
@@ -86,6 +86,57 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dict
                                 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
                                 ZDICT_params_t parameters);
 
+/*! COVER_params_t :
+    For all values 0 means default.
+    k and d are the only required parameters.
+*/
+typedef struct {
+    unsigned k;                  /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
+    unsigned d;                  /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
+    unsigned steps;              /* Number of steps : Only used for optimization : 0 means default (256) : Higher means more parameters checked */
+
+    unsigned notificationLevel;  /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
+    unsigned dictID;             /* 0 means auto mode (32-bits random value); other : force dictID value */
+    int      compressionLevel;   /* 0 means default; target a specific zstd compression level */
+} COVER_params_t;
+
+
+/*! COVER_trainFromBuffer() :
+    Train a dictionary from an array of samples using the COVER algorithm.
+    Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
+    supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
+    The resulting dictionary will be saved into `dictBuffer`.
+    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
+              or an error code (e.g. `nbSamples` == 0, or `dictBufferCapacity` too small), which can be tested with ZDICT_isError().
+    Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte.
+    Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
+           It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
+           In general, it's recommended to provide a few thousands samples, but this can vary a lot.
+           It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
+*/
+ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
+                              const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+                              COVER_params_t parameters);
+
+/*! COVER_optimizeTrainFromBuffer() :
+    The same requirements as above hold for all the parameters except `parameters`.
+    This function tries many parameter combinations and picks the best parameters.
+    `*parameters` is filled with the best parameters found, and the dictionary
+    constructed with those parameters is stored in `dictBuffer`.
+
+    All of the parameters d, k, steps are optional.
+    If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
+    if steps is zero it defaults to its default value.
+    If k is non-zero then we don't check multiple values of k, otherwise we check roughly `steps` values of k between the largest d tried (16 when d is 0) and 2048.
+
+    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
+              or an error code, which can be tested with ZDICT_isError().
+              On success `*parameters` contains the parameters selected.
+    Note : COVER_optimizeTrainFromBuffer() requires about 9 bytes of memory for each input byte.
+*/
+ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
+                                     const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
+                                     COVER_params_t *parameters);
 
 /*! ZDICT_finalizeDictionary() :
 
diff --git a/lib/zstd.h b/lib/zstd.h
index 198f45ea..a0d5c785 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -510,7 +510,7 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
 /*=====   Advanced Streaming compression functions  =====*/
 ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
 ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize);   /**< pledgedSrcSize must be correct */
-ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
 ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
                                              ZSTD_parameters params, unsigned long long pledgedSrcSize);  /**< pledgedSrcSize is optional and can be zero == unknown */
 ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);  /**< note : cdict will just be referenced, and must outlive compression session */
@@ -521,7 +521,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
 /*=====   Advanced Streaming decompression functions  =====*/
 typedef enum { ZSTDdsp_maxWindowSize } ZSTD_DStreamParameter_e;
 ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
-ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
+ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */
 ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue);
 ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);  /**< note : ddict will just be referenced, and must outlive decompression session */
 ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);  /**< re-use decompression parameters from previous init; saves dictionary loading */
diff --git a/programs/Makefile b/programs/Makefile
index f2a0ff26..4392939d 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -2,6 +2,8 @@
 # Copyright (c) 2015-present, Yann Collet, Facebook, Inc.
 # All rights reserved.
 #
+# This Makefile is validated for Linux, macOS, *BSD, Hurd, Solaris, MSYS2 targets
+#
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree. An additional grant
 # of patent rights can be found in the PATENTS file in the same directory.
diff --git a/programs/dibio.c b/programs/dibio.c
index b95bab34..5ef202c8 100644
--- a/programs/dibio.c
+++ b/programs/dibio.c
@@ -42,6 +42,7 @@
 
 #define SAMPLESIZE_MAX (128 KB)
 #define MEMMULT 11    /* rough estimation : memory cost to analyze 1 byte of sample */
+#define COVER_MEMMULT 9    /* rough estimation : memory cost to analyze 1 byte of sample */
 static const size_t maxMemory = (sizeof(size_t) == 4) ? (2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t));
 
 #define NOISELENGTH 32
@@ -118,10 +119,36 @@ static unsigned DiB_loadFiles(void* buffer, size_t* bufferSizePtr,
             fileSizes[n] = fileSize;
             fclose(f);
     }   }
+    DISPLAYLEVEL(2, "\r%79s\r", "");
     *bufferSizePtr = pos;
     return n;
 }
 
+#define DiB_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))   /* rotate left; arguments parenthesized so expressions expand correctly; requires 0 < r < 32 */
+static U32 DiB_rand(U32* src)   /* advances the generator state *src and returns the new state shifted right by 5 */
+{
+    static const U32 prime1 = 2654435761U;
+    static const U32 prime2 = 2246822519U;
+    U32 rand32 = *src;
+    rand32 *= prime1;
+    rand32 ^= prime2;
+    rand32  = DiB_rotl32(rand32, 13);
+    *src = rand32;   /* the un-shifted value is the next state */
+    return rand32 >> 5;
+}
+
+/* Fisher-Yates shuffle of fileNamesTable; tables of 0 or 1 entries are left untouched. */
+static void DiB_shuffle(const char** fileNamesTable, unsigned nbFiles) {
+  U32 seed = 0xFD2FB528;   /* constant seed : same order on every run */
+  unsigned i;   /* counts from nbFiles, not nbFiles - 1, so that nbFiles == 0 cannot wrap around */
+  for (i = nbFiles; i > 1; --i) {
+    unsigned const j = DiB_rand(&seed) % i;   /* pick a slot in [0, i-1] */
+    const char* tmp = fileNamesTable[j];
+    fileNamesTable[j] = fileNamesTable[i - 1];
+    fileNamesTable[i - 1] = tmp;
+  }
+}
+
 
 /*-********************************************************
 *  Dictionary training functions
@@ -202,19 +229,23 @@ size_t ZDICT_trainFromBuffer_unsafe(void* dictBuffer, size_t dictBufferCapacity,
 
 int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
                        const char** fileNamesTable, unsigned nbFiles,
-                       ZDICT_params_t params)
+                       ZDICT_params_t *params, COVER_params_t *coverParams,
+                       int optimizeCover)
 {
     void* const dictBuffer = malloc(maxDictSize);
     size_t* const fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t));
     unsigned long long const totalSizeToLoad = DiB_getTotalCappedFileSize(fileNamesTable, nbFiles);
-    size_t const maxMem =  DiB_findMaxMem(totalSizeToLoad * MEMMULT) / MEMMULT;
+    size_t const memMult = params ? MEMMULT : COVER_MEMMULT;
+    size_t const maxMem =  DiB_findMaxMem(totalSizeToLoad * memMult) / memMult;
     size_t benchedSize = (size_t) MIN ((unsigned long long)maxMem, totalSizeToLoad);
     void* const srcBuffer = malloc(benchedSize+NOISELENGTH);
     int result = 0;
 
     /* Checks */
+    if (params) g_displayLevel = params->notificationLevel;
+    else if (coverParams) g_displayLevel = coverParams->notificationLevel;
+    else EXM_THROW(13, "Neither dictionary algorith selected");   /* should not happen */
     if ((!fileSizes) || (!srcBuffer) || (!dictBuffer)) EXM_THROW(12, "not enough memory for DiB_trainFiles");   /* should not happen */
-    g_displayLevel = params.notificationLevel;
     if (g_tooLargeSamples) {
         DISPLAYLEVEL(2, "!  Warning : some samples are very large \n");
         DISPLAYLEVEL(2, "!  Note that dictionary is only useful for small files or beginning of large files. \n");
@@ -233,12 +264,29 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
         DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(benchedSize >> 20));
 
     /* Load input buffer */
+    DISPLAYLEVEL(3, "Shuffling input files\n");
+    DiB_shuffle(fileNamesTable, nbFiles);
     nbFiles = DiB_loadFiles(srcBuffer, &benchedSize, fileSizes, fileNamesTable, nbFiles);
-    DiB_fillNoise((char*)srcBuffer + benchedSize, NOISELENGTH);   /* guard band, for end of buffer condition */
 
-    {   size_t const dictSize = ZDICT_trainFromBuffer_unsafe(dictBuffer, maxDictSize,
-                            srcBuffer, fileSizes, nbFiles,
-                            params);
+    {
+        size_t dictSize;
+        if (params) {
+            DiB_fillNoise((char*)srcBuffer + benchedSize, NOISELENGTH);   /* guard band, for end of buffer condition */
+            dictSize = ZDICT_trainFromBuffer_unsafe(dictBuffer, maxDictSize,
+                                                    srcBuffer, fileSizes, nbFiles,
+                                                    *params);
+        } else if (optimizeCover) {
+            dictSize = COVER_optimizeTrainFromBuffer(
+                dictBuffer, maxDictSize, srcBuffer, fileSizes, nbFiles,
+                coverParams);
+            if (!ZDICT_isError(dictSize)) {
+              DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\n", coverParams->k, coverParams->d, coverParams->steps);
+            }
+        } else {
+            dictSize = COVER_trainFromBuffer(dictBuffer, maxDictSize,
+                                             srcBuffer, fileSizes, nbFiles,
+                                             *coverParams);
+        }
         if (ZDICT_isError(dictSize)) {
             DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize));   /* should not happen */
             result = 1;
diff --git a/programs/dibio.h b/programs/dibio.h
index 6780d869..e61d0042 100644
--- a/programs/dibio.h
+++ b/programs/dibio.h
@@ -32,7 +32,7 @@
 */
 int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
                        const char** fileNamesTable, unsigned nbFiles,
-                       ZDICT_params_t parameters);
-
+                       ZDICT_params_t *params, COVER_params_t *coverParams,
+                       int optimizeCover);
 
 #endif
diff --git a/programs/zstd.1 b/programs/zstd.1
index 9b10b187..db79be59 100644
--- a/programs/zstd.1
+++ b/programs/zstd.1
@@ -69,6 +69,8 @@ from standard input if it is a terminal.
 .PP
 Unless
 .B \-\-stdout
+or
+.B \-o
 is specified,
 .I files
 are written to a new file whose name is derived from the source
@@ -159,7 +161,8 @@ No files are created or removed.
  # compression level [1-19] (default:3)
 .TP
 .BR \--ultra
- unlocks high compression levels 20+ (maximum 22), using a lot more memory
+ unlocks high compression levels 20+ (maximum 22), using a lot more memory.
+Note that decompression will also require more memory when using these levels.
 .TP
 .B \-D file
  use `file` as Dictionary to compress or decompress FILE(s)
@@ -293,7 +296,7 @@ There are 8 strategies numbered from 0 to 7, from faster to stronger:
 .PD
 Specify the maximum number of bits for a match distance.
 .IP ""
-The higher number of bits increases the chance to find a match what usually improves compression ratio. 
+A higher number of bits increases the chance of finding a match, which usually improves compression ratio.
 It also increases memory requirements for compressor and decompressor.
 .IP ""
 The minimum \fIwlog\fR is 10 (1 KiB) and the maximum is 25 (32 MiB) for 32-bit compilation and 27 (128 MiB) for 64-bit compilation.
@@ -319,7 +322,7 @@ The minimum \fIhlog\fR is 6 (64 B) and the maximum is 25 (32 MiB) for 32-bit com
 .PD
 Specify the maximum number of bits for a hash chain or a binary tree.
 .IP ""
-The higher number of bits increases the chance to find a match what usually improves compression ratio. 
+A higher number of bits increases the chance of finding a match, which usually improves compression ratio.
 It also slows down compression speed and increases memory requirements for compression.
 This option is ignored for the ZSTD_fast strategy.
 .IP ""
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index de25d0f0..785ecede 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -130,6 +130,8 @@ static int usage_advanced(const char* programName)
     DISPLAY( "\n");
     DISPLAY( "Dictionary builder :\n");
     DISPLAY( "--train ## : create a dictionary from a training set of files \n");
+    DISPLAY( "--cover=k=#,d=# : use the cover algorithm with parameters k and d \n");
+    DISPLAY( "--optimize-cover[=steps=#,k=#,d=#] : optimize cover parameters with optional parameters\n");
     DISPLAY( " -o file : `file` is dictionary name (default: %s) \n", g_defaultDictName);
     DISPLAY( "--maxdict ## : limit dictionary to specified size (default : %u) \n", g_defaultMaxDictSize);
     DISPLAY( " -s#    : dictionary selectivity level (default: %u)\n", g_defaultSelectivityLevel);
@@ -195,6 +197,27 @@ static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
 }
 
 
+#ifndef ZSTD_NODICT
+/**
+ * parseCoverParameters() :
+ * reads cover parameters from stringPtr, the text following "--cover=" (e.g. "k=48,d=8,steps=32"), into *params
+ * @return 1 means that cover parameters were correct
+ * @return 0 in case of malformed parameters
+ */
+static unsigned parseCoverParameters(const char* stringPtr, COVER_params_t *params)
+{
+    memset(params, 0, sizeof(*params));   /* every field not named in the string ends up 0 */
+    for (; ;) {   /* one "name=value" per iteration; a ',' continues, anything else ends the list */
+        if (longCommandWArg(&stringPtr, "k=")) { params->k = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
+        if (longCommandWArg(&stringPtr, "d=")) { params->d = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
+        if (longCommandWArg(&stringPtr, "steps=")) { params->steps = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
+        return 0;   /* unknown parameter name */
+    }
+    if (stringPtr[0] != 0) return 0;   /* trailing characters after the last value */
+    DISPLAYLEVEL(4, "k=%u\nd=%u\nsteps=%u\n", params->k, params->d, params->steps);
+    return 1;
+}
+#endif
 /** parseCompressionParameters() :
  *  reads compression parameters from *stringPtr (e.g. "--zstd=wlog=23,clog=23,hlog=22,slog=6,slen=3,tlen=48,strat=6") into *params
  *  @return 1 means that compression parameters were correct
@@ -260,6 +283,10 @@ int main(int argCount, const char* argv[])
     char* fileNamesBuf = NULL;
     unsigned fileNamesNb;
 #endif
+#ifndef ZSTD_NODICT
+    COVER_params_t coverParams;
+    int cover = 0;
+#endif
 
     /* init */
     (void)recursive; (void)cLevelLast;    /* not used when ZSTD_NOBENCH set */
@@ -324,6 +351,20 @@ int main(int argCount, const char* argv[])
                     if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(1); continue; }
 
                     /* long commands with arguments */
+#ifndef  ZSTD_NODICT
+                    if (longCommandWArg(&argument, "--cover=")) {
+                      cover=1; if (!parseCoverParameters(argument, &coverParams)) CLEAN_RETURN(badusage(programName));
+                      continue;
+                    }
+                    if (longCommandWArg(&argument, "--optimize-cover")) {
+                      cover=2;
+                      /* Allow optional arguments following an = */
+                      if (*argument == 0) { memset(&coverParams, 0, sizeof(coverParams)); }
+                      else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
+                      else if (!parseCoverParameters(argument, &coverParams)) { CLEAN_RETURN(badusage(programName)); }
+                      continue;
+                    }
+#endif
                     if (longCommandWArg(&argument, "--memlimit=")) { memLimit = readU32FromChar(&argument); continue; }
                     if (longCommandWArg(&argument, "--memory=")) { memLimit = readU32FromChar(&argument); continue; }
                     if (longCommandWArg(&argument, "--memlimit-decompress=")) { memLimit = readU32FromChar(&argument); continue; }
@@ -533,13 +574,20 @@ int main(int argCount, const char* argv[])
     /* Check if dictionary builder is selected */
     if (operation==zom_train) {
 #ifndef ZSTD_NODICT
-        ZDICT_params_t dictParams;
-        memset(&dictParams, 0, sizeof(dictParams));
-        dictParams.compressionLevel = dictCLevel;
-        dictParams.selectivityLevel = dictSelect;
-        dictParams.notificationLevel = displayLevel;
-        dictParams.dictID = dictID;
-        DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, dictParams);
+        if (cover) {
+            coverParams.compressionLevel = dictCLevel;
+            coverParams.notificationLevel = displayLevel;
+            coverParams.dictID = dictID;
+            DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, NULL, &coverParams, cover - 1);
+        } else {
+            ZDICT_params_t dictParams;
+            memset(&dictParams, 0, sizeof(dictParams));
+            dictParams.compressionLevel = dictCLevel;
+            dictParams.selectivityLevel = dictSelect;
+            dictParams.notificationLevel = displayLevel;
+            dictParams.dictID = dictID;
+            DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, &dictParams, NULL, 0);
+        }
 #endif
         goto _end;
     }
diff --git a/tests/Makefile b/tests/Makefile
index bbc8d3de..937f3b41 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -2,6 +2,8 @@
 # Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 # All rights reserved.
 #
+# This Makefile is validated for Linux, macOS, *BSD, Hurd, Solaris, MSYS2 targets
+#
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree. An additional grant
 # of patent rights can be found in the PATENTS file in the same directory.
@@ -121,10 +123,10 @@ zbufftest-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/datagen.c zbufftest.c
 	$(MAKE) -C $(ZSTDDIR) libzstd
 	$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@$(EXT)
 
-zstreamtest  : $(ZSTD_FILES) $(PRGDIR)/datagen.c zstreamtest.c
+zstreamtest  : $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c zstreamtest.c
 	$(CC)      $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT)
 
-zstreamtest32 : $(ZSTD_FILES) $(PRGDIR)/datagen.c zstreamtest.c
+zstreamtest32 : $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c zstreamtest.c
 	$(CC) -m32  $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT)
 
 zstreamtest-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd
diff --git a/tests/fuzzer.c b/tests/fuzzer.c
index 86d4c6be..00cfb057 100644
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@@ -28,6 +28,7 @@
 #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_compressContinue, ZSTD_compressBlock */
 #include "zstd.h"         /* ZSTD_VERSION_STRING */
 #include "zstd_errors.h"  /* ZSTD_getErrorCode */
+#define ZDICT_STATIC_LINKING_ONLY
 #include "zdict.h"        /* ZDICT_trainFromBuffer */
 #include "datagen.h"      /* RDG_genBuffer */
 #include "mem.h"
@@ -311,6 +312,70 @@ static int basicUnitTests(U32 seed, double compressibility)
                   if (r != CNBuffSize) goto _output_error);
         DISPLAYLEVEL(4, "OK \n");
 
+        DISPLAYLEVEL(4, "test%3i : dictionary containing only header should return error : ", testNb++);
+        {
+          const size_t ret = ZSTD_decompress_usingDict(
+              dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize,
+              "\x37\xa4\x30\xec\x11\x22\x33\x44", 8);
+          if (ZSTD_getErrorCode(ret) != ZSTD_error_dictionary_corrupted) goto _output_error;
+        }
+        DISPLAYLEVEL(4, "OK \n");
+
+        ZSTD_freeCCtx(cctx);
+        ZSTD_freeDCtx(dctx);
+        free(dictBuffer);
+        free(samplesSizes);
+    }
+
+    /* COVER dictionary builder tests */
+    {   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+        ZSTD_DCtx* const dctx = ZSTD_createDCtx();
+        size_t dictSize = 16 KB;
+        size_t optDictSize = dictSize;
+        void* dictBuffer = malloc(dictSize);
+        size_t const totalSampleSize = 1 MB;
+        size_t const sampleUnitSize = 8 KB;
+        U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize);
+        size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t));
+        COVER_params_t params;
+        U32 dictID;
+
+        if (dictBuffer==NULL || samplesSizes==NULL) {
+            free(dictBuffer);
+            free(samplesSizes);
+            goto _output_error;
+        }
+
+        DISPLAYLEVEL(4, "test%3i : COVER_trainFromBuffer : ", testNb++);
+        { U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
+        memset(&params, 0, sizeof(params));
+        params.d = 1 + (FUZ_rand(&seed) % 16);
+        params.k = params.d + (FUZ_rand(&seed) % 256);
+        dictSize = COVER_trainFromBuffer(dictBuffer, dictSize,
+                                         CNBuffer, samplesSizes, nbSamples,
+                                         params);
+        if (ZDICT_isError(dictSize)) goto _output_error;
+        DISPLAYLEVEL(4, "OK, created dictionary of size %u \n", (U32)dictSize);
+
+        DISPLAYLEVEL(4, "test%3i : check dictID : ", testNb++);
+        dictID = ZDICT_getDictID(dictBuffer, dictSize);
+        if (dictID==0) goto _output_error;
+        DISPLAYLEVEL(4, "OK : %u \n", dictID);
+
+        DISPLAYLEVEL(4, "test%3i : COVER_optimizeTrainFromBuffer : ", testNb++);
+        memset(&params, 0, sizeof(params));
+        params.steps = 4;
+        optDictSize = COVER_optimizeTrainFromBuffer(dictBuffer, optDictSize,
+                                                    CNBuffer, samplesSizes, nbSamples,
+                                                    &params);
+        if (ZDICT_isError(optDictSize)) goto _output_error;
+        DISPLAYLEVEL(4, "OK, created dictionary of size %u \n", (U32)optDictSize);
+
+        DISPLAYLEVEL(4, "test%3i : check dictID : ", testNb++);
+        dictID = ZDICT_getDictID(dictBuffer, optDictSize);
+        if (dictID==0) goto _output_error;
+        DISPLAYLEVEL(4, "OK : %u \n", dictID);
+
         ZSTD_freeCCtx(cctx);
         ZSTD_freeDCtx(dctx);
         free(dictBuffer);
diff --git a/tests/playTests.sh b/tests/playTests.sh
index dfc90c33..5bb882aa 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -255,6 +255,27 @@ rm -rf dirTestDict
 rm tmp*
 
 
+$ECHO "\n**** cover dictionary tests **** "
+
+TESTFILE=../programs/zstdcli.c
+./datagen > tmpDict
+$ECHO "- Create first dictionary"
+$ZSTD --train --cover=k=46,d=8 *.c ../programs/*.c -o tmpDict
+cp $TESTFILE tmp
+$ZSTD -f tmp -D tmpDict
+$ZSTD -d tmp.zst -D tmpDict -fo result
+$DIFF $TESTFILE result
+$ECHO "- Create second (different) dictionary"
+$ZSTD --train --cover=k=56,d=8 *.c ../programs/*.c ../programs/*.h -o tmpDictC
+$ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!"
+$ECHO "- Create dictionary with short dictID"
+$ZSTD --train --cover=k=46,d=8 *.c ../programs/*.c --dictID 1 -o tmpDict1
+cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
+$ECHO "- Create dictionary with size limit"
+$ZSTD --train --optimize-cover=steps=8 *.c ../programs/*.c -o tmpDict2 --maxdict 4K
+rm tmp*
+
+
 $ECHO "\n**** integrity tests **** "
 
 $ECHO "test one file (tmp1.zst) "
diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c
index 1feec450..9efba323 100644
--- a/tests/zstreamtest.c
+++ b/tests/zstreamtest.c
@@ -26,10 +26,11 @@
 #include <time.h>         /* clock_t, clock() */
 #include <string.h>       /* strcmp */
 #include "mem.h"
-#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_maxCLevel, ZSTD_customMem */
+#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_maxCLevel, ZSTD_customMem, ZSTD_getDictID_fromFrame */
 #include "zstd.h"         /* ZSTD_compressBound */
 #include "zstd_errors.h"  /* ZSTD_error_srcSize_wrong */
 #include "zstdmt_compress.h"
+#include "zdict.h"        /* ZDICT_trainFromBuffer */
 #include "datagen.h"      /* RDG_genBuffer */
 #define XXH_STATIC_LINKING_ONLY   /* XXH64_state_t */
 #include "xxhash.h"       /* XXH64_* */
@@ -45,8 +46,7 @@
 static const U32 nbTestsDefault = 10000;
 #define COMPRESSIBLE_NOISE_LENGTH (10 MB)
 #define FUZ_COMPRESSIBILITY_DEFAULT 50
-static const U32 prime1 = 2654435761U;
-static const U32 prime2 = 2246822519U;
+static const U32 prime32 = 2654435761U;
 
 
 /*-************************************
@@ -82,8 +82,9 @@ static clock_t FUZ_GetClockSpan(clock_t clockStart)
 #define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
 unsigned int FUZ_rand(unsigned int* seedPtr)
 {
+    static const U32 prime2 = 2246822519U;
     U32 rand32 = *seedPtr;
-    rand32 *= prime1;
+    rand32 *= prime32;
     rand32 += prime2;
     rand32  = FUZ_rotl32(rand32, 13);
     *seedPtr = rand32;
@@ -108,6 +109,41 @@ static void freeFunction(void* opaque, void* address)
 *   Basic Unit tests
 ======================================================*/
 
+typedef struct {   /* heap buffer together with its fill level */
+    void* start;    /* allocated storage; NULL in g_nullBuffer and when creation failed */
+    size_t size;    /* capacity of start, in bytes */
+    size_t filled;  /* number of bytes of start actually holding data */
+} buffer_t;
+
+static const buffer_t g_nullBuffer = { NULL, 0 , 0 };
+
+static buffer_t FUZ_createDictionary(const void* src, size_t srcSize, size_t blockSize, size_t requestedDictSize)
+{   /* Trains a dictionary from src cut into blockSize-byte samples; returns a buffer with start==NULL on any failure */
+    buffer_t dict = { NULL, 0, 0 };
+    size_t const nbBlocks = blockSize ? (srcSize + (blockSize-1)) / blockSize : 0;   /* ceil(srcSize/blockSize); 0 when blockSize is 0 */
+    size_t* const blockSizes = nbBlocks ? (size_t*) malloc(nbBlocks * sizeof(size_t)) : NULL;   /* no sample => fail just below */
+    if (!blockSizes) return dict;
+    dict.start = malloc(requestedDictSize);
+    if (!dict.start) { free(blockSizes); return dict; }
+    {   size_t nb;
+        for (nb=0; nb<nbBlocks-1; nb++) blockSizes[nb] = blockSize;
+        blockSizes[nbBlocks-1] = srcSize - (blockSize * (nbBlocks-1));   /* last sample holds the remainder */
+    }
+    {   size_t const dictSize = ZDICT_trainFromBuffer(dict.start, requestedDictSize, src, blockSizes, (unsigned)nbBlocks);
+        free(blockSizes);
+        if (ZDICT_isError(dictSize)) { free(dict.start); return g_nullBuffer; }   /* compound literals are C99-only; reuse the shared null value */
+        dict.size = requestedDictSize;
+        dict.filled = dictSize;
+        return dict;   /* trained size is reported through dict.filled */
+    }
+}
+
+static void FUZ_freeDictionary(buffer_t dict)   /* releases the storage held by a buffer_t */
+{
+    free(dict.start);   /* free(NULL) is a no-op, so a null buffer (start == NULL) is accepted as well */
+}
+
+
 static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem customMem)
 {
     size_t const CNBufferSize = COMPRESSIBLE_NOISE_LENGTH;
@@ -124,14 +160,25 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
     ZSTD_DStream* zd = ZSTD_createDStream_advanced(customMem);
     ZSTD_inBuffer  inBuff, inBuff2;
     ZSTD_outBuffer outBuff;
+    buffer_t dictionary = g_nullBuffer;
+    unsigned dictID = 0;
 
     /* Create compressible test buffer */
     if (!CNBuffer || !compressedBuffer || !decodedBuffer || !zc || !zd) {
-        DISPLAY("Not enough memory, aborting\n");
+        DISPLAY("Not enough memory, aborting \n");
         goto _output_error;
     }
     RDG_genBuffer(CNBuffer, CNBufferSize, compressibility, 0., seed);
 
+    /* Create dictionary */
+    MEM_STATIC_ASSERT(COMPRESSIBLE_NOISE_LENGTH >= 4 MB);
+    dictionary = FUZ_createDictionary(CNBuffer, 4 MB, 4 KB, 40 KB);
+    if (!dictionary.start) {
+        DISPLAY("Error creating dictionary, aborting \n");
+        goto _output_error;
+    }
+    dictID = ZDICT_getDictID(dictionary.start, dictionary.filled);
+
     /* generate skippable frame */
     MEM_writeLE32(compressedBuffer, ZSTD_MAGIC_SKIPPABLE_START);
     MEM_writeLE32(((char*)compressedBuffer)+4, (U32)skippableFrameSize);
@@ -319,7 +366,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
 
     /* CDict scenario */
     DISPLAYLEVEL(3, "test%3i : digested dictionary : ", testNb++);
-    {   ZSTD_CDict* const cdict = ZSTD_createCDict(CNBuffer, 128 KB, 1);
+    {   ZSTD_CDict* const cdict = ZSTD_createCDict(dictionary.start, dictionary.filled, 1);
         size_t const initError = ZSTD_initCStream_usingCDict(zc, cdict);
         if (ZSTD_isError(initError)) goto _output_error;
         cSize = 0;
@@ -345,9 +392,15 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
       DISPLAYLEVEL(3, "OK (%u bytes) \n", (U32)s);
     }
 
+    DISPLAYLEVEL(4, "test%3i : check Dictionary ID : ", testNb++);
+    { unsigned const dID = ZSTD_getDictID_fromFrame(compressedBuffer, cSize);
+      if (dID != dictID) goto _output_error;
+      DISPLAYLEVEL(4, "OK (%u) \n", dID);
+    }
+
     /* DDict scenario */
     DISPLAYLEVEL(3, "test%3i : decompress %u bytes with digested dictionary : ", testNb++, (U32)CNBufferSize);
-    {   ZSTD_DDict* const ddict = ZSTD_createDDict(CNBuffer, 128 KB);
+    {   ZSTD_DDict* const ddict = ZSTD_createDDict(dictionary.start, dictionary.filled);
         size_t const initError = ZSTD_initDStream_usingDDict(zd, ddict);
         if (ZSTD_isError(initError)) goto _output_error;
         inBuff.src = compressedBuffer;
@@ -387,6 +440,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
 
 
 _end:
+    FUZ_freeDictionary(dictionary);
     ZSTD_freeCStream(zc);
     ZSTD_freeDStream(zd);
     free(CNBuffer);
@@ -492,7 +546,7 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compres
         if (nbTests >= testNb) { DISPLAYUPDATE(2, "\r%6u/%6u    ", testNb, nbTests); }
         else { DISPLAYUPDATE(2, "\r%6u          ", testNb); }
         FUZ_rand(&coreSeed);
-        lseed = coreSeed ^ prime1;
+        lseed = coreSeed ^ prime32;
 
         /* states full reset (deliberately not synchronized) */
         /* some issues can only happen when reusing states */
@@ -721,7 +775,7 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp
         if (nbTests >= testNb) { DISPLAYUPDATE(2, "\r%6u/%6u    ", testNb, nbTests); }
         else { DISPLAYUPDATE(2, "\r%6u          ", testNb); }
         FUZ_rand(&coreSeed);
-        lseed = coreSeed ^ prime1;
+        lseed = coreSeed ^ prime32;
 
         /* states full reset (deliberately not synchronized) */
         /* some issues can only happen when reusing states */
diff --git a/zlibWrapper/.gitignore b/zlibWrapper/.gitignore
index 8ce15613..23d2f3a6 100644
--- a/zlibWrapper/.gitignore
+++ b/zlibWrapper/.gitignore
@@ -20,3 +20,6 @@ zwrapbench
 *.bat
 *.zip
 *.txt
+
+# Directories
+minizip/
\ No newline at end of file
diff --git a/zlibWrapper/gzcompatibility.h b/zlibWrapper/gzcompatibility.h
index a4f275e1..e2ec1add 100644
--- a/zlibWrapper/gzcompatibility.h
+++ b/zlibWrapper/gzcompatibility.h
@@ -43,3 +43,25 @@ ZEXTERN gzFile         ZEXPORT gzopen_w OF((const wchar_t *path,
                                             const char *mode));
 #endif
 #endif
+
+
+#if ZLIB_VERNUM < 0x12B0
+#ifdef Z_SOLO
+   typedef unsigned long z_size_t;
+#else
+#  define z_longlong long long
+#  if defined(NO_SIZE_T)
+     typedef unsigned NO_SIZE_T z_size_t;
+#  elif defined(STDC)
+#    include <stddef.h>
+     typedef size_t z_size_t;
+#  else
+     typedef unsigned long z_size_t;
+#  endif
+#  undef z_longlong
+#endif
+ZEXTERN z_size_t ZEXPORT gzfread OF((voidp buf, z_size_t size, z_size_t nitems,
+                                     gzFile file));
+ZEXTERN z_size_t ZEXPORT gzfwrite OF((voidpc buf, z_size_t size,
+                                      z_size_t nitems, gzFile file));
+#endif
diff --git a/zlibWrapper/gzguts.h b/zlibWrapper/gzguts.h
index 40536de4..84651b88 100644
--- a/zlibWrapper/gzguts.h
+++ b/zlibWrapper/gzguts.h
@@ -3,7 +3,7 @@
  * - gz_statep was converted to union to work with -Wstrict-aliasing=1      */
 
 /* gzguts.h -- zlib internal header definitions for gz* operations
- * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013 Mark Adler
+ * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
  * For conditions of distribution and use, see http://www.zlib.net/zlib_license.html
  */
 
@@ -30,6 +30,10 @@
 #  include <stdlib.h>
 #  include <limits.h>
 #endif
+
+#ifndef _POSIX_SOURCE
+#  define _POSIX_SOURCE
+#endif
 #include <fcntl.h>
 
 #ifdef _WIN32
@@ -40,6 +44,10 @@
 #  include <io.h>
 #endif
 
+#if defined(_WIN32) || defined(__CYGWIN__)
+#  define WIDECHAR
+#endif
+
 #ifdef WINAPI_FAMILY
 #  define open _open
 #  define read _read
@@ -100,18 +108,19 @@
 #  endif
 #endif
 
-/* unlike snprintf (which is required in C99, yet still not supported by
-   Microsoft more than a decade later!), _snprintf does not guarantee null
-   termination of the result -- however this is only used in gzlib.c where
+/* unlike snprintf (which is required in C99), _snprintf does not guarantee
+   null termination of the result -- however this is only used in gzlib.c where
    the result is assured to fit in the space provided */
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && _MSC_VER < 1900
 #  define snprintf _snprintf
 #endif
 
 #ifndef local
 #  define local static
 #endif
-/* compile with -Dlocal if your debugger can't find static symbols */
+/* since "static" is used to mean two completely different things in C, we
+   define "local" for the non-static meaning of "static", for readability
+   (compile with -Dlocal if your debugger can't find static symbols) */
 
 /* gz* functions always use library allocation functions */
 #ifndef STDC
@@ -175,7 +184,7 @@ typedef struct {
     char *path;             /* path or fd for error messages */
     unsigned size;          /* buffer size, zero if not allocated yet */
     unsigned want;          /* requested buffer size, default is GZBUFSIZE */
-    unsigned char *in;      /* input buffer */
+    unsigned char *in;      /* input buffer (double-sized when writing) */
     unsigned char *out;     /* output buffer (double-sized when reading) */
     int direct;             /* 0 if processing gzip, 1 if transparent */
         /* just for reading */
diff --git a/zlibWrapper/gzlib.c b/zlibWrapper/gzlib.c
index 932319af..aa94206a 100644
--- a/zlibWrapper/gzlib.c
+++ b/zlibWrapper/gzlib.c
@@ -1,14 +1,14 @@
 /* gzlib.c contains minimal changes required to be compiled with zlibWrapper:
- * - gz_statep was converted to union to work with -Wstrict-aliasing=1      */
+ * - gz_statep was converted to union to work with -Wstrict-aliasing=1      */ 
 
 /* gzlib.c -- zlib functions common to reading and writing gzip files
- * Copyright (C) 2004, 2010, 2011, 2012, 2013 Mark Adler
+ * Copyright (C) 2004-2017 Mark Adler
  * For conditions of distribution and use, see http://www.zlib.net/zlib_license.html
  */
 
 #include "gzguts.h"
 
-#if defined(_WIN32) && !defined(__BORLANDC__)
+#if defined(_WIN32) && !defined(__BORLANDC__) && !defined(__MINGW32__)
 #  define LSEEK _lseeki64
 #else
 #if defined(_LARGEFILE64_SOURCE) && _LFS64_LARGEFILE-0
@@ -97,7 +97,7 @@ local gzFile gz_open(path, fd, mode)
     const char *mode;
 {
     gz_statep state;
-    size_t len;
+    z_size_t len;
     int oflag;
 #ifdef O_CLOEXEC
     int cloexec = 0;
@@ -191,10 +191,10 @@ local gzFile gz_open(path, fd, mode)
     }
 
     /* save the path name for error messages */
-#ifdef _WIN32
+#ifdef WIDECHAR
     if (fd == -2) {
         len = wcstombs(NULL, path, 0);
-        if (len == (size_t)-1)
+        if (len == (z_size_t)-1)
             len = 0;
     }
     else
@@ -205,7 +205,7 @@ local gzFile gz_open(path, fd, mode)
         free(state.state);
         return NULL;
     }
-#ifdef _WIN32
+#ifdef WIDECHAR
     if (fd == -2)
         if (len)
             wcstombs(state.state->path, path, len + 1);
@@ -214,7 +214,7 @@ local gzFile gz_open(path, fd, mode)
     else
 #endif
 #if !defined(NO_snprintf) && !defined(NO_vsnprintf)
-        snprintf(state.state->path, len + 1, "%s", (const char *)path);
+        (void)snprintf(state.state->path, len + 1, "%s", (const char *)path);
 #else
         strcpy(state.state->path, path);
 #endif
@@ -242,7 +242,7 @@ local gzFile gz_open(path, fd, mode)
 
     /* open the file with the appropriate flags (or just use fd) */
     state.state->fd = fd > -1 ? fd : (
-#ifdef _WIN32
+#ifdef WIDECHAR
         fd == -2 ? _wopen(path, oflag, 0666) :
 #endif
         open((const char *)path, oflag, 0666));
@@ -251,8 +251,10 @@ local gzFile gz_open(path, fd, mode)
         free(state.state);
         return NULL;
     }
-    if (state.state->mode == GZ_APPEND)
+    if (state.state->mode == GZ_APPEND) {
+        LSEEK(state.state->fd, 0, SEEK_END);  /* so gzoffset() is correct */
         state.state->mode = GZ_WRITE;         /* simplify later checks */
+    }
 
     /* save the current position for rewinding (only if reading) */
     if (state.state->mode == GZ_READ) {
@@ -294,7 +296,7 @@ gzFile ZEXPORT gzdopen(fd, mode)
     if (fd == -1 || (path = (char *)malloc(7 + 3 * sizeof(int))) == NULL)
         return NULL;
 #if !defined(NO_snprintf) && !defined(NO_vsnprintf)
-    snprintf(path, 7 + 3 * sizeof(int), "<fd:%d>", fd); /* for debugging */
+    (void)snprintf(path, 7 + 3 * sizeof(int), "<fd:%d>", fd);
 #else
     sprintf(path, "<fd:%d>", fd);   /* for debugging */
 #endif
@@ -304,7 +306,7 @@ gzFile ZEXPORT gzdopen(fd, mode)
 }
 
 /* -- see zlib.h -- */
-#ifdef _WIN32
+#ifdef WIDECHAR
 gzFile ZEXPORT gzopen_w(path, mode)
     const wchar_t *path;
     const char *mode;
@@ -332,6 +334,8 @@ int ZEXPORT gzbuffer(file, size)
         return -1;
 
     /* check and set requested size */
+    if ((size << 1) < size)
+        return -1;              /* need to be able to double it */
     if (size < 2)
         size = 2;               /* need two bytes to check magic header */
     state.state->want = size;
@@ -569,8 +573,8 @@ void ZEXPORT gzclearerr(file)
     gz_error(state, Z_OK, NULL);
 }
 
-/* Create an error message in allocated memory and set state->err and
-   state->msg accordingly.  Free any previous error message already there.  Do
+/* Create an error message in allocated memory and set state.state->err and
+   state.state->msg accordingly.  Free any previous error message already there.  Do
    not try to free or allocate space if the error is Z_MEM_ERROR (out of
    memory).  Simply save the error message as a static string.  If there is an
    allocation failure constructing the error message, then convert the error to
@@ -587,7 +591,7 @@ void ZLIB_INTERNAL gz_error(state, err, msg)
         state.state->msg = NULL;
     }
 
-    /* if fatal, set state->x.have to 0 so that the gzgetc() macro fails */
+    /* if fatal, set state.state->x.have to 0 so that the gzgetc() macro fails */
     if (err != Z_OK && err != Z_BUF_ERROR)
         state.state->x.have = 0;
 
@@ -607,14 +611,13 @@ void ZLIB_INTERNAL gz_error(state, err, msg)
         return;
     }
 #if !defined(NO_snprintf) && !defined(NO_vsnprintf)
-    snprintf(state.state->msg, strlen(state.state->path) + strlen(msg) + 3,
-             "%s%s%s", state.state->path, ": ", msg);
+    (void)snprintf(state.state->msg, strlen(state.state->path) + strlen(msg) + 3,
+                   "%s%s%s", state.state->path, ": ", msg);
 #else
     strcpy(state.state->msg, state.state->path);
     strcat(state.state->msg, ": ");
     strcat(state.state->msg, msg);
 #endif
-    return;
 }
 
 #ifndef INT_MAX
diff --git a/zlibWrapper/gzread.c b/zlibWrapper/gzread.c
index f251e2fe..d37aaa1d 100644
--- a/zlibWrapper/gzread.c
+++ b/zlibWrapper/gzread.c
@@ -1,8 +1,8 @@
 /* gzread.c contains minimal changes required to be compiled with zlibWrapper:
- * - gz_statep was converted to union to work with -Wstrict-aliasing=1      */
-
-/* gzread.c -- zlib functions for reading gzip files
- * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013 Mark Adler
+ * - gz_statep was converted to union to work with -Wstrict-aliasing=1      */ 
+ 
+ /* gzread.c -- zlib functions for reading gzip files
+ * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
  * For conditions of distribution and use, see http://www.zlib.net/zlib_license.html
  */
 
@@ -15,9 +15,10 @@ local int gz_look OF((gz_statep));
 local int gz_decomp OF((gz_statep));
 local int gz_fetch OF((gz_statep));
 local int gz_skip OF((gz_statep, z_off64_t));
+local z_size_t gz_read OF((gz_statep, voidp, z_size_t));
 
 /* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
-   state->fd, and update state->eof, state->err, and state->msg as appropriate.
+   state.state->fd, and update state.state->eof, state.state->err, and state.state->msg as appropriate.
    This function needs to loop on read(), since read() is not guaranteed to
    read the number of bytes requested, depending on the type of descriptor. */
 local int gz_load(state, buf, len, have)
@@ -26,14 +27,18 @@ local int gz_load(state, buf, len, have)
     unsigned len;
     unsigned *have;
 {
-    int ret;
+    ssize_t ret;
+    unsigned get, max = ((unsigned)-1 >> 2) + 1;
 
     *have = 0;
     do {
-        ret = (int)read(state.state->fd, buf + *have, len - *have);
+        get = len - *have;
+        if (get > max)
+            get = max;
+        ret = read(state.state->fd, buf + *have, get);
         if (ret <= 0)
             break;
-        *have += ret;
+        *have += (unsigned)ret;
     } while (*have < len);
     if (ret < 0) {
         gz_error(state, Z_ERRNO, zstrerror());
@@ -77,8 +82,8 @@ local int gz_avail(state)
     return 0;
 }
 
-/* Look for gzip header, set up for inflate or copy.  state->x.have must be 0.
-   If this is the first time in, allocate required memory.  state->how will be
+/* Look for gzip header, set up for inflate or copy.  state.state->x.have must be 0.
+   If this is the first time in, allocate required memory.  state.state->how will be
    left unchanged if there is no more input data available, will be set to COPY
    if there is no gzip header and direct copying will be performed, or it will
    be set to GZIP for decompression.  If direct copying, then leftover input
@@ -97,10 +102,8 @@ local int gz_look(state)
         state.state->in = (unsigned char *)malloc(state.state->want);
         state.state->out = (unsigned char *)malloc(state.state->want << 1);
         if (state.state->in == NULL || state.state->out == NULL) {
-            if (state.state->out != NULL)
-                free(state.state->out);
-            if (state.state->in != NULL)
-                free(state.state->in);
+            free(state.state->out);
+            free(state.state->in);
             gz_error(state, Z_MEM_ERROR, "out of memory");
             return -1;
         }
@@ -136,7 +139,6 @@ local int gz_look(state)
        file -- for here we assume that if a gzip file is being written, then
        the header will be written in a single operation, so that reading a
        single byte is sufficient indication that it is not a gzip file) */
-    //printf("strm->next_in[0]=%d strm->next_in[1]=%d\n", strm->next_in[0], strm->next_in[1]);
     if (strm->avail_in > 1 &&
             ((strm->next_in[0] == 31 && strm->next_in[1] == 139) /* gz header */
             || (strm->next_in[0] == 40 && strm->next_in[1] == 181))) { /* zstd header */
@@ -170,9 +172,9 @@ local int gz_look(state)
 }
 
 /* Decompress from input to the provided next_out and avail_out in the state.
-   On return, state->x.have and state->x.next point to the just decompressed
-   data.  If the gzip stream completes, state->how is reset to LOOK to look for
-   the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
+   On return, state.state->x.have and state.state->x.next point to the just decompressed
+   data.  If the gzip stream completes, state.state->how is reset to LOOK to look for
+   the next gzip stream or raw data, once state.state->x.have is depleted.  Returns 0
    on success, -1 on failure. */
 local int gz_decomp(state)
     gz_statep state;
@@ -222,11 +224,11 @@ local int gz_decomp(state)
     return 0;
 }
 
-/* Fetch data and put it in the output buffer.  Assumes state->x.have is 0.
+/* Fetch data and put it in the output buffer.  Assumes state.state->x.have is 0.
    Data is either copied from the input file or decompressed from the input
-   file depending on state->how.  If state->how is LOOK, then a gzip header is
+   file depending on state.state->how.  If state.state->how is LOOK, then a gzip header is
    looked for to determine whether to copy or decompress.  Returns -1 on error,
-   otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
+   otherwise 0.  gz_fetch() will leave state.state->how as COPY or GZIP unless the
    end of the input file has been reached and all data has been processed.  */
 local int gz_fetch(state)
     gz_statep state;
@@ -289,33 +291,17 @@ local int gz_skip(state, len)
     return 0;
 }
 
-/* -- see zlib.h -- */
-int ZEXPORT gzread(file, buf, len)
-    gzFile file;
-    voidp buf;
-    unsigned len;
-{
-    unsigned got, n;
+/* Read len bytes into buf from file, or less than len up to the end of the
+   input.  Return the number of bytes read.  If zero is returned, either the
+   end of file was reached, or there was an error.  state.state->err must be
+   consulted in that case to determine which. */
+local z_size_t gz_read(state, buf, len)
     gz_statep state;
-    z_streamp strm;
-
-    /* get internal structure */
-    if (file == NULL)
-        return -1;
-    state = (gz_statep)file;
-    strm = &(state.state->strm);
-
-    /* check that we're reading and that there's no (serious) error */
-    if (state.state->mode != GZ_READ ||
-            (state.state->err != Z_OK && state.state->err != Z_BUF_ERROR))
-        return -1;
-
-    /* since an int is returned, make sure len fits in one, otherwise return
-       with an error (this avoids the flaw in the interface) */
-    if ((int)len < 0) {
-        gz_error(state, Z_DATA_ERROR, "requested length does not fit in int");
-        return -1;
-    }
+    voidp buf;
+    z_size_t len;
+{
+    z_size_t got;
+    unsigned n;
 
     /* if len is zero, avoid unnecessary operations */
     if (len == 0)
@@ -325,32 +311,38 @@ int ZEXPORT gzread(file, buf, len)
     if (state.state->seek) {
         state.state->seek = 0;
         if (gz_skip(state, state.state->skip) == -1)
-            return -1;
+            return 0;
     }
 
     /* get len bytes to buf, or less than len if at the end */
     got = 0;
     do {
+        /* set n to the maximum amount of len that fits in an unsigned int */
+        n = -1;
+        if (n > len)
+            n = (unsigned)len;
+
         /* first just try copying data from the output buffer */
         if (state.state->x.have) {
-            n = state.state->x.have > len ? len : state.state->x.have;
+            if (state.state->x.have < n)
+                n = state.state->x.have;
             memcpy(buf, state.state->x.next, n);
             state.state->x.next += n;
             state.state->x.have -= n;
         }
 
         /* output buffer empty -- return if we're at the end of the input */
-        else if (state.state->eof && strm->avail_in == 0) {
+        else if (state.state->eof && state.state->strm.avail_in == 0) {
             state.state->past = 1;        /* tried to read past end */
             break;
         }
 
         /* need output data -- for small len or new stream load up our output
            buffer */
-        else if (state.state->how == LOOK || len < (state.state->size << 1)) {
+        else if (state.state->how == LOOK || n < (state.state->size << 1)) {
             /* get more output, looking for header if required */
             if (gz_fetch(state) == -1)
-                return -1;
+                return 0;
             continue;       /* no progress yet -- go back to copy above */
             /* the copy above assures that we will leave with space in the
                output buffer, allowing at least one gzungetc() to succeed */
@@ -358,16 +350,16 @@ int ZEXPORT gzread(file, buf, len)
 
         /* large len -- read directly into user buffer */
         else if (state.state->how == COPY) {      /* read directly */
-            if (gz_load(state, (unsigned char *)buf, len, &n) == -1)
-                return -1;
+            if (gz_load(state, (unsigned char *)buf, n, &n) == -1)
+                return 0;
         }
 
         /* large len -- decompress directly into user buffer */
-        else {  /* state->how == GZIP */
-            strm->avail_out = len;
-            strm->next_out = (unsigned char *)buf;
+        else {  /* state.state->how == GZIP */
+            state.state->strm.avail_out = n;
+            state.state->strm.next_out = (unsigned char *)buf;
             if (gz_decomp(state) == -1)
-                return -1;
+                return 0;
             n = state.state->x.have;
             state.state->x.have = 0;
         }
@@ -379,8 +371,75 @@ int ZEXPORT gzread(file, buf, len)
         state.state->x.pos += n;
     } while (len);
 
-    /* return number of bytes read into user buffer (will fit in int) */
-    return (int)got;
+    /* return number of bytes read into user buffer */
+    return got;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzread(file, buf, len)
+    gzFile file;
+    voidp buf;
+    unsigned len;
+{
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+
+    /* check that we're reading and that there's no (serious) error */
+    if (state.state->mode != GZ_READ ||
+            (state.state->err != Z_OK && state.state->err != Z_BUF_ERROR))
+        return -1;
+
+    /* since an int is returned, make sure len fits in one, otherwise return
+       with an error (this avoids a flaw in the interface) */
+    if ((int)len < 0) {
+        gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
+        return -1;
+    }
+
+    /* read len or fewer bytes to buf */
+    len = (unsigned)gz_read(state, buf, len);
+
+    /* check for an error */
+    if (len == 0 && state.state->err != Z_OK && state.state->err != Z_BUF_ERROR)
+        return -1;
+
+    /* return the number of bytes read (this is assured to fit in an int) */
+    return (int)len;
+}
+
+/* -- see zlib.h -- */
+z_size_t ZEXPORT gzfread(buf, size, nitems, file)
+    voidp buf;
+    z_size_t size;
+    z_size_t nitems;
+    gzFile file;
+{
+    z_size_t len;
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return 0;
+    state = (gz_statep)file;
+
+    /* check that we're reading and that there's no (serious) error */
+    if (state.state->mode != GZ_READ ||
+            (state.state->err != Z_OK && state.state->err != Z_BUF_ERROR))
+        return 0;
+
+    /* compute bytes to read -- error on overflow */
+    len = nitems * size;
+    if (size && len / size != nitems) {
+        gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
+        return 0;
+    }
+
+    /* read len or fewer bytes to buf, return the number of full items read */
+    return len ? gz_read(state, buf, len) / size : 0;
 }
 
 /* -- see zlib.h -- */
@@ -401,7 +460,6 @@ ZEXTERN int ZEXPORT gzgetc OF((gzFile file));
 ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file));
 #endif
 
-
 int ZEXPORT gzgetc(file)
     gzFile file;
 {
@@ -426,8 +484,8 @@ int ZEXPORT gzgetc(file)
         return *(state.state->x.next)++;
     }
 
-    /* nothing there -- try gzread() */
-    ret = gzread(file, buf, 1);
+    /* nothing there -- try gz_read() */
+    ret = (unsigned)gz_read(state, buf, 1);
     return ret < 1 ? -1 : buf[0];
 }
 
diff --git a/zlibWrapper/gzwrite.c b/zlibWrapper/gzwrite.c
index 6f3c9658..bcda4774 100644
--- a/zlibWrapper/gzwrite.c
+++ b/zlibWrapper/gzwrite.c
@@ -1,8 +1,8 @@
 /* gzwrite.c contains minimal changes required to be compiled with zlibWrapper:
- * - gz_statep was converted to union to work with -Wstrict-aliasing=1      */
-
-/* gzwrite.c -- zlib functions for writing gzip files
- * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013 Mark Adler
+ * - gz_statep was converted to union to work with -Wstrict-aliasing=1      */
+
+/* gzwrite.c -- zlib functions for writing gzip files
+ * Copyright (C) 2004-2017 Mark Adler
  * For conditions of distribution and use, see http://www.zlib.net/zlib_license.html
  */
 
@@ -12,17 +12,19 @@
 local int gz_init OF((gz_statep));
 local int gz_comp OF((gz_statep, int));
 local int gz_zero OF((gz_statep, z_off64_t));
+local z_size_t gz_write OF((gz_statep, voidpc, z_size_t));
 
 /* Initialize state for writing a gzip file.  Mark initialization by setting
-   state->size to non-zero.  Return -1 on failure or 0 on success. */
+   state.state->size to non-zero.  Return -1 on a memory allocation failure, or 0 on
+   success. */
 local int gz_init(state)
     gz_statep state;
 {
     int ret;
     z_streamp strm = &(state.state->strm);
 
-    /* allocate input buffer */
-    state.state->in = (unsigned char *)malloc(state.state->want);
+    /* allocate input buffer (double size for gzprintf) */
+    state.state->in = (unsigned char *)malloc(state.state->want << 1);
     if (state.state->in == NULL) {
         gz_error(state, Z_MEM_ERROR, "out of memory");
         return -1;
@@ -50,6 +52,7 @@ local int gz_init(state)
             gz_error(state, Z_MEM_ERROR, "out of memory");
             return -1;
         }
+        strm->next_in = NULL;
     }
 
     /* mark state as initialized */
@@ -65,17 +68,17 @@ local int gz_init(state)
 }
 
 /* Compress whatever is at avail_in and next_in and write to the output file.
-   Return -1 if there is an error writing to the output file, otherwise 0.
-   flush is assumed to be a valid deflate() flush value.  If flush is Z_FINISH,
-   then the deflate() state is reset to start a new gzip stream.  If gz->direct
-   is true, then simply write to the output file without compressing, and
-   ignore flush. */
+   Return -1 if there is an error writing to the output file or if gz_init()
+   fails to allocate memory, otherwise 0.  flush is assumed to be a valid
+   deflate() flush value.  If flush is Z_FINISH, then the deflate() state is
+   reset to start a new gzip stream.  If gz->direct is true, then simply write
+   to the output file without compressing, and ignore flush. */
 local int gz_comp(state, flush)
     gz_statep state;
     int flush;
 {
-    int ret, got;
-    unsigned have;
+    int ret, writ;
+    unsigned have, put, max = ((unsigned)-1 >> 2) + 1;
     z_streamp strm = &(state.state->strm);
 
     /* allocate memory if this is the first time through */
@@ -84,12 +87,16 @@ local int gz_comp(state, flush)
 
     /* write directly if requested */
     if (state.state->direct) {
-        got = (int)write(state.state->fd, strm->next_in, strm->avail_in);
-        if (got < 0 || (unsigned)got != strm->avail_in) {
-            gz_error(state, Z_ERRNO, zstrerror());
-            return -1;
+        while (strm->avail_in) {
+            put = strm->avail_in > max ? max : strm->avail_in;
+            writ = (int)write(state.state->fd, strm->next_in, put);
+            if (writ < 0) {
+                gz_error(state, Z_ERRNO, zstrerror());
+                return -1;
+            }
+            strm->avail_in -= (unsigned)writ;
+            strm->next_in += writ;
         }
-        strm->avail_in = 0;
         return 0;
     }
 
@@ -100,17 +107,21 @@ local int gz_comp(state, flush)
            doing Z_FINISH then don't write until we get to Z_STREAM_END */
         if (strm->avail_out == 0 || (flush != Z_NO_FLUSH &&
             (flush != Z_FINISH || ret == Z_STREAM_END))) {
-            have = (unsigned)(strm->next_out - state.state->x.next);
-            if (have && ((got = (int)write(state.state->fd, state.state->x.next, have)) < 0 ||
-                         (unsigned)got != have)) {
-                gz_error(state, Z_ERRNO, zstrerror());
-                return -1;
+            while (strm->next_out > state.state->x.next) {
+                put = strm->next_out - state.state->x.next > (int)max ? max :
+                      (unsigned)(strm->next_out - state.state->x.next);
+                writ = (int)write(state.state->fd, state.state->x.next, put);
+                if (writ < 0) {
+                    gz_error(state, Z_ERRNO, zstrerror());
+                    return -1;
+                }
+                state.state->x.next += writ;
             }
             if (strm->avail_out == 0) {
                 strm->avail_out = state.state->size;
                 strm->next_out = state.state->out;
+                state.state->x.next = state.state->out;
             }
-            state.state->x.next = strm->next_out;
         }
 
         /* compress */
@@ -132,7 +143,8 @@ local int gz_comp(state, flush)
     return 0;
 }
 
-/* Compress len zeros to output.  Return -1 on error, 0 on success. */
+/* Compress len zeros to output.  Return -1 on a write error or memory
+   allocation failure by gz_comp(), or 0 on success. */
 local int gz_zero(state, len)
     gz_statep state;
     z_off64_t len;
@@ -164,32 +176,14 @@ local int gz_zero(state, len)
     return 0;
 }
 
-/* -- see zlib.h -- */
-int ZEXPORT gzwrite(file, buf, len)
-    gzFile file;
-    voidpc buf;
-    unsigned len;
-{
-    unsigned put = len;
+/* Write len bytes from buf to file.  Return the number of bytes written.  If
+   the returned value is less than len, then there was an error. */
+local z_size_t gz_write(state, buf, len)
     gz_statep state;
-    z_streamp strm;
-
-    /* get internal structure */
-    if (file == NULL)
-        return 0;
-    state = (gz_statep)file;
-    strm = &(state.state->strm);
-
-    /* check that we're writing and that there's no error */
-    if (state.state->mode != GZ_WRITE || state.state->err != Z_OK)
-        return 0;
-
-    /* since an int is returned, make sure len fits in one, otherwise return
-       with an error (this avoids the flaw in the interface) */
-    if ((int)len < 0) {
-        gz_error(state, Z_DATA_ERROR, "requested length does not fit in int");
-        return 0;
-    }
+    voidpc buf;
+    z_size_t len;
+{
+    z_size_t put = len;
 
     /* if len is zero, avoid unnecessary operations */
     if (len == 0)
@@ -210,16 +204,17 @@ int ZEXPORT gzwrite(file, buf, len)
     if (len < state.state->size) {
         /* copy to input buffer, compress when full */
         do {
-            unsigned have, copy;
+            z_size_t have, copy;
 
-            if (strm->avail_in == 0)
-                strm->next_in = state.state->in;
-            have = (unsigned)((strm->next_in + strm->avail_in) - state.state->in);
+            if (state.state->strm.avail_in == 0)
+                state.state->strm.next_in = state.state->in;
+            have = (unsigned)((state.state->strm.next_in + state.state->strm.avail_in) -
+                              state.state->in);
             copy = state.state->size - have;
             if (copy > len)
                 copy = len;
             memcpy(state.state->in + have, buf, copy);
-            strm->avail_in += copy;
+            state.state->strm.avail_in += copy;
             state.state->x.pos += copy;
             buf = (const char *)buf + copy;
             len -= copy;
@@ -229,19 +224,83 @@ int ZEXPORT gzwrite(file, buf, len)
     }
     else {
         /* consume whatever's left in the input buffer */
-        if (strm->avail_in && gz_comp(state, Z_NO_FLUSH) == -1)
+        if (state.state->strm.avail_in && gz_comp(state, Z_NO_FLUSH) == -1)
             return 0;
 
         /* directly compress user buffer to file */
-        strm->avail_in = len;
-        strm->next_in = (z_const Bytef *)buf;
-        state.state->x.pos += len;
-        if (gz_comp(state, Z_NO_FLUSH) == -1)
-            return 0;
+        state.state->strm.next_in = (z_const Bytef *)buf;
+        do {
+            z_size_t n = (unsigned)-1;
+            if (n > len)
+                n = len;
+            state.state->strm.avail_in = (z_uInt)n;
+            state.state->x.pos += n;
+            if (gz_comp(state, Z_NO_FLUSH) == -1)
+                return 0;
+            len -= n;
+        } while (len);
     }
 
-    /* input was all buffered or compressed (put will fit in int) */
-    return (int)put;
+    /* input was all buffered or compressed */
+    return put;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzwrite(file, buf, len)
+    gzFile file;
+    voidpc buf;
+    unsigned len;
+{
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return 0;
+    state = (gz_statep)file;
+
+    /* check that we're writing and that there's no error */
+    if (state.state->mode != GZ_WRITE || state.state->err != Z_OK)
+        return 0;
+
+    /* since an int is returned, make sure len fits in one, otherwise return
+       with an error (this avoids a flaw in the interface) */
+    if ((int)len < 0) {
+        gz_error(state, Z_DATA_ERROR, "requested length does not fit in int");
+        return 0;
+    }
+
+    /* write len bytes from buf (the return value will fit in an int) */
+    return (int)gz_write(state, buf, len);
+}
+
+/* -- see zlib.h -- */
+z_size_t ZEXPORT gzfwrite(buf, size, nitems, file)
+    voidpc buf;
+    z_size_t size;
+    z_size_t nitems;
+    gzFile file;
+{
+    z_size_t len;
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return 0;
+    state = (gz_statep)file;
+
+    /* check that we're writing and that there's no error */
+    if (state.state->mode != GZ_WRITE || state.state->err != Z_OK)
+        return 0;
+
+    /* compute bytes to write (nitems * size) -- error on overflow */
+    len = nitems * size;
+    if (size && len / size != nitems) {
+        gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
+        return 0;
+    }
+
+    /* write len bytes from buf, return the number of full items written */
+    return len ? gz_write(state, buf, len) / size : 0;
 }
 
 /* -- see zlib.h -- */
@@ -271,7 +330,7 @@ int ZEXPORT gzputc(file, c)
             return -1;
     }
 
-    /* try writing to input buffer for speed (state->size == 0 if buffer not
+    /* try writing to input buffer for speed (state.state->size == 0 if buffer not
        initialized) */
     if (state.state->size) {
         if (strm->avail_in == 0)
@@ -287,7 +346,7 @@ int ZEXPORT gzputc(file, c)
 
     /* no room in buffer or not initialized, use gz_write() */
     buf[0] = (unsigned char)c;
-    if (gzwrite(file, buf, 1) != 1)
+    if (gz_write(state, buf, 1) != 1)
         return -1;
     return c & 0xff;
 }
@@ -298,11 +357,21 @@ int ZEXPORT gzputs(file, str)
     const char *str;
 {
     int ret;
-    unsigned len;
+    z_size_t len;
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+
+    /* check that we're writing and that there's no error */
+    if (state.state->mode != GZ_WRITE || state.state->err != Z_OK)
+        return -1;
 
     /* write string */
-    len = (unsigned)strlen(str);
-    ret = gzwrite(file, str, len);
+    len = strlen(str);
+    ret = (int)gz_write(state, str, len);
     return ret == 0 && len != 0 ? -1 : ret;
 }
 
@@ -312,63 +381,73 @@ int ZEXPORT gzputs(file, str)
 /* -- see zlib.h -- */
 int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va)
 {
-    int size, len;
+    int len;
+    unsigned left;
+    char *next;
     gz_statep state;
     z_streamp strm;
 
     /* get internal structure */
     if (file == NULL)
-        return -1;
+        return Z_STREAM_ERROR;
     state = (gz_statep)file;
     strm = &(state.state->strm);
 
     /* check that we're writing and that there's no error */
     if (state.state->mode != GZ_WRITE || state.state->err != Z_OK)
-        return 0;
+        return Z_STREAM_ERROR;
 
     /* make sure we have some buffer space */
     if (state.state->size == 0 && gz_init(state) == -1)
-        return 0;
+        return state.state->err;
 
     /* check for seek request */
     if (state.state->seek) {
         state.state->seek = 0;
         if (gz_zero(state, state.state->skip) == -1)
-            return 0;
+            return state.state->err;
     }
 
-    /* consume whatever's left in the input buffer */
-    if (strm->avail_in && gz_comp(state, Z_NO_FLUSH) == -1)
-        return 0;
-
-    /* do the printf() into the input buffer, put length in len */
-    size = (int)(state.state->size);
-    state.state->in[size - 1] = 0;
+    /* do the printf() into the input buffer, put length in len -- the input
+       buffer is double-sized just for this function, so there is guaranteed to
+       be state.state->size bytes available after the current contents */
+    if (strm->avail_in == 0)
+        strm->next_in = state.state->in;
+    next = (char *)(state.state->in + (strm->next_in - state.state->in) + strm->avail_in);
+    next[state.state->size - 1] = 0;
 #ifdef NO_vsnprintf
 #  ifdef HAS_vsprintf_void
-    (void)vsprintf((char *)(state.state->in), format, va);
-    for (len = 0; len < size; len++)
-        if (state.state->in[len] == 0) break;
+    (void)vsprintf(next, format, va);
+    for (len = 0; len < state.state->size; len++)
+        if (next[len] == 0) break;
 #  else
-    len = vsprintf((char *)(state.state->in), format, va);
+    len = vsprintf(next, format, va);
 #  endif
 #else
 #  ifdef HAS_vsnprintf_void
-    (void)vsnprintf((char *)(state.state->in), size, format, va);
-    len = strlen((char *)(state.state->in));
+    (void)vsnprintf(next, state.state->size, format, va);
+    len = strlen(next);
 #  else
-    len = vsnprintf((char *)(state.state->in), size, format, va);
+    len = vsnprintf(next, state.state->size, format, va);
 #  endif
 #endif
 
     /* check that printf() results fit in buffer */
-    if (len <= 0 || len >= (int)size || state.state->in[size - 1] != 0)
+    if (len == 0 || (unsigned)len >= state.state->size || next[state.state->size - 1] != 0)
         return 0;
 
-    /* update buffer and position, defer compression until needed */
-    strm->avail_in = (unsigned)len;
-    strm->next_in = state.state->in;
+    /* update buffer and position, compress first half if past that */
+    strm->avail_in += (unsigned)len;
     state.state->x.pos += len;
+    if (strm->avail_in >= state.state->size) {
+        left = strm->avail_in - state.state->size;
+        strm->avail_in = state.state->size;
+        if (gz_comp(state, Z_NO_FLUSH) == -1)
+            return state.state->err;
+        memcpy(state.state->in, state.state->in + state.state->size, left);
+        strm->next_in = state.state->in;
+        strm->avail_in = left;
+    }
     return len;
 }
 
@@ -393,73 +472,82 @@ int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
     int a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
         a11, a12, a13, a14, a15, a16, a17, a18, a19, a20;
 {
-    int size, len;
+    unsigned len, left;
+    char *next;
     gz_statep state;
     z_streamp strm;
 
     /* get internal structure */
     if (file == NULL)
-        return -1;
+        return Z_STREAM_ERROR;
     state = (gz_statep)file;
     strm = &(state.state->strm);
 
     /* check that can really pass pointer in ints */
     if (sizeof(int) != sizeof(void *))
-        return 0;
+        return Z_STREAM_ERROR;
 
     /* check that we're writing and that there's no error */
     if (state.state->mode != GZ_WRITE || state.state->err != Z_OK)
-        return 0;
+        return Z_STREAM_ERROR;
 
     /* make sure we have some buffer space */
     if (state.state->size == 0 && gz_init(state) == -1)
-        return 0;
+        return state.state->err;
 
     /* check for seek request */
     if (state.state->seek) {
         state.state->seek = 0;
         if (gz_zero(state, state.state->skip) == -1)
-            return 0;
+            return state.state->err;
     }
 
-    /* consume whatever's left in the input buffer */
-    if (strm->avail_in && gz_comp(state, Z_NO_FLUSH) == -1)
-        return 0;
-
-    /* do the printf() into the input buffer, put length in len */
-    size = (int)(state.state->size);
-    state.state->in[size - 1] = 0;
+    /* do the printf() into the input buffer, put length in len -- the input
+       buffer is double-sized just for this function, so there is guaranteed to
+       be state.state->size bytes available after the current contents */
+    if (strm->avail_in == 0)
+        strm->next_in = state.state->in;
+    next = (char *)(strm->next_in + strm->avail_in);
+    next[state.state->size - 1] = 0;
 #ifdef NO_snprintf
 #  ifdef HAS_sprintf_void
-    sprintf((char *)(state.state->in), format, a1, a2, a3, a4, a5, a6, a7, a8,
-            a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
+    sprintf(next, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12,
+            a13, a14, a15, a16, a17, a18, a19, a20);
-    for (len = 0; len < size; len++)
+    for (len = 0; len < state.state->size; len++)
-        if (state.state->in[len] == 0) break;
+        if (next[len] == 0)
+            break;
 #  else
-    len = sprintf((char *)(state.state->in), format, a1, a2, a3, a4, a5, a6, a7, a8,
-                  a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
+    len = sprintf(next, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11,
+                  a12, a13, a14, a15, a16, a17, a18, a19, a20);
 #  endif
 #else
 #  ifdef HAS_snprintf_void
-    snprintf((char *)(state.state->in), size, format, a1, a2, a3, a4, a5, a6, a7, a8,
-             a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
-    len = strlen((char *)(state.state->in));
+    snprintf(next, state.state->size, format, a1, a2, a3, a4, a5, a6, a7, a8, a9,
+             a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
+    len = strlen(next);
 #  else
-    len = snprintf((char *)(state.state->in), size, format, a1, a2, a3, a4, a5, a6,
-                   a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18,
-                   a19, a20);
+    len = snprintf(next, state.state->size, format, a1, a2, a3, a4, a5, a6, a7, a8,
+                   a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
 #  endif
 #endif
 
     /* check that printf() results fit in buffer */
-    if (len <= 0 || len >= (int)size || state.state->in[size - 1] != 0)
+    if (len == 0 || len >= state.state->size || next[state.state->size - 1] != 0)
         return 0;
 
-    /* update buffer and position, defer compression until needed */
-    strm->avail_in = (unsigned)len;
-    strm->next_in = state.state->in;
+    /* update buffer and position, compress first half if past that */
+    strm->avail_in += len;
     state.state->x.pos += len;
-    return len;
+    if (strm->avail_in >= state.state->size) {
+        left = strm->avail_in - state.state->size;
+        strm->avail_in = state.state->size;
+        if (gz_comp(state, Z_NO_FLUSH) == -1)
+            return state.state->err;
+        memcpy(state.state->in, state.state->in + state.state->size, left);
+        strm->next_in = state.state->in;
+        strm->avail_in = left;
+    }
+    return (int)len;
 }
 
 #endif
@@ -473,7 +561,7 @@ int ZEXPORT gzflush(file, flush)
 
     /* get internal structure */
     if (file == NULL)
-        return -1;
+        return Z_STREAM_ERROR;
     state = (gz_statep)file;
 
     /* check that we're writing and that there's no error */
@@ -488,11 +576,11 @@ int ZEXPORT gzflush(file, flush)
     if (state.state->seek) {
         state.state->seek = 0;
         if (gz_zero(state, state.state->skip) == -1)
-            return -1;
+            return state.state->err;
     }
 
     /* compress remaining data with requested flush */
-    gz_comp(state, flush);
+    (void)gz_comp(state, flush);
     return state.state->err;
 }
 
@@ -523,13 +611,13 @@ int ZEXPORT gzsetparams(file, level, strategy)
     if (state.state->seek) {
         state.state->seek = 0;
         if (gz_zero(state, state.state->skip) == -1)
-            return -1;
+            return state.state->err;
     }
 
     /* change compression parameters for subsequent input */
     if (state.state->size) {
         /* flush previous input with previous parameters before changing */
-        if (strm->avail_in && gz_comp(state, Z_PARTIAL_FLUSH) == -1)
+        if (strm->avail_in && gz_comp(state, Z_BLOCK) == -1)
             return state.state->err;
         deflateParams(strm, level, strategy);
     }
diff --git a/zlibWrapper/zstd_zlibwrapper.c b/zlibWrapper/zstd_zlibwrapper.c
index 23851052..1960d19e 100644
--- a/zlibWrapper/zstd_zlibwrapper.c
+++ b/zlibWrapper/zstd_zlibwrapper.c
@@ -81,7 +81,8 @@ typedef enum { ZWRAP_useInit, ZWRAP_useReset, ZWRAP_streamEnd } ZWRAP_state_t;
 typedef struct {
     ZSTD_CStream* zbc;
     int compressionLevel;
-    int streamEnd;
+    int streamEnd; /* a flag to signal the end of a stream */
+    unsigned long long totalInBytes; /* we need it as strm->total_in can be reset by user */
     ZSTD_customMem customMem;
     z_stream allocFunc; /* copy of zalloc, zfree, opaque */
     ZSTD_inBuffer inBuffer;
@@ -189,6 +190,7 @@ ZEXTERN int ZEXPORT z_deflateInit_ OF((z_streamp strm, int level,
         level = ZWRAP_DEFAULT_CLEVEL;
 
     zwc->streamEnd = 0;
+    zwc->totalInBytes = 0;
     zwc->compressionLevel = level;
     strm->state = (struct internal_state*) zwc; /* use state which in not used by user */
     strm->total_in = 0;
@@ -217,7 +219,10 @@ int ZWRAP_deflateReset_keepDict(z_streamp strm)
         return deflateReset(strm);
 
     { ZWRAP_CCtx* zwc = (ZWRAP_CCtx*) strm->state;
-      if (zwc) zwc->streamEnd = 0;
+      if (zwc) { 
+          zwc->streamEnd = 0;
+          zwc->totalInBytes = 0;
+      }
     }
 
     strm->total_in = 0;
@@ -289,7 +294,7 @@ ZEXTERN int ZEXPORT z_deflate OF((z_streamp strm, int flush))
         if (res != Z_OK) return ZWRAPC_finishWithError(zwc, strm, res);
         if (flush != Z_FINISH) zwc->comprState = ZWRAP_useReset;
     } else {
-        if (strm->total_in == 0) {
+        if (zwc->totalInBytes == 0) {
             if (zwc->comprState == ZWRAP_useReset) {
                 size_t const errorCode = ZSTD_resetCStream(zwc->zbc, (flush == Z_FINISH) ? strm->avail_in : zwc->pledgedSrcSize);
                 if (ZSTD_isError(errorCode)) { LOG_WRAPPERC("ERROR: ZSTD_resetCStream errorCode=%s\n", ZSTD_getErrorName(errorCode)); return ZWRAPC_finishWithError(zwc, strm, 0); }
@@ -317,6 +322,7 @@ ZEXTERN int ZEXPORT z_deflate OF((z_streamp strm, int flush))
         strm->total_out += zwc->outBuffer.pos;
         strm->avail_out -= zwc->outBuffer.pos;
         strm->total_in += zwc->inBuffer.pos;
+        zwc->totalInBytes += zwc->inBuffer.pos;
         strm->next_in += zwc->inBuffer.pos;
         strm->avail_in -= zwc->inBuffer.pos;
     }
@@ -411,6 +417,7 @@ typedef struct {
     ZSTD_DStream* zbd;
     char headerBuf[16]; /* should be equal or bigger than ZSTD_frameHeaderSize_min */
     int errorCount;
+    unsigned long long totalInBytes; /* we need it as strm->total_in can be reset by user */
     ZWRAP_state_t decompState;
     ZSTD_inBuffer inBuffer;
     ZSTD_outBuffer outBuffer;
@@ -511,6 +518,7 @@ ZEXTERN int ZEXPORT z_inflateInit_ OF((z_streamp strm,
     strcpy(zwd->version, version);
 
     zwd->stream_size = stream_size;
+    zwd->totalInBytes = 0;
     strm->state = (struct internal_state*) zwd; /* use state which in not used by user */
     strm->total_in = 0;
     strm->total_out = 0;
@@ -551,6 +559,7 @@ int ZWRAP_inflateReset_keepDict(z_streamp strm)
         if (zwd == NULL) return Z_STREAM_ERROR;
         ZWRAP_initDCtx(zwd);
         zwd->decompState = ZWRAP_useReset;
+        zwd->totalInBytes = 0;
     }
 
     strm->total_in = 0;
@@ -610,9 +619,9 @@ ZEXTERN int ZEXPORT z_inflateSetDictionary OF((z_streamp strm,
         if (ZSTD_isError(errorCode)) return ZWRAPD_finishWithError(zwd, strm, 0);
         zwd->decompState = ZWRAP_useReset;
 
-        if (strm->total_in == ZSTD_HEADERSIZE) {
+        if (zwd->totalInBytes == ZSTD_HEADERSIZE) {
             zwd->inBuffer.src = zwd->headerBuf;
-            zwd->inBuffer.size = strm->total_in;
+            zwd->inBuffer.size = zwd->totalInBytes;
             zwd->inBuffer.pos = 0;
             zwd->outBuffer.dst = strm->next_out;
             zwd->outBuffer.size = 0;
@@ -650,8 +659,8 @@ ZEXTERN int ZEXPORT z_inflate OF((z_streamp strm, int flush))
         if (zwd == NULL) return Z_STREAM_ERROR;
         if (zwd->decompState == ZWRAP_streamEnd) return Z_STREAM_END;
 
-        if (strm->total_in < ZLIB_HEADERSIZE) {
-            if (strm->total_in == 0 && strm->avail_in >= ZLIB_HEADERSIZE) {
+        if (zwd->totalInBytes < ZLIB_HEADERSIZE) {
+            if (zwd->totalInBytes == 0 && strm->avail_in >= ZLIB_HEADERSIZE) {
                 if (MEM_readLE32(strm->next_in) != ZSTD_MAGICNUMBER) {
                     if (zwd->windowBits)
                         errorCode = inflateInit2_(strm, zwd->windowBits, zwd->version, zwd->stream_size);
@@ -668,12 +677,13 @@ ZEXTERN int ZEXPORT z_inflate OF((z_streamp strm, int flush))
                     return res;
                 }
             } else {
-                srcSize = MIN(strm->avail_in, ZLIB_HEADERSIZE - strm->total_in);
-                memcpy(zwd->headerBuf+strm->total_in, strm->next_in, srcSize);
+                srcSize = MIN(strm->avail_in, ZLIB_HEADERSIZE - zwd->totalInBytes);
+                memcpy(zwd->headerBuf+zwd->totalInBytes, strm->next_in, srcSize);
                 strm->total_in += srcSize;
+                zwd->totalInBytes += srcSize;
                 strm->next_in += srcSize;
                 strm->avail_in -= srcSize;
-                if (strm->total_in < ZLIB_HEADERSIZE) return Z_OK;
+                if (zwd->totalInBytes < ZLIB_HEADERSIZE) return Z_OK;
 
                 if (MEM_readLE32(zwd->headerBuf) != ZSTD_MAGICNUMBER) {
                     z_stream strm2;
@@ -725,9 +735,9 @@ ZEXTERN int ZEXPORT z_inflate OF((z_streamp strm, int flush))
             zwd->decompState = ZWRAP_useInit;
         }
 
-        if (strm->total_in < ZSTD_HEADERSIZE)
+        if (zwd->totalInBytes < ZSTD_HEADERSIZE)
         {
-            if (strm->total_in == 0 && strm->avail_in >= ZSTD_HEADERSIZE) {
+            if (zwd->totalInBytes == 0 && strm->avail_in >= ZSTD_HEADERSIZE) {
                 if (zwd->decompState == ZWRAP_useInit) {
                     errorCode = ZSTD_initDStream(zwd->zbd);
                     if (ZSTD_isError(errorCode)) { LOG_WRAPPERD("ERROR: ZSTD_initDStream errorCode=%s\n", ZSTD_getErrorName(errorCode)); goto error; }
@@ -736,12 +746,13 @@ ZEXTERN int ZEXPORT z_inflate OF((z_streamp strm, int flush))
                     if (ZSTD_isError(errorCode)) goto error;
                 }
             } else {
-                srcSize = MIN(strm->avail_in, ZSTD_HEADERSIZE - strm->total_in);
-                memcpy(zwd->headerBuf+strm->total_in, strm->next_in, srcSize);
+                srcSize = MIN(strm->avail_in, ZSTD_HEADERSIZE - zwd->totalInBytes);
+                memcpy(zwd->headerBuf+zwd->totalInBytes, strm->next_in, srcSize);
                 strm->total_in += srcSize;
+                zwd->totalInBytes += srcSize;
                 strm->next_in += srcSize;
                 strm->avail_in -= srcSize;
-                if (strm->total_in < ZSTD_HEADERSIZE) return Z_OK;
+                if (zwd->totalInBytes < ZSTD_HEADERSIZE) return Z_OK;
 
                 if (zwd->decompState == ZWRAP_useInit) {
                     errorCode = ZSTD_initDStream(zwd->zbd);
@@ -785,6 +796,7 @@ ZEXTERN int ZEXPORT z_inflate OF((z_streamp strm, int flush))
         strm->total_out += zwd->outBuffer.pos;
         strm->avail_out -= zwd->outBuffer.pos;
         strm->total_in += zwd->inBuffer.pos;
+        zwd->totalInBytes += zwd->inBuffer.pos;
         strm->next_in += zwd->inBuffer.pos;
         strm->avail_in -= zwd->inBuffer.pos;
         if (errorCode == 0) {
@@ -1042,3 +1054,24 @@ ZEXTERN uLong ZEXPORT z_crc32   OF((uLong crc, const Bytef *buf, uInt len))
 {
     return crc32(crc, buf, len);
 }
+
+
+#if ZLIB_VERNUM >= 0x12B0   /* checksum pass-throughs taking a z_size_t length */
+ZEXTERN uLong ZEXPORT z_adler32_z OF((uLong adler, const Bytef *buf, z_size_t len))
+{
+    return adler32_z(adler, buf, len);
+}
+
+ZEXTERN uLong ZEXPORT z_crc32_z OF((uLong crc, const Bytef *buf, z_size_t len))
+{
+    return crc32_z(crc, buf, len);
+}
+#endif /* ZLIB_VERNUM >= 0x12B0 */
+
+
+#if ZLIB_VERNUM >= 0x1270
+ZEXTERN const z_crc_t FAR * ZEXPORT z_get_crc_table    OF((void))
+{
+    return get_crc_table();   /* forwards the result of get_crc_table() unchanged */
+}
+#endif /* ZLIB_VERNUM >= 0x1270 */