Merge branch 'dev' of github.com:facebook/zstd into dev

2017-05-02 12:13:52 -07:00 · 2017-05-02 12:13:52 -07:00 · 606c04c228
parent b184589c4c 072484a3bf
commit 606c04c228
7 changed files with 181 additions and 91 deletions
--- a/lib/dictBuilder/cover.c
+++ b/lib/dictBuilder/cover.c
@ -234,10 +234,22 @@ static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
 * Returns 1 if the dmer at lp is greater than the dmer at rp.
 */
 static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) {
-  const U32 lhs = *(const U32 *)lp;
-  const U32 rhs = *(const U32 *)rp;
+  U32 const lhs = *(U32 const *)lp;
+  U32 const rhs = *(U32 const *)rp;
  return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d);
 }
+/**
+ * Faster version for d <= 8.
+ */
+static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
+  U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1);
+  U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask;
+  U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask;
+  if (lhs < rhs) {
+    return -1;
+  }
+  return (lhs > rhs);
+}

 /**
 * Same as COVER_cmp() except ties are broken by pointer value
@ -251,6 +263,16 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
  }
  return result;
 }
+/**
+ * Faster version for d <= 8.
+ */
+static int COVER_strict_cmp8(const void *lp, const void *rp) {
+  int result = COVER_cmp8(g_ctx, lp, rp);
+  if (result == 0) {
+    result = lp < rp ? -1 : 1;
+  }
+  return result;
+}

 /**
 * Returns the first pointer in [first, last) whose element does not compare
@ -506,7 +528,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
  const BYTE *const samples = (const BYTE *)samplesBuffer;
  const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
  /* Checks */
-  if (totalSamplesSize < d ||
+  if (totalSamplesSize < MAX(d, sizeof(U64)) ||
      totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
    DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
                 (COVER_MAX_SAMPLES_SIZE >> 20));
@ -520,7 +542,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
  ctx->samplesSizes = samplesSizes;
  ctx->nbSamples = nbSamples;
  /* Partial suffix array */
-  ctx->suffixSize = totalSamplesSize - d + 1;
+  ctx->suffixSize = totalSamplesSize - MAX(d, sizeof(U64)) + 1;
  ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
  /* Maps index to the dmerID */
  ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
@ -554,7 +576,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
    }
    /* qsort doesn't take an opaque pointer, so pass as a global */
    g_ctx = ctx;
-    qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), &COVER_strict_cmp);
+    qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
+          (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
  }
  DISPLAYLEVEL(2, "Computing frequencies\n");
  /* For each dmer group (group of positions with the same first d bytes):
@ -564,8 +587,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
   * 2. We calculate how many samples the dmer occurs in and save it in
   *    freqs[dmerId].
   */
-  COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, &COVER_cmp,
-                &COVER_group);
+  COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx,
+                (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
  ctx->freqs = ctx->suffix;
  ctx->suffix = NULL;
  return 1;
@ -916,10 +939,10 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
  /* constants */
  const unsigned nbThreads = parameters->nbThreads;
  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
-  const unsigned kMaxD = parameters->d == 0 ? 16 : parameters->d;
-  const unsigned kMinK = parameters->k == 0 ? kMaxD : parameters->k;
-  const unsigned kMaxK = parameters->k == 0 ? 2048 : parameters->k;
-  const unsigned kSteps = parameters->steps == 0 ? 32 : parameters->steps;
+  const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
+  const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
+  const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
+  const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
  const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
  const unsigned kIterations =
      (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
--- a/lib/dictBuilder/zdict.h
+++ b/lib/dictBuilder/zdict.h
@ -88,7 +88,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dict

 /*! COVER_params_t :
    For all values 0 means default.
-    kMin and d are the only required parameters.
+    k and d are the only required parameters.
 */
 typedef struct {
    unsigned k;                  /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
--- a/programs/Makefile
+++ b/programs/Makefile
@ -145,7 +145,7 @@ zstd-nogz : ZLIB_MSG := $(NO_ZLIB_MSG)
 zstd-nogz : LZMA_MSG := $(NO_LZMA_MSG)
 xzstd : CPPFLAGS += $(ZLIBCPP) $(LZMACPP)
 xzstd : LDFLAGS += $(ZLIBLD) $(LZMALD)
-xzstd : LZ4_MSG := $(NO_LZMA_MSG)
+xzstd : LZ4_MSG := $(NO_LZ4_MSG)
 zstd4 : CPPFLAGS += $(ZLIBCPP) $(LZ4CPP)
 zstd4 : LDFLAGS += $(ZLIBLD) $(LZ4LD)
 zstd4 : LZMA_MSG := $(NO_LZMA_MSG)
--- a/programs/zstd.1
+++ b/programs/zstd.1
@ -1,5 +1,5 @@
 .
-.TH "ZSTD" "1" "April 2017" "zstd 1.1.5" "User Commands"
+.TH "ZSTD" "1" "May 2017" "zstd 1.2.0" "User Commands"
 .
 .SH "NAME"
 \fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files
@ -168,49 +168,57 @@ All arguments after \fB\-\-\fR are treated as files
 .
 .TP
 \fB\-\-train FILEs\fR
-use FILEs as training set to create a dictionary\. The training set should contain a lot of small files (> 100), and weight typically 100x the target dictionary size (for example, 10 MB for a 100 KB dictionary)\.
+Use FILEs as training set to create a dictionary\. The training set should contain a lot of small files (> 100), and weight typically 100x the target dictionary size (for example, 10 MB for a 100 KB dictionary)\.
+.
+.IP
+Supports multithreading if \fBzstd\fR is compiled with threading support\. Additional parameters can be specified with \fB\-\-train\-cover\fR\. The legacy dictionary builder can be accessed with \fB\-\-train\-legacy\fR\. Equivalent to \fB\-\-train\-cover=d=8,steps=4\fR\.
 .
 .TP
 \fB\-o file\fR
-dictionary saved into \fBfile\fR (default name: dictionary)
+Dictionary saved into \fBfile\fR (default name: dictionary)\.
 .
 .TP
 \fB\-\-maxdict=#\fR
-limit dictionary to specified size (default : (112640)
+Limit dictionary to specified size (default: 112640)\.
 .
 .TP
 \fB\-\-dictID=#\fR
 A dictionary ID is a locally unique ID that a decoder can use to verify it is using the right dictionary\. By default, zstd will create a 4\-bytes random number ID\. It\'s possible to give a precise number instead\. Short numbers have an advantage : an ID < 256 will only need 1 byte in the compressed frame header, and an ID < 65536 will only need 2 bytes\. This compares favorably to 4 bytes default\. However, it\'s up to the dictionary manager to not assign twice the same ID to 2 different dictionaries\.
 .
 .TP
-\fB\-s#\fR
-dictionary selectivity level (default: 9) the smaller the value, the denser the dictionary, improving its efficiency but reducing its possible maximum size\.
+\fB\-\-train\-cover[=k#,d=#,steps=#]\fR
+Select parameters for the default dictionary builder algorithm named cover\. If \fId\fR is not specified, then it tries \fId\fR = 6 and \fId\fR = 8\. If \fIk\fR is not specified, then it tries \fIsteps\fR values in the range [50, 2000]\. If \fIsteps\fR is not specified, then the default value of 40 is used\. Requires that \fId\fR <= \fIk\fR\.
+.
+.IP
+Selects segments of size \fIk\fR with highest score to put in the dictionary\. The score of a segment is computed by the sum of the frequencies of all the subsegments of size \fId\fR\. Generally \fId\fR should be in the range [6, 8], occasionally up to 16, but the algorithm will run faster with d <= \fI8\fR\. Good values for \fIk\fR vary widely based on the input data, but a safe range is [2 * \fId\fR, 2000]\. Supports multithreading if \fBzstd\fR is compiled with threading support\.
+.
+.IP
+Examples:
+.
+.IP
+\fBzstd \-\-train\-cover FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-cover=k=50,d=8 FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-cover=d=8,steps=500 FILEs\fR
+.
+.IP
+\fBzstd \-\-train\-cover=k=50 FILEs\fR
 .
 .TP
-\fB\-\-cover=k#,d=#\fR
-Use alternate dictionary builder algorithm named cover with parameters \fIk\fR and \fId\fR with \fId\fR <= \fIk\fR\. Selects segments of size \fIk\fR with the highest score to put in the dictionary\. The score of a segment is computed by the sum of the frequencies of all the subsegments of of size \fId\fR\. Generally \fId\fR should be in the range [6, 24]\. Good values for \fIk\fR vary widely based on the input data, but a safe range is [32, 2048]\.
-.
-.br
-Example: \fB\-\-train \-\-cover=k=64,d=8 FILEs\fR\.
-.
-.TP
-\fB\-\-optimize\-cover[=steps=#,k=#,d=#]\fR
-If \fIsteps\fR is not specified, the default value of 32 is used\. If \fIk\fR is not specified, the \fIk\fR values in [16, 2048] are checked for each value of \fId\fR\. If \fId\fR is not specified, the values checked are [6, 8, \.\.\., 16]\.
+\fB\-\-train\-legacy[=selectivity=#]\fR
+Use legacy dictionary builder algorithm with the given dictionary \fIselectivity\fR (default: 9)\. The smaller the \fIselectivity\fR value, the denser the dictionary, improving its efficiency but reducing its possible maximum size\. \fB\-\-train\-legacy=s=#\fR is also accepted\.
 .
 .IP
-Runs the cover dictionary builder for each parameter set and saves the optimal parameters and dictionary\. Prints optimal parameters and writes optimal dictionary into output file\. Supports multithreading if \fBzstd\fR is compiled with threading support\.
+Examples:
 .
 .IP
-The parameter \fIk\fR is more sensitive than \fId\fR, and is faster to optimize over\. Suggested use is to run with a \fIsteps\fR <= 32 with neither \fIk\fR nor \fId\fR set\. Once it completes, use the value of \fId\fR it selects with a higher \fIsteps\fR (in the range [256, 1024])\.
+\fBzstd \-\-train\-legacy FILEs\fR
 .
 .IP
-Examples :
-.
-.IP
-\fBzstd \-\-train \-\-optimize\-cover FILEs\fR
-.
-.IP
-\fBzstd \-\-train \-\-optimize\-cover=d=d,steps=512 FILEs\fR
+\fBzstd \-\-train\-legacy=selectivity=8 FILEs\fR
 .
 .SH "BENCHMARK"
 .
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@ -158,14 +158,19 @@ It will improve compression ratio of small files.
 Typical gains range from 10% (at 64KB) to x5 better (at <1KB).

 * `--train FILEs`:
-    use FILEs as training set to create a dictionary.
+    Use FILEs as training set to create a dictionary.
    The training set should contain a lot of small files (> 100),
    and weight typically 100x the target dictionary size
    (for example, 10 MB for a 100 KB dictionary).
+
+    Supports multithreading if `zstd` is compiled with threading support.
+    Additional parameters can be specified with `--train-cover`.
+    The legacy dictionary builder can be accessed with `--train-legacy`.
+    Equivalent to `--train-cover=d=8,steps=4`.
 * `-o file`:
-    dictionary saved into `file` (default name: dictionary)
+    Dictionary saved into `file` (default name: dictionary).
 * `--maxdict=#`:
-    limit dictionary to specified size (default : (112640)
+    Limit dictionary to specified size (default: 112640).
 * `--dictID=#`:
    A dictionary ID is a locally unique ID that a decoder can use to verify it is
    using the right dictionary.
@ -176,42 +181,44 @@ Typical gains range from 10% (at 64KB) to x5 better (at <1KB).
    This compares favorably to 4 bytes default.
    However, it's up to the dictionary manager to not assign twice the same ID to
    2 different dictionaries.
-* `-s#`:
-    dictionary selectivity level (default: 9)
-    the smaller the value, the denser the dictionary,
-    improving its efficiency but reducing its possible maximum size.
-* `--cover=k#,d=#`:
-    Use alternate dictionary builder algorithm named cover with parameters
-    _k_ and _d_ with _d_ <= _k_.
-    Selects segments of size _k_ with the highest score to put in the dictionary.
+* `--train-cover[=k#,d=#,steps=#]`:
+    Select parameters for the default dictionary builder algorithm named cover.
+    If _d_ is not specified, then it tries _d_ = 6 and _d_ = 8.
+    If _k_ is not specified, then it tries _steps_ values in the range [50, 2000].
+    If _steps_ is not specified, then the default value of 40 is used.
+    Requires that _d_ <= _k_.
+
+    Selects segments of size _k_ with highest score to put in the dictionary.
    The score of a segment is computed by the sum of the frequencies of all the
-    subsegments of of size _d_.
-    Generally _d_ should be in the range [6, 24].
-    Good values for _k_ vary widely based on the input data,
-    but a safe range is [32, 2048].<br />
-    Example: `--train --cover=k=64,d=8 FILEs`.
-
-* `--optimize-cover[=steps=#,k=#,d=#]`:
-    If _steps_ is not specified, the default value of 32 is used.
-    If _k_ is not specified, the _k_ values in [16, 2048] are checked for each
-    value of _d_.
-    If _d_ is not specified, the values checked are [6, 8, ..., 16].
-
-    Runs the cover dictionary builder for each parameter set
-    and saves the optimal parameters and dictionary.
-    Prints optimal parameters and writes optimal dictionary into output file.
+    subsegments of size _d_.
+    Generally _d_ should be in the range [6, 8], occasionally up to 16, but the
+    algorithm will run faster with d <= _8_.
+    Good values for _k_ vary widely based on the input data, but a safe range is
+    [2 * _d_, 2000].
    Supports multithreading if `zstd` is compiled with threading support.

-    The parameter _k_ is more sensitive than _d_, and is faster to optimize over.
-    Suggested use is to run with a _steps_ <= 32 with neither _k_ nor _d_ set.
-    Once it completes, use the value of _d_ it selects with a higher _steps_
-    (in the range [256, 1024]).
+    Examples:

-    Examples :
+    `zstd --train-cover FILEs`

-    `zstd --train --optimize-cover FILEs`
+    `zstd --train-cover=k=50,d=8 FILEs`

-    `zstd --train --optimize-cover=d=d,steps=512 FILEs`
+    `zstd --train-cover=d=8,steps=500 FILEs`
+
+    `zstd --train-cover=k=50 FILEs`
+
+* `--train-legacy[=selectivity=#]`:
+    Use legacy dictionary builder algorithm with the given dictionary
+    _selectivity_ (default: 9).
+    The smaller the _selectivity_ value, the denser the dictionary,
+    improving its efficiency but reducing its possible maximum size.
+    `--train-legacy=s=#` is also accepted.
+
+    Examples:
+
+    `zstd --train-legacy FILEs`
+
+    `zstd --train-legacy=selectivity=8 FILEs`


 BENCHMARK
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@ -153,11 +153,10 @@ static int usage_advanced(const char* programName)
    DISPLAY( "\n");
    DISPLAY( "Dictionary builder :\n");
    DISPLAY( "--train ## : create a dictionary from a training set of files \n");
-    DISPLAY( "--cover=k=#,d=# : use the cover algorithm with parameters k and d \n");
-    DISPLAY( "--optimize-cover[=steps=#,k=#,d=#] : optimize cover parameters with optional parameters\n");
+    DISPLAY( "--train-cover[=k=#,d=#,steps=#] : use the cover algorithm with optional args\n");
+    DISPLAY( "--train-legacy[=s=#] : use the legacy algorithm with selectivity (default: %u)\n", g_defaultSelectivityLevel);
    DISPLAY( " -o file : `file` is dictionary name (default: %s) \n", g_defaultDictName);
    DISPLAY( "--maxdict=# : limit dictionary to specified size (default : %u) \n", g_defaultMaxDictSize);
-    DISPLAY( " -s#    : dictionary selectivity level (default: %u)\n", g_defaultSelectivityLevel);
    DISPLAY( "--dictID=# : force dictionary ID to specified value (default: random)\n");
 #endif
 #ifndef ZSTD_NOBENCH
@ -241,11 +240,11 @@ static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
 #ifndef ZSTD_NODICT
 /**
 * parseCoverParameters() :
- * reads cover parameters from *stringPtr (e.g. "--cover=smoothing=100,kmin=48,kstep=4,kmax=64,d=8") into *params
+ * reads cover parameters from *stringPtr (e.g. "--train-cover=k=48,d=8,steps=32") into *params
 * @return 1 means that cover parameters were correct
 * @return 0 in case of malformed parameters
 */
-static unsigned parseCoverParameters(const char* stringPtr, COVER_params_t *params)
+static unsigned parseCoverParameters(const char* stringPtr, COVER_params_t* params)
 {
    memset(params, 0, sizeof(*params));
    for (; ;) {
@ -255,9 +254,33 @@ static unsigned parseCoverParameters(const char* stringPtr, COVER_params_t *para
        return 0;
    }
    if (stringPtr[0] != 0) return 0;
-    DISPLAYLEVEL(4, "k=%u\nd=%u\nsteps=%u\n", params->k, params->d, params->steps);
+    DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nsteps=%u\n", params->k, params->d, params->steps);
    return 1;
 }
+
+/**
+ * parseLegacyParameters() :
+ * reads legacy dictioanry builter parameters from *stringPtr (e.g. "--train-legacy=selectivity=8") into *selectivity
+ * @return 1 means that legacy dictionary builder parameters were correct
+ * @return 0 in case of malformed parameters
+ */
+static unsigned parseLegacyParameters(const char* stringPtr, unsigned* selectivity)
+{
+    if (!longCommandWArg(&stringPtr, "s=") && !longCommandWArg(&stringPtr, "selectivity=")) { return 0; }
+    *selectivity = readU32FromChar(&stringPtr);
+    if (stringPtr[0] != 0) return 0;
+    DISPLAYLEVEL(4, "legacy: selectivity=%u\n", *selectivity);
+    return 1;
+}
+
+static COVER_params_t defaultCoverParams(void)
+{
+    COVER_params_t params;
+    memset(&params, 0, sizeof(params));
+    params.d = 8;
+    params.steps = 4;
+    return params;
+}
 #endif


@ -331,8 +354,8 @@ int main(int argCount, const char* argv[])
    unsigned fileNamesNb;
 #endif
 #ifndef ZSTD_NODICT
-    COVER_params_t coverParams;
-    int cover = 0;
+    COVER_params_t coverParams = defaultCoverParams();
+    int cover = 1;
 #endif

    /* init */
@ -413,18 +436,26 @@ int main(int argCount, const char* argv[])

                    /* long commands with arguments */
 #ifndef ZSTD_NODICT
-                    if (longCommandWArg(&argument, "--cover=")) {
-                      cover=1; if (!parseCoverParameters(argument, &coverParams)) CLEAN_RETURN(badusage(programName));
-                      continue;
-                    }
-                    if (longCommandWArg(&argument, "--optimize-cover")) {
-                      cover=2;
+                    if (longCommandWArg(&argument, "--train-cover")) {
+                      operation = zom_train;
+                      outFileName = g_defaultDictName;
+                      cover = 1;
                      /* Allow optional arguments following an = */
                      if (*argument == 0) { memset(&coverParams, 0, sizeof(coverParams)); }
                      else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
                      else if (!parseCoverParameters(argument, &coverParams)) { CLEAN_RETURN(badusage(programName)); }
                      continue;
                    }
+                    if (longCommandWArg(&argument, "--train-legacy")) {
+                      operation = zom_train;
+                      outFileName = g_defaultDictName;
+                      cover = 0;
+                      /* Allow optional arguments following an = */
+                      if (*argument == 0) { continue; }
+                      else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
+                      else if (!parseLegacyParameters(argument, &dictSelect)) { CLEAN_RETURN(badusage(programName)); }
+                      continue;
+                    }
 #endif
                    if (longCommandWArg(&argument, "--threads=")) { nbThreads = readU32FromChar(&argument); continue; }
                    if (longCommandWArg(&argument, "--memlimit=")) { memLimit = readU32FromChar(&argument); continue; }
@ -659,11 +690,12 @@ int main(int argCount, const char* argv[])
    if (operation==zom_train) {
 #ifndef ZSTD_NODICT
        if (cover) {
+            int const optimize = !coverParams.k || !coverParams.d;
            coverParams.nbThreads = nbThreads;
            coverParams.compressionLevel = dictCLevel;
            coverParams.notificationLevel = g_displayLevel;
            coverParams.dictID = dictID;
-            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, NULL, &coverParams, cover - 1);
+            operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, NULL, &coverParams, optimize);
        } else {
            ZDICT_params_t dictParams;
            memset(&dictParams, 0, sizeof(dictParams));
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@ -314,9 +314,9 @@ esac
 rm -rf dirTestDict
 $ECHO "- dictionary builder on bogus input"
 $ECHO "Hello World" > tmp
-$ZSTD --train -q tmp && die "Dictionary training should fail : not enough input source"
+$ZSTD --train-legacy -q tmp && die "Dictionary training should fail : not enough input source"
 ./datagen -P0 -g10M > tmp
-$ZSTD --train -q tmp && die "Dictionary training should fail : source is pure noise"
+$ZSTD --train-legacy -q tmp && die "Dictionary training should fail : source is pure noise"
 rm tmp*


@ -325,19 +325,39 @@ $ECHO "\n**** cover dictionary tests **** "
 TESTFILE=../programs/zstdcli.c
 ./datagen > tmpDict
 $ECHO "- Create first dictionary"
-$ZSTD --train --cover=k=46,d=8 *.c ../programs/*.c -o tmpDict
+$ZSTD --train-cover=k=46,d=8 *.c ../programs/*.c -o tmpDict
 cp $TESTFILE tmp
 $ZSTD -f tmp -D tmpDict
 $ZSTD -d tmp.zst -D tmpDict -fo result
 $DIFF $TESTFILE result
 $ECHO "- Create second (different) dictionary"
-$ZSTD --train --cover=k=56,d=8 *.c ../programs/*.c ../programs/*.h -o tmpDictC
+$ZSTD --train-cover=k=56,d=8 *.c ../programs/*.c ../programs/*.h -o tmpDictC
 $ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!"
 $ECHO "- Create dictionary with short dictID"
-$ZSTD --train --cover=k=46,d=8 *.c ../programs/*.c --dictID=1 -o tmpDict1
+$ZSTD --train-cover=k=46,d=8 *.c ../programs/*.c --dictID=1 -o tmpDict1
 cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
 $ECHO "- Create dictionary with size limit"
-$ZSTD --train --optimize-cover=steps=8 *.c ../programs/*.c -o tmpDict2 --maxdict=4K
+$ZSTD --train-cover=steps=8 *.c ../programs/*.c -o tmpDict2 --maxdict=4K
+rm tmp*
+
+$ECHO "\n**** legacy dictionary tests **** "
+
+TESTFILE=../programs/zstdcli.c
+./datagen > tmpDict
+$ECHO "- Create first dictionary"
+$ZSTD --train-legacy=selectivity=8 *.c ../programs/*.c -o tmpDict
+cp $TESTFILE tmp
+$ZSTD -f tmp -D tmpDict
+$ZSTD -d tmp.zst -D tmpDict -fo result
+$DIFF $TESTFILE result
+$ECHO "- Create second (different) dictionary"
+$ZSTD --train-legacy=s=5 *.c ../programs/*.c ../programs/*.h -o tmpDictC
+$ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!"
+$ECHO "- Create dictionary with short dictID"
+$ZSTD --train-legacy -s5 *.c ../programs/*.c --dictID=1 -o tmpDict1
+cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
+$ECHO "- Create dictionary with size limit"
+$ZSTD --train-legacy -s9 *.c ../programs/*.c -o tmpDict2 --maxdict=4K
 rm tmp*