diff --git a/lib/dictBuilder/fastcover.c b/lib/dictBuilder/fastcover.c index fbf5770b..6ce8c880 100644 --- a/lib/dictBuilder/fastcover.c +++ b/lib/dictBuilder/fastcover.c @@ -24,7 +24,7 @@ #define FASTCOVER_MAX_F 31 #define FASTCOVER_MAX_ACCEL 10 #define DEFAULT_SPLITPOINT 0.75 -#define DEFAULT_F 18 +#define DEFAULT_F 20 #define DEFAULT_ACCEL 1 diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h index cf598177..c9e0c295 100644 --- a/lib/dictBuilder/zdict.h +++ b/lib/dictBuilder/zdict.h @@ -40,7 +40,7 @@ extern "C" { /*! ZDICT_trainFromBuffer(): * Train a dictionary from an array of samples. * Redirect towards ZDICT_optimizeTrainFromBuffer_fastCover() single-threaded, with d=8, steps=4, - * f=18, and accel=1. + * f=20, and accel=1. * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. * The resulting dictionary will be saved into `dictBuffer`. @@ -94,7 +94,7 @@ typedef struct { typedef struct { unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */ unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */ - unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 1 means default(18)*/ + unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 0 means default(20)*/ unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */ unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training 
and testing */ @@ -170,7 +170,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer, * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}. * if steps is zero it defaults to its default value. * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000]. - * If f is zero, default value of 18 is used. + * If f is zero, default value of 20 is used. * If accel is zero, default value of 1 is used. * * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) diff --git a/programs/zstd.1 b/programs/zstd.1 index 526701a2..53fedbb9 100644 --- a/programs/zstd.1 +++ b/programs/zstd.1 @@ -244,7 +244,7 @@ Examples: Same as cover but with extra parameters \fIf\fR and \fIaccel\fR and different default value of split . .IP -If \fIsplit\fR is not specified, then it tries \fIsplit\fR = 75. If \fIf\fR is not specified, then it tries \fIf\fR = 18. Requires that 0 < \fIf\fR < 32. If \fIaccel\fR is not specified, then it tries \fIaccel\fR = 1. Requires that 0 < \fIaccel\fR <= 10. Requires that \fId\fR = 6 or \fId\fR = 8. +If \fIsplit\fR is not specified, then it tries \fIsplit\fR = 75. If \fIf\fR is not specified, then it tries \fIf\fR = 20. Requires that 0 < \fIf\fR < 32. If \fIaccel\fR is not specified, then it tries \fIaccel\fR = 1. Requires that 0 < \fIaccel\fR <= 10. Requires that \fId\fR = 6 or \fId\fR = 8. . .IP \fIf\fR is log of size of array that keeps track of frequency of subsegments of size \fId\fR. The subsegment is hashed to an index in the range [0,2^\fIf\fR - 1]. It is possible that 2 different subsegments are hashed to the same index, and they are considered as the same subsegment when computing frequency. Using a higher \fIf\fR reduces collision but takes longer. 
diff --git a/programs/zstd.1.md b/programs/zstd.1.md index 14c26265..21f456cf 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -258,7 +258,7 @@ Compression of small files similar to the sample set will be greatly improved. * `--train-fastcover[=k#,d=#,f=#,steps=#,split=#,accel=#]`: Same as cover but with extra parameters _f_ and _accel_ and different default value of split If _split_ is not specified, then it tries _split_ = 75. - If _f_ is not specified, then it tries _f_ = 18. + If _f_ is not specified, then it tries _f_ = 20. Requires that 0 < _f_ < 32. If _accel_ is not specified, then it tries _accel_ = 1. Requires that 0 < _accel_ <= 10. diff --git a/programs/zstdcli.c b/programs/zstdcli.c index dcbf6ce4..0fccd34f 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -356,7 +356,7 @@ static ZDICT_fastCover_params_t defaultFastCoverParams(void) ZDICT_fastCover_params_t params; memset(&params, 0, sizeof(params)); params.d = 8; - params.f = 18; + params.f = 20; params.steps = 4; params.splitPoint = 0.75; /* different from default splitPoint of cover */ params.accel = DEFAULT_ACCEL;