Change default f to 20

dev
Jennifer Liu 2018-09-04 17:12:35 -07:00
parent 7a02df8dbe
commit 21721b75a3
5 changed files with 7 additions and 7 deletions

View File

@@ -24,7 +24,7 @@
#define FASTCOVER_MAX_F 31 #define FASTCOVER_MAX_F 31
#define FASTCOVER_MAX_ACCEL 10 #define FASTCOVER_MAX_ACCEL 10
#define DEFAULT_SPLITPOINT 0.75 #define DEFAULT_SPLITPOINT 0.75
#define DEFAULT_F 18 #define DEFAULT_F 20
#define DEFAULT_ACCEL 1 #define DEFAULT_ACCEL 1

View File

@@ -40,7 +40,7 @@ extern "C" {
/*! ZDICT_trainFromBuffer(): /*! ZDICT_trainFromBuffer():
* Train a dictionary from an array of samples. * Train a dictionary from an array of samples.
* Redirect towards ZDICT_optimizeTrainFromBuffer_fastCover() single-threaded, with d=8, steps=4, * Redirect towards ZDICT_optimizeTrainFromBuffer_fastCover() single-threaded, with d=8, steps=4,
* f=18, and accel=1. * f=20, and accel=1.
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`, * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
* The resulting dictionary will be saved into `dictBuffer`. * The resulting dictionary will be saved into `dictBuffer`.
@@ -94,7 +94,7 @@ typedef struct {
typedef struct { typedef struct {
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */ unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */ unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 0 means default(18)*/ unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 0 means default(20)*/
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */ unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */ double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */
@@ -170,7 +170,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}. * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
* if steps is zero it defaults to its default value. * if steps is zero it defaults to its default value.
* If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000]. * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000].
* If f is zero, default value of 18 is used. * If f is zero, default value of 20 is used.
* If accel is zero, default value of 1 is used. * If accel is zero, default value of 1 is used.
* *
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)

View File

@@ -244,7 +244,7 @@ Examples:
Same as cover but with extra parameters \fIf\fR and \fIaccel\fR and different default value of split Same as cover but with extra parameters \fIf\fR and \fIaccel\fR and different default value of split
. .
.IP .IP
If \fIsplit\fR is not specified, then it tries \fIsplit\fR = 75. If \fIf\fR is not specified, then it tries \fIf\fR = 18. Requires that 0 < \fIf\fR < 32. If \fIaccel\fR is not specified, then it tries \fIaccel\fR = 1. Requires that 0 < \fIaccel\fR <= 10. Requires that \fId\fR = 6 or \fId\fR = 8. If \fIsplit\fR is not specified, then it tries \fIsplit\fR = 75. If \fIf\fR is not specified, then it tries \fIf\fR = 20. Requires that 0 < \fIf\fR < 32. If \fIaccel\fR is not specified, then it tries \fIaccel\fR = 1. Requires that 0 < \fIaccel\fR <= 10. Requires that \fId\fR = 6 or \fId\fR = 8.
. .
.IP .IP
\fIf\fR is log of size of array that keeps track of frequency of subsegments of size \fId\fR. The subsegment is hashed to an index in the range [0,2^\fIf\fR - 1]. It is possible that 2 different subsegments are hashed to the same index, and they are considered as the same subsegment when computing frequency. Using a higher \fIf\fR reduces collision but takes longer. \fIf\fR is log of size of array that keeps track of frequency of subsegments of size \fId\fR. The subsegment is hashed to an index in the range [0,2^\fIf\fR - 1]. It is possible that 2 different subsegments are hashed to the same index, and they are considered as the same subsegment when computing frequency. Using a higher \fIf\fR reduces collision but takes longer.

View File

@@ -258,7 +258,7 @@ Compression of small files similar to the sample set will be greatly improved.
* `--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#]`: * `--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#]`:
Same as cover but with extra parameters _f_ and _accel_ and different default value of split Same as cover but with extra parameters _f_ and _accel_ and different default value of split
If _split_ is not specified, then it tries _split_ = 75. If _split_ is not specified, then it tries _split_ = 75.
If _f_ is not specified, then it tries _f_ = 18. If _f_ is not specified, then it tries _f_ = 20.
Requires that 0 < _f_ < 32. Requires that 0 < _f_ < 32.
If _accel_ is not specified, then it tries _accel_ = 1. If _accel_ is not specified, then it tries _accel_ = 1.
Requires that 0 < _accel_ <= 10. Requires that 0 < _accel_ <= 10.

View File

@@ -356,7 +356,7 @@ static ZDICT_fastCover_params_t defaultFastCoverParams(void)
ZDICT_fastCover_params_t params; ZDICT_fastCover_params_t params;
memset(&params, 0, sizeof(params)); memset(&params, 0, sizeof(params));
params.d = 8; params.d = 8;
params.f = 18; params.f = 20;
params.steps = 4; params.steps = 4;
params.splitPoint = 0.75; /* different from default splitPoint of cover */ params.splitPoint = 0.75; /* different from default splitPoint of cover */
params.accel = DEFAULT_ACCEL; params.accel = DEFAULT_ACCEL;