Merge pull request #1306 from jennifermliu/dev
Change default f of FASTCOVER to 20
This commit is contained in:
commit
d492ef4e07
@ -24,7 +24,7 @@
|
||||
#define FASTCOVER_MAX_F 31
|
||||
#define FASTCOVER_MAX_ACCEL 10
|
||||
#define DEFAULT_SPLITPOINT 0.75
|
||||
#define DEFAULT_F 18
|
||||
#define DEFAULT_F 20
|
||||
#define DEFAULT_ACCEL 1
|
||||
|
||||
|
||||
|
@ -40,7 +40,7 @@ extern "C" {
|
||||
/*! ZDICT_trainFromBuffer():
|
||||
* Train a dictionary from an array of samples.
|
||||
* Redirect towards ZDICT_optimizeTrainFromBuffer_fastCover() single-threaded, with d=8, steps=4,
|
||||
* f=18, and accel=1.
|
||||
* f=20, and accel=1.
|
||||
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
||||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
||||
* The resulting dictionary will be saved into `dictBuffer`.
|
||||
@ -94,7 +94,7 @@ typedef struct {
|
||||
typedef struct {
|
||||
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
|
||||
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
|
||||
unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 1 means default(18)*/
|
||||
unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 1 means default(20)*/
|
||||
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
|
||||
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
|
||||
double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */
|
||||
@ -170,7 +170,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
|
||||
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
|
||||
* if steps is zero it defaults to its default value.
|
||||
* If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000].
|
||||
* If f is zero, default value of 18 is used.
|
||||
* If f is zero, default value of 20 is used.
|
||||
* If accel is zero, default value of 1 is used.
|
||||
*
|
||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
|
@ -244,7 +244,7 @@ Examples:
|
||||
Same as cover but with extra parameters \fIf\fR and \fIaccel\fR and different default value of split
|
||||
.
|
||||
.IP
|
||||
If \fIsplit\fR is not specified, then it tries \fIsplit\fR = 75. If \fIf\fR is not specified, then it tries \fIf\fR = 18. Requires that 0 < \fIf\fR < 32. If \fIaccel\fR is not specified, then it tries \fIaccel\fR = 1. Requires that 0 < \fIaccel\fR <= 10. Requires that \fId\fR = 6 or \fId\fR = 8.
|
||||
If \fIsplit\fR is not specified, then it tries \fIsplit\fR = 75. If \fIf\fR is not specified, then it tries \fIf\fR = 20. Requires that 0 < \fIf\fR < 32. If \fIaccel\fR is not specified, then it tries \fIaccel\fR = 1. Requires that 0 < \fIaccel\fR <= 10. Requires that \fId\fR = 6 or \fId\fR = 8.
|
||||
.
|
||||
.IP
|
||||
\fIf\fR is log of size of array that keeps track of frequency of subsegments of size \fId\fR. The subsegment is hashed to an index in the range [0,2^\fIf\fR - 1]. It is possible that 2 different subsegments are hashed to the same index, and they are considered as the same subsegment when computing frequency. Using a higher \fIf\fR reduces collision but takes longer.
|
||||
|
@ -258,7 +258,7 @@ Compression of small files similar to the sample set will be greatly improved.
|
||||
* `--train-fastcover[=k#,d=#,f=#,steps=#,split=#,accel=#]`:
|
||||
Same as cover but with extra parameters _f_ and _accel_ and different default value of split
|
||||
If _split_ is not specified, then it tries _split_ = 75.
|
||||
If _f_ is not specified, then it tries _f_ = 18.
|
||||
If _f_ is not specified, then it tries _f_ = 20.
|
||||
Requires that 0 < _f_ < 32.
|
||||
If _accel_ is not specified, then it tries _accel_ = 1.
|
||||
Requires that 0 < _accel_ <= 10.
|
||||
|
@ -356,7 +356,7 @@ static ZDICT_fastCover_params_t defaultFastCoverParams(void)
|
||||
ZDICT_fastCover_params_t params;
|
||||
memset(¶ms, 0, sizeof(params));
|
||||
params.d = 8;
|
||||
params.f = 18;
|
||||
params.f = 20;
|
||||
params.steps = 4;
|
||||
params.splitPoint = 0.75; /* different from default splitPoint of cover */
|
||||
params.accel = DEFAULT_ACCEL;
|
||||
|
Loading…
x
Reference in New Issue
Block a user