added a test case for dictBuilder failure
a cyclic data set makes the entropy stage fail; now, onto a fix for #304 ...
This commit is contained in:
parent
06995775b0
commit
218e9fe0fc
@ -2869,7 +2869,7 @@ size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
|
|||||||
if (params.nbThreads > 1) {
|
if (params.nbThreads > 1) {
|
||||||
if (cctx->mtctx == NULL || (params.nbThreads != ZSTDMT_getNbThreads(cctx->mtctx))) {
|
if (cctx->mtctx == NULL || (params.nbThreads != ZSTDMT_getNbThreads(cctx->mtctx))) {
|
||||||
DEBUGLOG(4, "ZSTD_compress_generic: creating new mtctx for nbThreads=%u (previous: %u)",
|
DEBUGLOG(4, "ZSTD_compress_generic: creating new mtctx for nbThreads=%u (previous: %u)",
|
||||||
params.nbThreads, ZSTDMT_getNbThreads(cctx->mtctx));
|
params.nbThreads, (U32)ZSTDMT_getNbThreads(cctx->mtctx));
|
||||||
ZSTDMT_freeCCtx(cctx->mtctx);
|
ZSTDMT_freeCCtx(cctx->mtctx);
|
||||||
cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbThreads, cctx->customMem);
|
cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbThreads, cctx->customMem);
|
||||||
if (cctx->mtctx == NULL) return ERROR(memory_allocation);
|
if (cctx->mtctx == NULL) return ERROR(memory_allocation);
|
||||||
|
@ -537,8 +537,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|||||||
/* Checks */
|
/* Checks */
|
||||||
if (totalSamplesSize < MAX(d, sizeof(U64)) ||
|
if (totalSamplesSize < MAX(d, sizeof(U64)) ||
|
||||||
totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
|
totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
|
||||||
DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
|
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
|
||||||
(COVER_MAX_SAMPLES_SIZE >> 20));
|
(U32)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
/* Zero the context */
|
/* Zero the context */
|
||||||
@ -651,12 +651,16 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
|
|||||||
}
|
}
|
||||||
|
|
||||||
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
||||||
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
void *dictBuffer, size_t dictBufferCapacity,
|
||||||
const size_t *samplesSizes, unsigned nbSamples,
|
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
||||||
ZDICT_cover_params_t parameters) {
|
ZDICT_cover_params_t parameters)
|
||||||
|
{
|
||||||
BYTE* const dict = (BYTE*)dictBuffer;
|
BYTE* const dict = (BYTE*)dictBuffer;
|
||||||
COVER_ctx_t ctx;
|
COVER_ctx_t ctx;
|
||||||
COVER_map_t activeDmers;
|
COVER_map_t activeDmers;
|
||||||
|
|
||||||
|
/* Initialize global data */
|
||||||
|
g_displayLevel = parameters.zParams.notificationLevel;
|
||||||
/* Checks */
|
/* Checks */
|
||||||
if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
|
if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
|
||||||
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
|
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
|
||||||
@ -671,8 +675,6 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|||||||
ZDICT_DICTSIZE_MIN);
|
ZDICT_DICTSIZE_MIN);
|
||||||
return ERROR(dstSize_tooSmall);
|
return ERROR(dstSize_tooSmall);
|
||||||
}
|
}
|
||||||
/* Initialize global data */
|
|
||||||
g_displayLevel = parameters.zParams.notificationLevel;
|
|
||||||
/* Initialize context and activeDmers */
|
/* Initialize context and activeDmers */
|
||||||
if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
|
if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
|
||||||
parameters.d)) {
|
parameters.d)) {
|
||||||
@ -947,6 +949,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|||||||
unsigned k;
|
unsigned k;
|
||||||
COVER_best_t best;
|
COVER_best_t best;
|
||||||
POOL_ctx *pool = NULL;
|
POOL_ctx *pool = NULL;
|
||||||
|
|
||||||
/* Checks */
|
/* Checks */
|
||||||
if (kMinK < kMaxD || kMaxK < kMinK) {
|
if (kMinK < kMaxD || kMaxK < kMinK) {
|
||||||
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
|
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
|
||||||
@ -1004,7 +1007,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|||||||
data->parameters.k = k;
|
data->parameters.k = k;
|
||||||
data->parameters.d = d;
|
data->parameters.d = d;
|
||||||
data->parameters.steps = kSteps;
|
data->parameters.steps = kSteps;
|
||||||
data->parameters.zParams.notificationLevel = g_displayLevel;
|
data->parameters.zParams.notificationLevel = displayLevel;
|
||||||
/* Check the parameters */
|
/* Check the parameters */
|
||||||
if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
|
if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
|
||||||
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
|
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
|
||||||
|
@ -207,7 +207,6 @@ static dictItem ZDICT_analyzePos(
|
|||||||
U32 cumulLength[LLIMIT] = {0};
|
U32 cumulLength[LLIMIT] = {0};
|
||||||
U32 savings[LLIMIT] = {0};
|
U32 savings[LLIMIT] = {0};
|
||||||
const BYTE* b = (const BYTE*)buffer;
|
const BYTE* b = (const BYTE*)buffer;
|
||||||
size_t length;
|
|
||||||
size_t maxLength = LLIMIT;
|
size_t maxLength = LLIMIT;
|
||||||
size_t pos = suffix[start];
|
size_t pos = suffix[start];
|
||||||
U32 end = start;
|
U32 end = start;
|
||||||
@ -222,26 +221,30 @@ static dictItem ZDICT_analyzePos(
|
|||||||
||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3))
|
||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3))
|
||||||
||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) {
|
||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) {
|
||||||
/* skip and mark segment */
|
/* skip and mark segment */
|
||||||
U16 u16 = MEM_read16(b+pos+4);
|
U16 const pattern16 = MEM_read16(b+pos+4);
|
||||||
U32 u, e = 6;
|
U32 u, patternEnd = 6;
|
||||||
while (MEM_read16(b+pos+e) == u16) e+=2 ;
|
while (MEM_read16(b+pos+patternEnd) == pattern16) patternEnd+=2 ;
|
||||||
if (b[pos+e] == b[pos+e-1]) e++;
|
if (b[pos+patternEnd] == b[pos+patternEnd-1]) patternEnd++;
|
||||||
for (u=1; u<e; u++)
|
for (u=1; u<patternEnd; u++)
|
||||||
doneMarks[pos+u] = 1;
|
doneMarks[pos+u] = 1;
|
||||||
return solution;
|
return solution;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* look forward */
|
/* look forward */
|
||||||
|
{ size_t length;
|
||||||
do {
|
do {
|
||||||
end++;
|
end++;
|
||||||
length = ZDICT_count(b + pos, b + suffix[end]);
|
length = ZDICT_count(b + pos, b + suffix[end]);
|
||||||
} while (length >= MINMATCHLENGTH);
|
} while (length >= MINMATCHLENGTH);
|
||||||
|
}
|
||||||
|
|
||||||
/* look backward */
|
/* look backward */
|
||||||
|
{ size_t length;
|
||||||
do {
|
do {
|
||||||
length = ZDICT_count(b + pos, b + *(suffix+start-1));
|
length = ZDICT_count(b + pos, b + *(suffix+start-1));
|
||||||
if (length >=MINMATCHLENGTH) start--;
|
if (length >=MINMATCHLENGTH) start--;
|
||||||
} while(length >= MINMATCHLENGTH);
|
} while(length >= MINMATCHLENGTH);
|
||||||
|
}
|
||||||
|
|
||||||
/* exit if not found a minimum nb of repetitions */
|
/* exit if not found a minimum nb of repetitions */
|
||||||
if (end-start < minRatio) {
|
if (end-start < minRatio) {
|
||||||
@ -297,21 +300,24 @@ static dictItem ZDICT_analyzePos(
|
|||||||
memset(lengthList, 0, sizeof(lengthList));
|
memset(lengthList, 0, sizeof(lengthList));
|
||||||
|
|
||||||
/* look forward */
|
/* look forward */
|
||||||
|
{ size_t length;
|
||||||
do {
|
do {
|
||||||
end++;
|
end++;
|
||||||
length = ZDICT_count(b + pos, b + suffix[end]);
|
length = ZDICT_count(b + pos, b + suffix[end]);
|
||||||
if (length >= LLIMIT) length = LLIMIT-1;
|
if (length >= LLIMIT) length = LLIMIT-1;
|
||||||
lengthList[length]++;
|
lengthList[length]++;
|
||||||
} while (length >=MINMATCHLENGTH);
|
} while (length >=MINMATCHLENGTH);
|
||||||
|
}
|
||||||
|
|
||||||
/* look backward */
|
/* look backward */
|
||||||
length = MINMATCHLENGTH;
|
{ size_t length = MINMATCHLENGTH;
|
||||||
while ((length >= MINMATCHLENGTH) & (start > 0)) {
|
while ((length >= MINMATCHLENGTH) & (start > 0)) {
|
||||||
length = ZDICT_count(b + pos, b + suffix[start - 1]);
|
length = ZDICT_count(b + pos, b + suffix[start - 1]);
|
||||||
if (length >= LLIMIT) length = LLIMIT - 1;
|
if (length >= LLIMIT) length = LLIMIT - 1;
|
||||||
lengthList[length]++;
|
lengthList[length]++;
|
||||||
if (length >= MINMATCHLENGTH) start--;
|
if (length >= MINMATCHLENGTH) start--;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* largest useful length */
|
/* largest useful length */
|
||||||
memset(cumulLength, 0, sizeof(cumulLength));
|
memset(cumulLength, 0, sizeof(cumulLength));
|
||||||
@ -345,12 +351,12 @@ static dictItem ZDICT_analyzePos(
|
|||||||
/* mark positions done */
|
/* mark positions done */
|
||||||
{ U32 id;
|
{ U32 id;
|
||||||
for (id=start; id<end; id++) {
|
for (id=start; id<end; id++) {
|
||||||
U32 p, pEnd;
|
U32 p, pEnd, length;
|
||||||
U32 const testedPos = suffix[id];
|
U32 const testedPos = suffix[id];
|
||||||
if (testedPos == pos)
|
if (testedPos == pos)
|
||||||
length = solution.length;
|
length = solution.length;
|
||||||
else {
|
else {
|
||||||
length = ZDICT_count(b+pos, b+testedPos);
|
length = (U32)ZDICT_count(b+pos, b+testedPos);
|
||||||
if (length > solution.length) length = solution.length;
|
if (length > solution.length) length = solution.length;
|
||||||
}
|
}
|
||||||
pEnd = (U32)(testedPos + length);
|
pEnd = (U32)(testedPos + length);
|
||||||
@ -575,8 +581,8 @@ static void ZDICT_fillNoise(void* buffer, size_t length)
|
|||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
ZSTD_CCtx* ref;
|
ZSTD_CCtx* ref; /* contains reference to dictionary */
|
||||||
ZSTD_CCtx* zc;
|
ZSTD_CCtx* zc; /* working context */
|
||||||
void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
|
void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
|
||||||
} EStats_ress_t;
|
} EStats_ress_t;
|
||||||
|
|
||||||
@ -584,7 +590,8 @@ typedef struct
|
|||||||
|
|
||||||
static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
|
static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
|
||||||
U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
|
U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
|
||||||
const void* src, size_t srcSize, U32 notificationLevel)
|
const void* src, size_t srcSize,
|
||||||
|
U32 notificationLevel)
|
||||||
{
|
{
|
||||||
size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
|
size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
|
||||||
size_t cSize;
|
size_t cSize;
|
||||||
@ -597,7 +604,7 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
|
|||||||
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
|
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
|
||||||
|
|
||||||
if (cSize) { /* if == 0; block is not compressible */
|
if (cSize) { /* if == 0; block is not compressible */
|
||||||
const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc);
|
const seqStore_t* const seqStorePtr = ZSTD_getSeqStore(esr.zc);
|
||||||
|
|
||||||
/* literals stats */
|
/* literals stats */
|
||||||
{ const BYTE* bytePtr;
|
{ const BYTE* bytePtr;
|
||||||
@ -688,6 +695,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|||||||
BYTE* dstPtr = (BYTE*)dstBuffer;
|
BYTE* dstPtr = (BYTE*)dstBuffer;
|
||||||
|
|
||||||
/* init */
|
/* init */
|
||||||
|
DEBUGLOG(4, "ZDICT_analyzeEntropy");
|
||||||
esr.ref = ZSTD_createCCtx();
|
esr.ref = ZSTD_createCCtx();
|
||||||
esr.zc = ZSTD_createCCtx();
|
esr.zc = ZSTD_createCCtx();
|
||||||
esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
|
esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
|
||||||
@ -713,7 +721,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|||||||
goto _cleanup;
|
goto _cleanup;
|
||||||
} }
|
} }
|
||||||
|
|
||||||
/* collect stats on all files */
|
/* collect stats on all samples */
|
||||||
for (u=0; u<nbFiles; u++) {
|
for (u=0; u<nbFiles; u++) {
|
||||||
ZDICT_countEStats(esr, params,
|
ZDICT_countEStats(esr, params,
|
||||||
countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
|
countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
|
||||||
@ -850,6 +858,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|||||||
U32 const notificationLevel = params.notificationLevel;
|
U32 const notificationLevel = params.notificationLevel;
|
||||||
|
|
||||||
/* check conditions */
|
/* check conditions */
|
||||||
|
DEBUGLOG(4, "ZDICT_finalizeDictionary");
|
||||||
if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
|
if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
|
||||||
if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
|
if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
|
||||||
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
|
||||||
@ -1054,14 +1063,18 @@ size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
|||||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
|
||||||
{
|
{
|
||||||
ZDICT_cover_params_t params;
|
ZDICT_cover_params_t params;
|
||||||
|
DEBUGLOG(3, "ZDICT_trainFromBuffer");
|
||||||
memset(&params, 0, sizeof(params));
|
memset(&params, 0, sizeof(params));
|
||||||
params.d = 8;
|
params.d = 8;
|
||||||
params.steps = 4;
|
params.steps = 4;
|
||||||
/* Default to level 6 since no compression level information is avaialble */
|
/* Default to level 6 since no compression level information is available */
|
||||||
params.zParams.compressionLevel = 6;
|
params.zParams.compressionLevel = 6;
|
||||||
|
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
|
||||||
|
params.zParams.notificationLevel = ZSTD_DEBUG;
|
||||||
|
#endif
|
||||||
return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity,
|
return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity,
|
||||||
samplesBuffer, samplesSizes,
|
samplesBuffer, samplesSizes, nbSamples,
|
||||||
nbSamples, &params);
|
&params);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
|
size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
|
||||||
|
@ -39,7 +39,7 @@ extern "C" {
|
|||||||
|
|
||||||
/*! ZDICT_trainFromBuffer():
|
/*! ZDICT_trainFromBuffer():
|
||||||
* Train a dictionary from an array of samples.
|
* Train a dictionary from an array of samples.
|
||||||
* Uses ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4.
|
* Redirect towards ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4.
|
||||||
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
||||||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
||||||
* The resulting dictionary will be saved into `dictBuffer`.
|
* The resulting dictionary will be saved into `dictBuffer`.
|
||||||
@ -47,8 +47,8 @@ extern "C" {
|
|||||||
* or an error code, which can be tested with ZDICT_isError().
|
* or an error code, which can be tested with ZDICT_isError().
|
||||||
* Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
|
* Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
|
||||||
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
||||||
* It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
|
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
|
||||||
* In general, it's recommended to provide a few thousands samples, but this can vary a lot.
|
* In general, it's recommended to provide a few thousands samples, though this can vary a lot.
|
||||||
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
||||||
*/
|
*/
|
||||||
ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
||||||
@ -72,14 +72,14 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
|
|||||||
* ==================================================================================== */
|
* ==================================================================================== */
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int compressionLevel; /* 0 means default; target a specific zstd compression level */
|
int compressionLevel; /* optimize for a specific zstd compression level; 0 means default */
|
||||||
unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
unsigned notificationLevel; /* Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
||||||
unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
|
unsigned dictID; /* force dictID value; 0 means auto mode (32-bits random value) */
|
||||||
} ZDICT_params_t;
|
} ZDICT_params_t;
|
||||||
|
|
||||||
/*! ZDICT_cover_params_t:
|
/*! ZDICT_cover_params_t:
|
||||||
* For all values 0 means default.
|
|
||||||
* k and d are the only required parameters.
|
* k and d are the only required parameters.
|
||||||
|
* For others, value 0 means default.
|
||||||
*/
|
*/
|
||||||
typedef struct {
|
typedef struct {
|
||||||
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
|
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
|
||||||
@ -99,20 +99,20 @@ typedef struct {
|
|||||||
* or an error code, which can be tested with ZDICT_isError().
|
* or an error code, which can be tested with ZDICT_isError().
|
||||||
* Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
|
* Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
|
||||||
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
||||||
* It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
|
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
|
||||||
* In general, it's recommended to provide a few thousands samples, but this can vary a lot.
|
* In general, it's recommended to provide a few thousands samples, though this can vary a lot.
|
||||||
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
||||||
*/
|
*/
|
||||||
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
||||||
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
void *dictBuffer, size_t dictBufferCapacity,
|
||||||
const size_t *samplesSizes, unsigned nbSamples,
|
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
||||||
ZDICT_cover_params_t parameters);
|
ZDICT_cover_params_t parameters);
|
||||||
|
|
||||||
/*! ZDICT_optimizeTrainFromBuffer_cover():
|
/*! ZDICT_optimizeTrainFromBuffer_cover():
|
||||||
* The same requirements as above hold for all the parameters except `parameters`.
|
* The same requirements as above hold for all the parameters except `parameters`.
|
||||||
* This function tries many parameter combinations and picks the best parameters.
|
* This function tries many parameter combinations and picks the best parameters.
|
||||||
* `*parameters` is filled with the best parameters found, and the dictionary
|
* `*parameters` is filled with the best parameters found,
|
||||||
* constructed with those parameters is stored in `dictBuffer`.
|
* dictionary constructed with those parameters is stored in `dictBuffer`.
|
||||||
*
|
*
|
||||||
* All of the parameters d, k, steps are optional.
|
* All of the parameters d, k, steps are optional.
|
||||||
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
|
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
|
||||||
@ -125,8 +125,8 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|||||||
* Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
|
* Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
|
||||||
*/
|
*/
|
||||||
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
||||||
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
void* dictBuffer, size_t dictBufferCapacity,
|
||||||
const size_t *samplesSizes, unsigned nbSamples,
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
||||||
ZDICT_cover_params_t* parameters);
|
ZDICT_cover_params_t* parameters);
|
||||||
|
|
||||||
/*! ZDICT_finalizeDictionary():
|
/*! ZDICT_finalizeDictionary():
|
||||||
@ -165,14 +165,15 @@ typedef struct {
|
|||||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||||
* or an error code, which can be tested with ZDICT_isError().
|
* or an error code, which can be tested with ZDICT_isError().
|
||||||
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
||||||
* It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
|
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
|
||||||
* In general, it's recommended to provide a few thousands samples, but this can vary a lot.
|
* In general, it's recommended to provide a few thousands samples, though this can vary a lot.
|
||||||
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
||||||
* Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
|
* Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
|
||||||
*/
|
*/
|
||||||
ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
|
ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
|
||||||
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
void *dictBuffer, size_t dictBufferCapacity,
|
||||||
const size_t *samplesSizes, unsigned nbSamples, ZDICT_legacy_params_t parameters);
|
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
||||||
|
ZDICT_legacy_params_t parameters);
|
||||||
|
|
||||||
/* Deprecation warnings */
|
/* Deprecation warnings */
|
||||||
/* It is generally possible to disable deprecation warnings from compiler,
|
/* It is generally possible to disable deprecation warnings from compiler,
|
||||||
|
@ -673,6 +673,15 @@ static int basicUnitTests(U32 seed, double compressibility)
|
|||||||
goto _output_error;
|
goto _output_error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DISPLAYLEVEL(4, "test%3i : dictBuilder on cyclic data : ", testNb++);
|
||||||
|
assert(compressedBufferSize >= totalSampleSize);
|
||||||
|
{ U32 u; for (u=0; u<totalSampleSize; u++) ((BYTE*)compressedBuffer)[u] = (BYTE)u; }
|
||||||
|
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
|
||||||
|
dictSize = ZDICT_trainFromBuffer(dictBuffer, dictSize,
|
||||||
|
compressedBuffer, samplesSizes, nbSamples);
|
||||||
|
if (ZDICT_isError(dictSize)) goto _output_error;
|
||||||
|
DISPLAYLEVEL(4, "OK, created dictionary of size %u \n", (U32)dictSize);
|
||||||
|
|
||||||
DISPLAYLEVEL(4, "test%3i : dictBuilder : ", testNb++);
|
DISPLAYLEVEL(4, "test%3i : dictBuilder : ", testNb++);
|
||||||
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
|
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
|
||||||
dictSize = ZDICT_trainFromBuffer(dictBuffer, dictSize,
|
dictSize = ZDICT_trainFromBuffer(dictBuffer, dictSize,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user