diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c index 1863c8f3..06c1b9fa 100644 --- a/lib/dictBuilder/cover.c +++ b/lib/dictBuilder/cover.c @@ -398,7 +398,8 @@ typedef struct { */ static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs, COVER_map_t *activeDmers, U32 begin, - U32 end, COVER_params_t parameters) { + U32 end, + ZDICT_cover_params_t parameters) { /* Constants */ const U32 k = parameters.k; const U32 d = parameters.d; @@ -478,7 +479,7 @@ static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs, * Check the validity of the parameters. * Returns non-zero if the parameters are valid and 0 otherwise. */ -static int COVER_checkParameters(COVER_params_t parameters) { +static int COVER_checkParameters(ZDICT_cover_params_t parameters) { /* k and d are required parameters */ if (parameters.d == 0 || parameters.k == 0) { return 0; @@ -600,7 +601,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs, COVER_map_t *activeDmers, void *dictBuffer, size_t dictBufferCapacity, - COVER_params_t parameters) { + ZDICT_cover_params_t parameters) { BYTE *const dict = (BYTE *)dictBuffer; size_t tail = dictBufferCapacity; /* Divide the data up into epochs of equal size. @@ -639,22 +640,10 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs, return tail; } -/** - * Translate from COVER_params_t to ZDICT_params_t required for finalizing the - * dictionary. - */ -static ZDICT_params_t COVER_translateParams(COVER_params_t parameters) { - ZDICT_params_t zdictParams; - memset(&zdictParams, 0, sizeof(zdictParams)); - zdictParams.notificationLevel = 1; - zdictParams.dictID = parameters.dictID; - zdictParams.compressionLevel = parameters.compressionLevel; - return zdictParams; -} - -ZDICTLIB_API size_t COVER_trainFromBuffer( +ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, - const size_t *samplesSizes, unsigned nbSamples, COVER_params_t parameters) { + const size_t *samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t parameters) { BYTE *const dict = (BYTE *)dictBuffer; COVER_ctx_t ctx; COVER_map_t activeDmers; @@ -673,7 +662,7 @@ ZDICTLIB_API size_t COVER_trainFromBuffer( return ERROR(dstSize_tooSmall); } /* Initialize global data */ - g_displayLevel = parameters.notificationLevel; + g_displayLevel = parameters.zParams.notificationLevel; /* Initialize context and activeDmers */ if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, parameters.d)) { @@ -690,10 +679,9 @@ ZDICTLIB_API size_t COVER_trainFromBuffer( const size_t tail = COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer, dictBufferCapacity, parameters); - ZDICT_params_t zdictParams = COVER_translateParams(parameters); const size_t dictionarySize = ZDICT_finalizeDictionary( dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, - samplesBuffer, samplesSizes, nbSamples, zdictParams); + samplesBuffer, samplesSizes, nbSamples, parameters.zParams); if (!ZSTD_isError(dictionarySize)) { DISPLAYLEVEL(2, "Constructed dictionary of size %u\n", (U32)dictionarySize); @@ -718,7 +706,7 @@ typedef struct COVER_best_s { size_t liveJobs; void *dict; size_t dictSize; - COVER_params_t parameters; + ZDICT_cover_params_t parameters; size_t compressedSize; } COVER_best_t; @@ -786,7 +774,7 @@ static void COVER_best_start(COVER_best_t *best) { * If this dictionary is the best so far save it and its parameters. */ static void COVER_best_finish(COVER_best_t *best, size_t compressedSize, - COVER_params_t parameters, void *dict, + ZDICT_cover_params_t parameters, void *dict, size_t dictSize) { if (!best) { return; @@ -830,7 +818,7 @@ typedef struct COVER_tryParameters_data_s { const COVER_ctx_t *ctx; COVER_best_t *best; size_t dictBufferCapacity; - COVER_params_t parameters; + ZDICT_cover_params_t parameters; } COVER_tryParameters_data_t; /** @@ -842,7 +830,7 @@ static void COVER_tryParameters(void *opaque) { /* Save parameters as local variables */ COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque; const COVER_ctx_t *const ctx = data->ctx; - const COVER_params_t parameters = data->parameters; + const ZDICT_cover_params_t parameters = data->parameters; size_t dictBufferCapacity = data->dictBufferCapacity; size_t totalCompressedSize = ERROR(GENERIC); /* Allocate space for hash table, dict, and freqs */ @@ -863,10 +851,10 @@ static void COVER_tryParameters(void *opaque) { { const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict, dictBufferCapacity, parameters); - const ZDICT_params_t zdictParams = COVER_translateParams(parameters); dictBufferCapacity = ZDICT_finalizeDictionary( dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, - ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples, zdictParams); + ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples, + parameters.zParams); if (ZDICT_isError(dictBufferCapacity)) { DISPLAYLEVEL(1, "Failed to finalize dictionary\n"); goto _cleanup; @@ -892,8 +880,8 @@ static void COVER_tryParameters(void *opaque) { } /* Create the cctx and cdict */ cctx = ZSTD_createCCtx(); - cdict = - ZSTD_createCDict(dict, dictBufferCapacity, parameters.compressionLevel); + cdict = ZSTD_createCDict(dict, dictBufferCapacity, + parameters.zParams.compressionLevel); if (!dst || !cctx || !cdict) { goto _compressCleanup; } @@ -930,12 +918,10 @@ _cleanup: } } -ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer, - size_t dictBufferCapacity, - const void *samplesBuffer, - const size_t *samplesSizes, - unsigned nbSamples, - COVER_params_t *parameters) { +ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( + void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, + const size_t *samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t *parameters) { /* constants */ const unsigned nbThreads = parameters->nbThreads; const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d; @@ -947,7 +933,7 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer, const unsigned kIterations = (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); /* Local variables */ - const int displayLevel = parameters->notificationLevel; + const int displayLevel = parameters->zParams.notificationLevel; unsigned iteration = 1; unsigned d; unsigned k; @@ -976,7 +962,7 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer, /* Initialization */ COVER_best_init(&best); /* Turn down global display level to clean up display at level 2 and below */ - g_displayLevel = parameters->notificationLevel - 1; + g_displayLevel = parameters->zParams.notificationLevel - 1; /* Loop through d first because each new value needs a new context */ LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n", kIterations); diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index 943ddde0..8bc6a019 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -487,7 +487,7 @@ static U32 ZDICT_dictSize(const dictItem* dictList) } -static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize, +static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize, const void* const buffer, size_t bufferSize, /* buffer must end with noisy guard band */ const size_t* fileSizes, unsigned nbFiles, U32 minRatio, U32 notificationLevel) @@ -634,17 +634,6 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params, } } } } -/* -static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles) -{ - unsigned u; - size_t max=0; - for (u=0; u= 3) { + if (params.zParams.notificationLevel>= 3) { U32 const nb = MIN(25, dictList[0].pos); U32 const dictContentSize = ZDICT_dictSize(dictList); U32 u; @@ -1026,7 +1015,7 @@ size_t ZDICT_trainFromBuffer_unsafe( dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize, samplesBuffer, samplesSizes, nbSamples, - params); + params.zParams); } /* clean up */ @@ -1037,9 +1026,9 @@ size_t ZDICT_trainFromBuffer_unsafe( /* issue : samplesBuffer need to be followed by a noisy guard band. * work around : duplicate the buffer, and add the noise */ -size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity, - const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, - ZDICT_params_t params) +size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_legacy_params_t params) { size_t result; void* newBuff; @@ -1052,10 +1041,9 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit memcpy(newBuff, samplesBuffer, sBuffSize); ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */ - result = ZDICT_trainFromBuffer_unsafe( - dictBuffer, dictBufferCapacity, - newBuff, samplesSizes, nbSamples, - params); + result = + ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, dictBufferCapacity, newBuff, + samplesSizes, nbSamples, params); free(newBuff); return result; } @@ -1064,11 +1052,13 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples) { - ZDICT_params_t params; + ZDICT_cover_params_t params; memset(¶ms, 0, sizeof(params)); - return ZDICT_trainFromBuffer_advanced(dictBuffer, dictBufferCapacity, - samplesBuffer, samplesSizes, nbSamples, - params); + params.d = 8; + params.steps = 4; + return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity, + samplesBuffer, samplesSizes, + nbSamples, ¶ms); } size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h index 5ef2a3f5..7bfbb351 100644 --- a/lib/dictBuilder/zdict.h +++ b/lib/dictBuilder/zdict.h @@ -36,18 +36,20 @@ extern "C" { #endif -/*! ZDICT_trainFromBuffer() : - Train a dictionary from an array of samples. - Samples must be stored concatenated in a single flat buffer `samplesBuffer`, - supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. - The resulting dictionary will be saved into `dictBuffer`. - @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) - or an error code, which can be tested with ZDICT_isError(). - Tips : In general, a reasonable dictionary has a size of ~ 100 KB. - It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`. - In general, it's recommended to provide a few thousands samples, but this can vary a lot. - It's recommended that total size of all samples be about ~x100 times the target size of dictionary. -*/ +/*! ZDICT_trainFromBuffer(): + * Train a dictionary from an array of samples. + * Uses ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, but this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + */ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); @@ -69,94 +71,78 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode); * ==================================================================================== */ typedef struct { - unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */ int compressionLevel; /* 0 means default; target a specific zstd compression level */ unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */ unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */ - unsigned reserved[2]; /* reserved space for future parameters */ } ZDICT_params_t; - -/*! ZDICT_trainFromBuffer_advanced() : - Same as ZDICT_trainFromBuffer() with control over more parameters. - `parameters` is optional and can be provided with values set to 0 to mean "default". - @return : size of dictionary stored into `dictBuffer` (<= `dictBufferSize`), - or an error code, which can be tested by ZDICT_isError(). - note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0. -*/ -ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity, - const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, - ZDICT_params_t parameters); - -/*! COVER_params_t : - For all values 0 means default. - k and d are the only required parameters. -*/ +/*! ZDICT_cover_params_t: + * For all values 0 means default. + * k and d are the only required parameters. + */ typedef struct { unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */ unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */ unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */ - unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ - unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */ - unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */ - int compressionLevel; /* 0 means default; target a specific zstd compression level */ -} COVER_params_t; + ZDICT_params_t zParams; +} ZDICT_cover_params_t; -/*! COVER_trainFromBuffer() : - Train a dictionary from an array of samples using the COVER algorithm. - Samples must be stored concatenated in a single flat buffer `samplesBuffer`, - supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. - The resulting dictionary will be saved into `dictBuffer`. - @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) - or an error code, which can be tested with ZDICT_isError(). - Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte. - Tips : In general, a reasonable dictionary has a size of ~ 100 KB. - It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`. - In general, it's recommended to provide a few thousands samples, but this can vary a lot. - It's recommended that total size of all samples be about ~x100 times the target size of dictionary. -*/ -ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, - const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, - COVER_params_t parameters); +/*! ZDICT_trainFromBuffer_cover(): + * Train a dictionary from an array of samples using the COVER algorithm. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, but this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( + void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, + const size_t *samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t parameters); -/*! COVER_optimizeTrainFromBuffer() : - The same requirements as above hold for all the parameters except `parameters`. - This function tries many parameter combinations and picks the best parameters. - `*parameters` is filled with the best parameters found, and the dictionary - constructed with those parameters is stored in `dictBuffer`. +/*! ZDICT_optimizeTrainFromBuffer_cover(): + * The same requirements as above hold for all the parameters except `parameters`. + * This function tries many parameter combinations and picks the best parameters. + * `*parameters` is filled with the best parameters found, and the dictionary + * constructed with those parameters is stored in `dictBuffer`. + * + * All of the parameters d, k, steps are optional. + * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}. + * if steps is zero it defaults to its default value. + * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048]. + * + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * On success `*parameters` contains the parameters selected. + * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread. + */ +ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( + void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, + const size_t *samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t *parameters); - All of the parameters d, k, steps are optional. - If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}. - if steps is zero it defaults to its default value. - If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048]. - - @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) - or an error code, which can be tested with ZDICT_isError(). - On success `*parameters` contains the parameters selected. - Note : COVER_optimizeTrainFromBuffer() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread. -*/ -ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, - const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, - COVER_params_t *parameters); - -/*! ZDICT_finalizeDictionary() : - - Given a custom content as a basis for dictionary, and a set of samples, - finalize dictionary by adding headers and statistics. - - Samples must be stored concatenated in a flat buffer `samplesBuffer`, - supplied with an array of sizes `samplesSizes`, providing the size of each sample in order. - - dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes. - maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes. - - @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`), - or an error code, which can be tested by ZDICT_isError(). - note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0. - note 2 : dictBuffer and dictContent can overlap -*/ +/*! ZDICT_finalizeDictionary(): + * Given a custom content as a basis for dictionary, and a set of samples, + * finalize dictionary by adding headers and statistics. + * + * Samples must be stored concatenated in a flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample in order. + * + * dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes. + * maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes. + * + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`), + * or an error code, which can be tested by ZDICT_isError(). + * Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0. + * Note 2: dictBuffer and dictContent can overlap + */ #define ZDICT_CONTENTSIZE_MIN 128 #define ZDICT_DICTSIZE_MIN 256 ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity, @@ -164,7 +150,28 @@ ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBuffer const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, ZDICT_params_t parameters); +typedef struct { + unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */ + ZDICT_params_t zParams; +} ZDICT_legacy_params_t; +/*! ZDICT_trainFromBuffer_legacy(): + * Train a dictionary from an array of samples. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. + * `parameters` is optional and can be provided with values set to 0 to mean "default". + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, but this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0. + */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy( + void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, + const size_t *samplesSizes, unsigned nbSamples, ZDICT_legacy_params_t parameters); /* Deprecation warnings */ /* It is generally possible to disable deprecation warnings from compiler, diff --git a/programs/dibio.c b/programs/dibio.c index aac36425..31cde5c9 100644 --- a/programs/dibio.c +++ b/programs/dibio.c @@ -216,21 +216,21 @@ static U64 DiB_getTotalCappedFileSize(const char** fileNamesTable, unsigned nbFi } -/*! ZDICT_trainFromBuffer_unsafe() : +/*! ZDICT_trainFromBuffer_unsafe_legacy() : Strictly Internal use only !! - Same as ZDICT_trainFromBuffer_advanced(), but does not control `samplesBuffer`. + Same as ZDICT_trainFromBuffer_legacy(), but does not control `samplesBuffer`. `samplesBuffer` must be followed by noisy guard band to avoid out-of-buffer reads. @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) or an error code. */ -size_t ZDICT_trainFromBuffer_unsafe(void* dictBuffer, size_t dictBufferCapacity, - const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, - ZDICT_params_t parameters); +size_t ZDICT_trainFromBuffer_unsafe_legacy(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_legacy_params_t parameters); int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize, const char** fileNamesTable, unsigned nbFiles, - ZDICT_params_t *params, COVER_params_t *coverParams, + ZDICT_legacy_params_t *params, ZDICT_cover_params_t *coverParams, int optimizeCover) { void* const dictBuffer = malloc(maxDictSize); @@ -243,8 +243,8 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize, int result = 0; /* Checks */ - if (params) g_displayLevel = params->notificationLevel; - else if (coverParams) g_displayLevel = coverParams->notificationLevel; + if (params) g_displayLevel = params->zParams.notificationLevel; + else if (coverParams) g_displayLevel = coverParams->zParams.notificationLevel; else EXM_THROW(13, "Neither dictionary algorith selected"); /* should not happen */ if ((!fileSizes) || (!srcBuffer) || (!dictBuffer)) EXM_THROW(12, "not enough memory for DiB_trainFiles"); /* should not happen */ if (g_tooLargeSamples) { @@ -273,20 +273,20 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize, size_t dictSize; if (params) { DiB_fillNoise((char*)srcBuffer + benchedSize, NOISELENGTH); /* guard band, for end of buffer condition */ - dictSize = ZDICT_trainFromBuffer_unsafe(dictBuffer, maxDictSize, - srcBuffer, fileSizes, nbFiles, - *params); + dictSize = ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, maxDictSize, + srcBuffer, fileSizes, nbFiles, + *params); } else if (optimizeCover) { - dictSize = COVER_optimizeTrainFromBuffer( - dictBuffer, maxDictSize, srcBuffer, fileSizes, nbFiles, - coverParams); + dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize, + srcBuffer, fileSizes, nbFiles, + coverParams); if (!ZDICT_isError(dictSize)) { - DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\n", coverParams->k, coverParams->d, coverParams->steps); + DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\n", coverParams->k, coverParams->d, coverParams->steps); } } else { - dictSize = COVER_trainFromBuffer(dictBuffer, maxDictSize, - srcBuffer, fileSizes, nbFiles, - *coverParams); + dictSize = + ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, srcBuffer, + fileSizes, nbFiles, *coverParams); } if (ZDICT_isError(dictSize)) { DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */ diff --git a/programs/dibio.h b/programs/dibio.h index e61d0042..84f7d580 100644 --- a/programs/dibio.h +++ b/programs/dibio.h @@ -32,7 +32,7 @@ */ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize, const char** fileNamesTable, unsigned nbFiles, - ZDICT_params_t *params, COVER_params_t *coverParams, + ZDICT_legacy_params_t *params, ZDICT_cover_params_t *coverParams, int optimizeCover); #endif diff --git a/programs/zstdcli.c b/programs/zstdcli.c index f59f9785..35772e0a 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -248,7 +248,7 @@ static unsigned longCommandWArg(const char** stringPtr, const char* longCommand) * @return 1 means that cover parameters were correct * @return 0 in case of malformed parameters */ -static unsigned parseCoverParameters(const char* stringPtr, COVER_params_t* params) +static unsigned parseCoverParameters(const char* stringPtr, ZDICT_cover_params_t* params) { memset(params, 0, sizeof(*params)); for (; ;) { @@ -277,9 +277,9 @@ static unsigned parseLegacyParameters(const char* stringPtr, unsigned* selectivi return 1; } -static COVER_params_t defaultCoverParams(void) +static ZDICT_cover_params_t defaultCoverParams(void) { - COVER_params_t params; + ZDICT_cover_params_t params; memset(¶ms, 0, sizeof(params)); params.d = 8; params.steps = 4; @@ -358,7 +358,7 @@ int main(int argCount, const char* argv[]) unsigned fileNamesNb; #endif #ifndef ZSTD_NODICT - COVER_params_t coverParams = defaultCoverParams(); + ZDICT_cover_params_t coverParams = defaultCoverParams(); int cover = 1; #endif @@ -699,20 +699,20 @@ int main(int argCount, const char* argv[]) /* Check if dictionary builder is selected */ if (operation==zom_train) { #ifndef ZSTD_NODICT + ZDICT_params_t zParams; + zParams.compressionLevel = dictCLevel; + zParams.notificationLevel = g_displayLevel; + zParams.dictID = dictID; if (cover) { int const optimize = !coverParams.k || !coverParams.d; coverParams.nbThreads = nbThreads; - coverParams.compressionLevel = dictCLevel; - coverParams.notificationLevel = g_displayLevel; - coverParams.dictID = dictID; + coverParams.zParams = zParams; operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, NULL, &coverParams, optimize); } else { - ZDICT_params_t dictParams; + ZDICT_legacy_params_t dictParams; memset(&dictParams, 0, sizeof(dictParams)); - dictParams.compressionLevel = dictCLevel; dictParams.selectivityLevel = dictSelect; - dictParams.notificationLevel = g_displayLevel; - dictParams.dictID = dictID; + dictParams.zParams = zParams; operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, &dictParams, NULL, 0); } #endif diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 0dcba3ce..b8f51478 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -638,7 +638,7 @@ static int basicUnitTests(U32 seed, double compressibility) size_t const sampleUnitSize = 8 KB; U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize); size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t)); - COVER_params_t params; + ZDICT_cover_params_t params; U32 dictID; if (dictBuffer==NULL || samplesSizes==NULL) { @@ -647,14 +647,14 @@ static int basicUnitTests(U32 seed, double compressibility) goto _output_error; } - DISPLAYLEVEL(4, "test%3i : COVER_trainFromBuffer : ", testNb++); + DISPLAYLEVEL(4, "test%3i : ZDICT_trainFromBuffer_cover : ", testNb++); { U32 u; for (u=0; u