From cbc71e40f6dda6b42ddcbbd80a3035ec88491a53 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 15 Mar 2018 17:22:40 -0700 Subject: [PATCH 1/5] moving LRM parameters out of experimental section into "normal" range, start pinned at 160. --- lib/zstd.h | 67 +++++++++++++++++++++++++++--------------------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/lib/zstd.h b/lib/zstd.h index 3ea049eb..4b1b92c1 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -990,6 +990,40 @@ typedef enum { * resulting in stronger and slower compression. * Special: value 0 means "do not change strategy". */ + ZSTD_p_enableLongDistanceMatching=160, /* Enable long distance matching. + * This parameter is designed to improve the compression + * ratio for large inputs with long distance matches. + * This increases the memory usage as well as window size. + * Note: setting this parameter sets all the LDM parameters + * as well as ZSTD_p_windowLog. It should be set after + * ZSTD_p_compressionLevel and before ZSTD_p_windowLog and + * other LDM parameters. Setting the compression level + * after this parameter overrides the window log, though LDM + * will remain enabled until explicitly disabled. */ + ZSTD_p_ldmHashLog, /* Size of the table for long distance matching, as a power of 2. + * Larger values increase memory usage and compression ratio, but decrease + * compression speed. + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX + * (default: windowlog - 7). + * Special: value 0 means "do not change ldmHashLog". */ + ZSTD_p_ldmMinMatch, /* Minimum size of searched matches for long distance matcher. + * Larger/too small values usually decrease compression ratio. + * Must be clamped between ZSTD_LDM_MINMATCH_MIN + * and ZSTD_LDM_MINMATCH_MAX (default: 64). + * Special: value 0 means "do not change ldmMinMatch". */ + ZSTD_p_ldmBucketSizeLog, /* Log size of each bucket in the LDM hash table for collision resolution. + * Larger values usually improve collision resolution but may decrease + * compression speed. + * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX (default: 3). + * note : 0 is a valid value */ + ZSTD_p_ldmHashEveryLog, /* Frequency of inserting/looking up entries in the LDM hash table. + * The default is MAX(0, (windowLog - ldmHashLog)) to + * optimize hash table usage. + * Larger values improve compression speed. Deviating far from the + * default value will likely result in a decrease in compression ratio. + * Must be clamped between 0 and ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN. + * note : 0 is a valid value */ + /* frame parameters */ ZSTD_p_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) * Content size must be known at the beginning of compression, @@ -1026,39 +1060,6 @@ typedef enum { ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize, * even when referencing into Dictionary content (default:0) */ - ZSTD_p_enableLongDistanceMatching=1200, /* Enable long distance matching. - * This parameter is designed to improve the compression - * ratio for large inputs with long distance matches. - * This increases the memory usage as well as window size. - * Note: setting this parameter sets all the LDM parameters - * as well as ZSTD_p_windowLog. It should be set after - * ZSTD_p_compressionLevel and before ZSTD_p_windowLog and - * other LDM parameters. Setting the compression level - * after this parameter overrides the window log, though LDM - * will remain enabled until explicitly disabled. */ - ZSTD_p_ldmHashLog, /* Size of the table for long distance matching, as a power of 2. - * Larger values increase memory usage and compression ratio, but decrease - * compression speed. - * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX - * (default: windowlog - 7). - * Special: value 0 means "do not change ldmHashLog". */ - ZSTD_p_ldmMinMatch, /* Minimum size of searched matches for long distance matcher. - * Larger/too small values usually decrease compression ratio. - * Must be clamped between ZSTD_LDM_MINMATCH_MIN - * and ZSTD_LDM_MINMATCH_MAX (default: 64). - * Special: value 0 means "do not change ldmMinMatch". */ - ZSTD_p_ldmBucketSizeLog, /* Log size of each bucket in the LDM hash table for collision resolution. - * Larger values usually improve collision resolution but may decrease - * compression speed. - * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX (default: 3). - * note : 0 is a valid value */ - ZSTD_p_ldmHashEveryLog, /* Frequency of inserting/looking up entries in the LDM hash table. - * The default is MAX(0, (windowLog - ldmHashLog)) to - * optimize hash table usage. - * Larger values improve compression speed. Deviating far from the - * default value will likely result in a decrease in compression ratio. - * Must be clamped between 0 and ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN. - * note : 0 is a valid value */ } ZSTD_cParameter; From 9618c0c8045e95e8c30b3bf2cfc5b9dd0fc61cad Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 19 Mar 2018 11:07:04 -0700 Subject: [PATCH 2/5] make it possible to specify LDM parameters in any order --- lib/compress/zstd_compress.c | 12 ++++-------- lib/compress/zstd_ldm.c | 12 +++++++----- lib/compress/zstd_ldm.h | 12 ++++++------ lib/zstd.h | 37 ++++++++++++++++-------------------- 4 files changed, 33 insertions(+), 40 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index e70c2a54..42bcc4c0 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -478,17 +478,15 @@ size_t ZSTD_CCtxParam_setParameter( return ZSTD_ldm_initializeParameters(&CCtxParams->ldmParams, value); case ZSTD_p_ldmHashLog : - if (value) { /* 0 : does not change current ldmHashLog */ + if (value>0) /* 0 ==> auto */ CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); - CCtxParams->ldmParams.hashLog = value; - } + CCtxParams->ldmParams.hashLog = value; return CCtxParams->ldmParams.hashLog; case ZSTD_p_ldmMinMatch : - if (value) { /* 0 : does not change current ldmMinMatch */ + if (value>0) /* 0 ==> default */ CLAMPCHECK(value, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX); - CCtxParams->ldmParams.minMatchLength = value; - } + CCtxParams->ldmParams.minMatchLength = value; return CCtxParams->ldmParams.minMatchLength; case ZSTD_p_ldmBucketSizeLog : @@ -988,8 +986,6 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, zbuff, pledgedSrcSize)) { DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%u)", zc->appliedParams.cParams.windowLog, (U32)zc->blockSize); - assert(!(params.ldmParams.enableLdm && - params.ldmParams.hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET)); return ZSTD_continueCCtx(zc, params, pledgedSrcSize); } } DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx"); diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c index fa395ed7..28126071 100644 --- a/lib/compress/zstd_ldm.c +++ b/lib/compress/zstd_ldm.c @@ -21,10 +21,8 @@ size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm) { ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); params->enableLdm = enableLdm>0; - params->hashLog = 0; - params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; - params->minMatchLength = LDM_MIN_MATCH_LENGTH; - params->hashEveryLog = ZSTD_LDM_HASHEVERYLOG_NOTSET; + if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; + if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; return 0; } @@ -32,6 +30,9 @@ void ZSTD_ldm_adjustParameters(ldmParams_t* params, ZSTD_compressionParameters const* cParams) { U32 const windowLog = cParams->windowLog; + DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); + if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; + if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; if (cParams->strategy >= ZSTD_btopt) { /* Get out of the way of the optimal parser */ U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength); @@ -43,7 +44,7 @@ void ZSTD_ldm_adjustParameters(ldmParams_t* params, params->hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG); assert(params->hashLog <= ZSTD_HASHLOG_MAX); } - if (params->hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET) { + if (params->hashEveryLog == 0) { params->hashEveryLog = windowLog < params->hashLog ? 0 : windowLog - params->hashLog; } @@ -183,6 +184,7 @@ static U64 ZSTD_ldm_ipow(U64 base, U64 exp) } U64 ZSTD_ldm_getHashPower(U32 minMatchLength) { + DEBUGLOG(4, "ZSTD_ldm_getHashPower: mml=%u", minMatchLength); assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN); return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1); } diff --git a/lib/compress/zstd_ldm.h b/lib/compress/zstd_ldm.h index 9d2f7c39..d719a3c0 100644 --- a/lib/compress/zstd_ldm.h +++ b/lib/compress/zstd_ldm.h @@ -22,7 +22,6 @@ extern "C" { ***************************************/ #define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_DEFAULTMAX -#define ZSTD_LDM_HASHEVERYLOG_NOTSET 9999 /** * ZSTD_ldm_generateSequences(): @@ -39,8 +38,8 @@ extern "C" { * sequences. */ size_t ZSTD_ldm_generateSequences( - ldmState_t* ldms, rawSeqStore_t* sequences, - ldmParams_t const* params, void const* src, size_t srcSize); + ldmState_t* ldms, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize); /** * ZSTD_ldm_blockCompress(): @@ -61,9 +60,10 @@ size_t ZSTD_ldm_generateSequences( * NOTE: This function does not return any errors. */ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize, - int const extDict); + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, + void const* src, size_t srcSize, + int const extDict); /** ZSTD_ldm_initializeParameters() : diff --git a/lib/zstd.h b/lib/zstd.h index 4b1b92c1..367eab90 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -991,38 +991,33 @@ typedef enum { * Special: value 0 means "do not change strategy". */ ZSTD_p_enableLongDistanceMatching=160, /* Enable long distance matching. - * This parameter is designed to improve the compression - * ratio for large inputs with long distance matches. - * This increases the memory usage as well as window size. - * Note: setting this parameter sets all the LDM parameters - * as well as ZSTD_p_windowLog. It should be set after - * ZSTD_p_compressionLevel and before ZSTD_p_windowLog and - * other LDM parameters. Setting the compression level - * after this parameter overrides the window log, though LDM - * will remain enabled until explicitly disabled. */ + * This parameter is designed to improve compression ratio + * for large inputs, thanks to long distance matches. + * It increases memory usage and window size. + * Note: setting this parameter sets ZSTD_p_windowLog. + * Setting compression level after LDM overrides the window log, + * though LDM will remain enabled until explicitly disabled. */ ZSTD_p_ldmHashLog, /* Size of the table for long distance matching, as a power of 2. * Larger values increase memory usage and compression ratio, but decrease * compression speed. * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX * (default: windowlog - 7). - * Special: value 0 means "do not change ldmHashLog". */ + * Special: value 0 means "automatically determine hashlog". */ ZSTD_p_ldmMinMatch, /* Minimum size of searched matches for long distance matcher. * Larger/too small values usually decrease compression ratio. - * Must be clamped between ZSTD_LDM_MINMATCH_MIN - * and ZSTD_LDM_MINMATCH_MAX (default: 64). - * Special: value 0 means "do not change ldmMinMatch". */ + * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. + * Special: value 0 means "use default value" (default: 64). */ ZSTD_p_ldmBucketSizeLog, /* Log size of each bucket in the LDM hash table for collision resolution. - * Larger values usually improve collision resolution but may decrease - * compression speed. - * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX (default: 3). - * note : 0 is a valid value */ + * Larger values improve collision resolution but decrease compression speed. + * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX . + * Special: value 0 means "use default value" (default: 3). */ ZSTD_p_ldmHashEveryLog, /* Frequency of inserting/looking up entries in the LDM hash table. * The default is MAX(0, (windowLog - ldmHashLog)) to * optimize hash table usage. - * Larger values improve compression speed. Deviating far from the - * default value will likely result in a decrease in compression ratio. - * Must be clamped between 0 and ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN. - * note : 0 is a valid value */ + * Larger values improve compression speed. + * Deviating far from default value will likely result in a compression ratio decrease. + * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). + * Special: value 0 means "automatically determine hashEveryLog". */ /* frame parameters */ ZSTD_p_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) From 6f4d0778a5c23a0174078d054dee766c38f2d821 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 19 Mar 2018 14:41:23 -0700 Subject: [PATCH 3/5] make it possible to express compression parameters in any order --- lib/compress/zstd_compress.c | 156 ++++++++++---------------- lib/compress/zstd_compress_internal.h | 10 +- lib/compress/zstd_ldm.c | 10 +- lib/compress/zstd_ldm.h | 4 - lib/compress/zstdmt_compress.c | 14 +-- lib/compress/zstdmt_compress.h | 5 +- lib/zstd.h | 61 +++++----- 7 files changed, 110 insertions(+), 150 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 42bcc4c0..7b45a971 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -143,28 +143,19 @@ size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) /* private API call, for dictBuilder only */ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } -static ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( - ZSTD_CCtx_params CCtxParams, U64 srcSizeHint, size_t dictSize) +ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize) { - DEBUGLOG(5, "ZSTD_getCParamsFromCCtxParams: srcSize = %u, dictSize = %u", - (U32)srcSizeHint, (U32)dictSize); - return (CCtxParams.compressionLevel == ZSTD_CLEVEL_CUSTOM) ? - CCtxParams.cParams : - ZSTD_getCParams(CCtxParams.compressionLevel, srcSizeHint, dictSize); -} - -static void ZSTD_cLevelToCCtxParams_srcSize(ZSTD_CCtx_params* CCtxParams, U64 srcSize) -{ - DEBUGLOG(4, "ZSTD_cLevelToCCtxParams_srcSize: srcSize = %u", - (U32)srcSize); - CCtxParams->cParams = ZSTD_getCParamsFromCCtxParams(*CCtxParams, srcSize, 0); - CCtxParams->compressionLevel = ZSTD_CLEVEL_CUSTOM; -} - -static void ZSTD_cLevelToCCtxParams(ZSTD_CCtx_params* CCtxParams) -{ - DEBUGLOG(4, "ZSTD_cLevelToCCtxParams"); - ZSTD_cLevelToCCtxParams_srcSize(CCtxParams, ZSTD_CONTENTSIZE_UNKNOWN); + ZSTD_compressionParameters cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize); + if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; + if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog; + if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog; + if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog; + if (CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog; + if (CCtxParams->cParams.searchLength) cParams.searchLength = CCtxParams->cParams.searchLength; + if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength; + if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy; + return cParams; } static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( @@ -173,7 +164,9 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( ZSTD_CCtx_params cctxParams; memset(&cctxParams, 0, sizeof(cctxParams)); cctxParams.cParams = cParams; - cctxParams.compressionLevel = ZSTD_CLEVEL_CUSTOM; + cctxParams.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ + assert(!ZSTD_checkCParams(cParams)); + cctxParams.fParams.contentSizeFlag = 1; return cctxParams; } @@ -187,6 +180,7 @@ static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced( if (!params) { return NULL; } params->customMem = customMem; params->compressionLevel = ZSTD_CLEVEL_DEFAULT; + params->fParams.contentSizeFlag = 1; return params; } @@ -211,6 +205,7 @@ size_t ZSTD_initCCtxParams(ZSTD_CCtx_params* cctxParams, int compressionLevel) { if (!cctxParams) { return ERROR(GENERIC); } memset(cctxParams, 0, sizeof(*cctxParams)); cctxParams->compressionLevel = compressionLevel; + cctxParams->fParams.contentSizeFlag = 1; return 0; } @@ -221,17 +216,21 @@ size_t ZSTD_initCCtxParams_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameter memset(cctxParams, 0, sizeof(*cctxParams)); cctxParams->cParams = params.cParams; cctxParams->fParams = params.fParams; - cctxParams->compressionLevel = ZSTD_CLEVEL_CUSTOM; + cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ + assert(!ZSTD_checkCParams(params.cParams)); return 0; } +/* ZSTD_assignParamsToCCtxParams() : + * params is presumed valid at this stage */ static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams( ZSTD_CCtx_params cctxParams, ZSTD_parameters params) { ZSTD_CCtx_params ret = cctxParams; ret.cParams = params.cParams; ret.fParams = params.fParams; - ret.compressionLevel = ZSTD_CLEVEL_CUSTOM; + ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ + assert(!ZSTD_checkCParams(params.cParams)); return ret; } @@ -301,9 +300,6 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v case ZSTD_p_targetLength: case ZSTD_p_compressionStrategy: if (cctx->cdict) return ERROR(stage_wrong); - if (value>0) { - ZSTD_cLevelToCCtxParams_srcSize(&cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1); /* Optimize cParams when srcSize is known */ - } return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); case ZSTD_p_compressLiterals: @@ -328,11 +324,6 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); case ZSTD_p_enableLongDistanceMatching: - if (cctx->cdict) return ERROR(stage_wrong); - if (value>0) - ZSTD_cLevelToCCtxParams_srcSize(&cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1); /* Optimize cParams when srcSize is known */ - return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); - case ZSTD_p_ldmHashLog: case ZSTD_p_ldmMinMatch: case ZSTD_p_ldmBucketSizeLog: @@ -368,65 +359,50 @@ size_t ZSTD_CCtxParam_setParameter( } case ZSTD_p_windowLog : - DEBUGLOG(4, "ZSTD_CCtxParam_setParameter: set windowLog=%u", value); - if (value) { /* 0 : does not change current windowLog */ + if (value) /* 0 => use default */ CLAMPCHECK(value, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); - ZSTD_cLevelToCCtxParams(CCtxParams); - CCtxParams->cParams.windowLog = value; - } + CCtxParams->cParams.windowLog = value; return CCtxParams->cParams.windowLog; case ZSTD_p_hashLog : - if (value) { /* 0 : does not change current hashLog */ + if (value) /* 0 => use default */ CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); - ZSTD_cLevelToCCtxParams(CCtxParams); - CCtxParams->cParams.hashLog = value; - } + CCtxParams->cParams.hashLog = value; return CCtxParams->cParams.hashLog; case ZSTD_p_chainLog : - if (value) { /* 0 : does not change current chainLog */ + if (value) /* 0 => use default */ CLAMPCHECK(value, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); - ZSTD_cLevelToCCtxParams(CCtxParams); - CCtxParams->cParams.chainLog = value; - } + CCtxParams->cParams.chainLog = value; return CCtxParams->cParams.chainLog; case ZSTD_p_searchLog : - if (value) { /* 0 : does not change current searchLog */ + if (value) /* 0 => use default */ CLAMPCHECK(value, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); - ZSTD_cLevelToCCtxParams(CCtxParams); - CCtxParams->cParams.searchLog = value; - } + CCtxParams->cParams.searchLog = value; return value; case ZSTD_p_minMatch : - if (value) { /* 0 : does not change current minMatch length */ + if (value) /* 0 => use default */ CLAMPCHECK(value, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); - ZSTD_cLevelToCCtxParams(CCtxParams); - CCtxParams->cParams.searchLength = value; - } + CCtxParams->cParams.searchLength = value; return CCtxParams->cParams.searchLength; case ZSTD_p_targetLength : - if (value) { /* 0 : does not change current sufficient_len */ + if (value) /* 0 => use default */ CLAMPCHECK(value, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); - ZSTD_cLevelToCCtxParams(CCtxParams); - CCtxParams->cParams.targetLength = value; - } + CCtxParams->cParams.targetLength = value; return CCtxParams->cParams.targetLength; case ZSTD_p_compressionStrategy : - if (value) { /* 0 : does not change currentstrategy */ + if (value) /* 0 => use default */ CLAMPCHECK(value, (unsigned)ZSTD_fast, (unsigned)ZSTD_btultra); - ZSTD_cLevelToCCtxParams(CCtxParams); - CCtxParams->cParams.strategy = (ZSTD_strategy)value; - } + CCtxParams->cParams.strategy = (ZSTD_strategy)value; return (size_t)CCtxParams->cParams.strategy; case ZSTD_p_compressLiterals: CCtxParams->disableLiteralCompression = !value; - return !!value; + return !CCtxParams->disableLiteralCompression; case ZSTD_p_contentSizeFlag : /* Content size written in frame header _when known_ (default:1) */ @@ -441,7 +417,7 @@ size_t ZSTD_CCtxParam_setParameter( case ZSTD_p_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */ DEBUGLOG(4, "set dictIDFlag = %u", (value>0)); - CCtxParams->fParams.noDictIDFlag = (value == 0); + CCtxParams->fParams.noDictIDFlag = !value; return !CCtxParams->fParams.noDictIDFlag; case ZSTD_p_forceMaxWindow : @@ -471,11 +447,8 @@ size_t ZSTD_CCtxParam_setParameter( #endif case ZSTD_p_enableLongDistanceMatching : - if (value) { - ZSTD_cLevelToCCtxParams(CCtxParams); - CCtxParams->cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; - } - return ZSTD_ldm_initializeParameters(&CCtxParams->ldmParams, value); + CCtxParams->ldmParams.enableLdm = (value>0); + return CCtxParams->ldmParams.enableLdm; case ZSTD_p_ldmHashLog : if (value>0) /* 0 ==> auto */ @@ -490,18 +463,16 @@ size_t ZSTD_CCtxParam_setParameter( return CCtxParams->ldmParams.minMatchLength; case ZSTD_p_ldmBucketSizeLog : - if (value > ZSTD_LDM_BUCKETSIZELOG_MAX) { + if (value > ZSTD_LDM_BUCKETSIZELOG_MAX) return ERROR(parameter_outOfBound); - } CCtxParams->ldmParams.bucketSizeLog = value; - return value; + return CCtxParams->ldmParams.bucketSizeLog; case ZSTD_p_ldmHashEveryLog : - if (value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) { + if (value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) return ERROR(parameter_outOfBound); - } CCtxParams->ldmParams.hashEveryLog = value; - return value; + return CCtxParams->ldmParams.hashEveryLog; default: return ERROR(parameter_unsupported); } @@ -545,7 +516,7 @@ size_t ZSTD_CCtx_loadDictionary_advanced( cctx->cdict = NULL; } else { ZSTD_compressionParameters const cParams = - ZSTD_getCParamsFromCCtxParams(cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, dictSize); + ZSTD_getCParamsFromCCtxParams(&cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, dictSize); cctx->cdictLocal = ZSTD_createCDict_advanced( dict, dictSize, dictLoadMethod, dictMode, @@ -718,14 +689,14 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) /* Estimate CCtx size is supported for single-threaded compression only. */ if (params->nbWorkers > 0) { return ERROR(GENERIC); } { ZSTD_compressionParameters const cParams = - ZSTD_getCParamsFromCCtxParams(*params, 0, 0); + ZSTD_getCParamsFromCCtxParams(params, 0, 0); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); U32 const divider = (cParams.searchLength==3) ? 3 : 4; size_t const maxNbSeq = blockSize / divider; size_t const tokenSpace = blockSize + 11*maxNbSeq; size_t const entropySpace = HUF_WORKSPACE_SIZE; size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t); - size_t const matchStateSize = ZSTD_sizeof_matchState(¶ms->cParams, /* forCCtx */ 1); + size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1); size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams); size_t const ldmSeqSpace = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq); @@ -2093,7 +2064,7 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) { ZSTD_compressionParameters const cParams = - ZSTD_getCParamsFromCCtxParams(cctx->appliedParams, 0, 0); + ZSTD_getCParamsFromCCtxParams(&(cctx->appliedParams), 0, 0); return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog); } @@ -2536,6 +2507,7 @@ static size_t ZSTD_initCDict_internal( ZSTD_compressionParameters cParams) { DEBUGLOG(3, "ZSTD_initCDict_internal, mode %u", (U32)dictMode); + assert(!ZSTD_checkCParams(cParams)); cdict->cParams = cParams; if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { cdict->dictBuffer = NULL; @@ -2551,8 +2523,7 @@ static size_t ZSTD_initCDict_internal( /* Reset the state to no dictionary */ ZSTD_reset_compressedBlockState(&cdict->cBlockState); - { - void* const end = ZSTD_reset_matchState( + { void* const end = ZSTD_reset_matchState( &cdict->matchState, (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32, &cParams, ZSTDcrp_continue, /* forCCtx */ 0); @@ -2562,15 +2533,13 @@ static size_t ZSTD_initCDict_internal( /* (Maybe) load the dictionary * Skips loading the dictionary if it is <= 8 bytes. */ - { - ZSTD_CCtx_params params; + { ZSTD_CCtx_params params; memset(¶ms, 0, sizeof(params)); params.compressionLevel = ZSTD_CLEVEL_DEFAULT; params.fParams.contentSizeFlag = 1; params.cParams = cParams; - { - size_t const dictID = ZSTD_compress_insertDictionary( - &cdict->cBlockState, &cdict->matchState,¶ms, + { size_t const dictID = ZSTD_compress_insertDictionary( + &cdict->cBlockState, &cdict->matchState, ¶ms, cdict->dictContent, cdict->dictContentSize, dictMode, cdict->workspace); if (ZSTD_isError(dictID)) return dictID; @@ -2830,7 +2799,7 @@ size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize) DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (U32)pledgedSrcSize); if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; params.fParams.contentSizeFlag = 1; - params.cParams = ZSTD_getCParamsFromCCtxParams(params, pledgedSrcSize, 0); + params.cParams = ZSTD_getCParamsFromCCtxParams(¶ms, pledgedSrcSize, 0); return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, zcs->cdict, params, pledgedSrcSize); } @@ -2867,9 +2836,6 @@ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, zcs->cdict = cdict; } - params.compressionLevel = ZSTD_CLEVEL_CUSTOM; /* enforce usage of cParams, instead of a dynamic derivation from cLevel (but does that happen ?) */ - zcs->requestedParams = params; - return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, zcs->cdict, params, pledgedSrcSize); } @@ -2899,20 +2865,22 @@ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) return ZSTD_initCStream_usingCDict_advanced(zcs, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); /* note : will check that cdict != NULL */ } + /* ZSTD_initCStream_advanced() : - * pledgedSrcSize must be correct. + * pledgedSrcSize must be exact. * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. * dict is loaded with default parameters ZSTD_dm_auto and ZSTD_dlm_byCopy. */ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize) { - ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); DEBUGLOG(4, "ZSTD_initCStream_advanced: pledgedSrcSize=%u, flag=%u", (U32)pledgedSrcSize, params.fParams.contentSizeFlag); CHECK_F( ZSTD_checkCParams(params.cParams) ); if ((pledgedSrcSize==0) && (params.fParams.contentSizeFlag==0)) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* for compatibility with older programs relying on this behavior. Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. This line will be removed in the future. */ - return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL /*cdict*/, cctxParams, pledgedSrcSize); + { ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); + return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL /*cdict*/, cctxParams, pledgedSrcSize); + } } size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) @@ -3124,7 +3092,7 @@ size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, DEBUGLOG(4, "ZSTD_compress_generic : transparent init stage"); if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1; /* auto-fix pledgedSrcSize */ params.cParams = ZSTD_getCParamsFromCCtxParams( - cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/); + &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/); #ifdef ZSTD_MULTITHREAD if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { @@ -3164,7 +3132,7 @@ size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, #ifdef ZSTD_MULTITHREAD if (cctx->appliedParams.nbWorkers > 0) { if (cctx->cParamsChanged) { - ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, cctx->requestedParams.compressionLevel, cctx->requestedParams.cParams); + ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams); cctx->cParamsChanged = 0; } { size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 0d8bbf29..b726df86 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -32,7 +32,6 @@ extern "C" { ***************************************/ #define kSearchStrength 8 #define HASH_READ_SIZE 8 -#define ZSTD_CLEVEL_CUSTOM 999 #define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted". It could be confused for a real successor at index "1", if sorted as larger than its predecessor. It's not a big deal though : candidate will just be sorted again. @@ -183,7 +182,7 @@ struct ZSTD_CCtx_params_s { /* Long distance matching parameters */ ldmParams_t ldmParams; - /* For use with createCCtxParams() and freeCCtxParams() only */ + /* Internal use, for createCCtxParams() and freeCCtxParams() only */ ZSTD_customMem customMem; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ @@ -629,6 +628,13 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, * These prototypes shall only be called from within lib/compress * ============================================================== */ +/* ZSTD_getCParamsFromCCtxParams() : + * cParams are built depending on compressionLevel, src size hints, + * LDM and manually set compression parameters. + */ +ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize); + /*! ZSTD_initCStream_internal() : * Private use only. Init streaming operation. * expects params to be valid. diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c index 28126071..4d18e7dc 100644 --- a/lib/compress/zstd_ldm.c +++ b/lib/compress/zstd_ldm.c @@ -17,19 +17,11 @@ #define LDM_HASH_RLOG 7 #define LDM_HASH_CHAR_OFFSET 10 -size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm) -{ - ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); - params->enableLdm = enableLdm>0; - if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; - if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; - return 0; -} - void ZSTD_ldm_adjustParameters(ldmParams_t* params, ZSTD_compressionParameters const* cParams) { U32 const windowLog = cParams->windowLog; + ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; diff --git a/lib/compress/zstd_ldm.h b/lib/compress/zstd_ldm.h index d719a3c0..88ee6028 100644 --- a/lib/compress/zstd_ldm.h +++ b/lib/compress/zstd_ldm.h @@ -66,10 +66,6 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, int const extDict); -/** ZSTD_ldm_initializeParameters() : - * Initialize the long distance matching parameters to their default values. */ -size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm); - /** ZSTD_ldm_getTableSize() : * Estimate the space needed for long distance matching tables or 0 if LDM is * disabled. diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index a5b6e014..666193b5 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -667,19 +667,19 @@ static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params) } /*! ZSTDMT_updateCParams_whileCompressing() : - * Update compression level and parameters (except wlog) - * while compression is ongoing. + * Updates only a selected set of compression parameters, to remain compatible with current frame. * New parameters will be applied to next compression job. */ -void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, int compressionLevel, ZSTD_compressionParameters cParams) +void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams) { U32 const saved_wlog = mtctx->params.cParams.windowLog; /* Do not modify windowLog while compressing */ + int const compressionLevel = cctxParams->compressionLevel; DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)", compressionLevel); mtctx->params.compressionLevel = compressionLevel; - if (compressionLevel != ZSTD_CLEVEL_CUSTOM) - cParams = ZSTD_getCParams(compressionLevel, mtctx->frameContentSize, 0 /* dictSize */ ); - cParams.windowLog = saved_wlog; - mtctx->params.cParams = cParams; + { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, 0, 0); + cParams.windowLog = saved_wlog; + mtctx->params.cParams = cParams; + } } /* ZSTDMT_getNbWorkers(): diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h index 4364f100..87800ba1 100644 --- a/lib/compress/zstdmt_compress.h +++ b/lib/compress/zstdmt_compress.h @@ -122,10 +122,9 @@ size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_param size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers); /*! ZSTDMT_updateCParams_whileCompressing() : - * Update compression level and parameters (except wlog) - * while compression is ongoing. + * Updates only a selected set of compression parameters, to remain compatible with current frame. * New parameters will be applied to next compression job. */ -void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, int compressionLevel, ZSTD_compressionParameters cParams); +void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams); /* ZSTDMT_getNbWorkers(): * @return nb threads currently active in mtctx. diff --git a/lib/zstd.h b/lib/zstd.h index 367eab90..e817c528 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -905,9 +905,8 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); * and then applied on all subsequent compression jobs. * When no parameter is ever provided, CCtx is created with compression level ZSTD_CLEVEL_DEFAULT. * - * This API is intended to replace all others experimental API. - * It can basically do all other use cases, and even new ones. - * It stands a reasonable chance to become "stable", after a good testing period. + * This API is intended to replace all others advanced / experimental API entry points. + * But it stands a reasonable chance to become "stable", after a reasonable testing period. */ /* note on naming convention : @@ -924,12 +923,12 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); * All enum will be pinned to explicit values before reaching "stable API" status */ typedef enum { - /* Question : should we have a format ZSTD_f_auto ? - * For the time being, it would mean exactly the same as ZSTD_f_zstd1. - * But, in the future, should several formats be supported, + /* Opened question : should we have a format ZSTD_f_auto ? + * Today, it would mean exactly the same as ZSTD_f_zstd1. + * But, in the future, should several formats become supported, * on the compression side, it would mean "default format". - * On the decompression side, it would mean "multi format", - * and ZSTD_f_zstd1 could be reserved to mean "accept *only* zstd frames". + * On the decompression side, it would mean "automatic format detection", + * so that ZSTD_f_zstd1 would mean "accept *only* zstd frames". * Since meaning is a little different, another option could be to define different enums for compression and decompression. * This question could be kept for later, when there are actually multiple formats to support, * but there is also the question of pinning enum values, and pinning value `0` is especially important */ @@ -949,33 +948,34 @@ typedef enum { * Default level is ZSTD_CLEVEL_DEFAULT==3. * Special: value 0 means "do not change cLevel". * Note 1 : it's possible to pass a negative compression level by casting it to unsigned type. - * Note 2 : setting compressionLevel automatically updates ZSTD_p_compressLiterals. */ + * Note 2 : setting a level sets all default values of other compression parameters. + * Note 3 : setting compressionLevel automatically updates ZSTD_p_compressLiterals. */ ZSTD_p_windowLog, /* Maximum allowed back-reference distance, expressed as power of 2. * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. - * Special: value 0 means "do not change windowLog". + * Special: value 0 means "use default windowLog". * Note: Using a window size greater than ZSTD_MAXWINDOWSIZE_DEFAULT (default: 2^27) - * requires setting the maximum window size at least as large during decompression. */ + * requires explicitly allowing such window size during decompression stage. */ ZSTD_p_hashLog, /* Size of the probe table, as a power of 2. * Resulting table size is (1 << (hashLog+2)). * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. * Larger tables improve compression ratio of strategies <= dFast, * and improve speed of strategies > dFast. - * Special: value 0 means "do not change hashLog". */ + * Special: value 0 means "use default hashLog". */ ZSTD_p_chainLog, /* Size of the full-search table, as a power of 2. * Resulting table size is (1 << (chainLog+2)). * Larger tables result in better and slower compression. * This parameter is useless when using "fast" strategy. - * Special: value 0 means "do not change chainLog". */ + * Special: value 0 means "use default chainLog". */ ZSTD_p_searchLog, /* Number of search attempts, as a power of 2. * More attempts result in better and slower compression. * This parameter is useless when using "fast" and "dFast" strategies. - * Special: value 0 means "do not change searchLog". */ + * Special: value 0 means "use default searchLog". */ ZSTD_p_minMatch, /* Minimum size of searched matches (note : repCode matches can be smaller). * Larger values make faster compression and decompression, but decrease ratio. * Must be clamped between ZSTD_SEARCHLENGTH_MIN and ZSTD_SEARCHLENGTH_MAX. * Note that currently, for all strategies < btopt, effective minimum is 4. - * Note that currently, for all strategies > fast, effective maximum is 6. - * Special: value 0 means "do not change minMatchLength". */ + * , for all strategies > fast, effective maximum is 6. + * Special: value 0 means "use default minMatchLength". */ ZSTD_p_targetLength, /* Impact of this field depends on strategy. * For strategies btopt & btultra: * Length of Match considered "good enough" to stop search. @@ -983,27 +983,26 @@ typedef enum { * For strategy fast: * Distance between match sampling. * Larger values make compression faster, and weaker. - * Special: value 0 means "do not change targetLength". */ + * Special: value 0 means "use default targetLength". */ ZSTD_p_compressionStrategy, /* See ZSTD_strategy enum definition. * Cast selected strategy as unsigned for ZSTD_CCtx_setParameter() compatibility. * The higher the value of selected strategy, the more complex it is, * resulting in stronger and slower compression. - * Special: value 0 means "do not change strategy". */ + * Special: value 0 means "use default strategy". */ ZSTD_p_enableLongDistanceMatching=160, /* Enable long distance matching. * This parameter is designed to improve compression ratio - * for large inputs, thanks to long distance matches. + * for large inputs, by finding large matches at long distance. * It increases memory usage and window size. - * Note: setting this parameter sets ZSTD_p_windowLog. - * Setting compression level after LDM overrides the window log, - * though LDM will remain enabled until explicitly disabled. */ + * Note: enabling this parameter increases ZSTD_p_windowLog to 128 MB + * except when expressly set to a different value. */ ZSTD_p_ldmHashLog, /* Size of the table for long distance matching, as a power of 2. - * Larger values increase memory usage and compression ratio, but decrease - * compression speed. + * Larger values increase memory usage and compression ratio, + * but decrease compression speed. * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX - * (default: windowlog - 7). + * default: windowlog - 7. * Special: value 0 means "automatically determine hashlog". */ - ZSTD_p_ldmMinMatch, /* Minimum size of searched matches for long distance matcher. + ZSTD_p_ldmMinMatch, /* Minimum match size for long distance matcher. * Larger/too small values usually decrease compression ratio. * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. * Special: value 0 means "use default value" (default: 64). */ @@ -1012,11 +1011,10 @@ typedef enum { * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX . * Special: value 0 means "use default value" (default: 3). */ ZSTD_p_ldmHashEveryLog, /* Frequency of inserting/looking up entries in the LDM hash table. - * The default is MAX(0, (windowLog - ldmHashLog)) to - * optimize hash table usage. + * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). + * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage. * Larger values improve compression speed. * Deviating far from default value will likely result in a compression ratio decrease. - * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). * Special: value 0 means "automatically determine hashEveryLog". */ /* frame parameters */ @@ -1044,7 +1042,9 @@ typedef enum { ZSTD_p_overlapSizeLog, /* Size of previous input reloaded at the beginning of each job. * 0 => no overlap, 6(default) => use 1/8th of windowSize, >=9 => use full windowSize */ - /* advanced parameters - may not remain available after API update */ + /* =================================================================== */ + /* experimental parameters - no stability guaranteed */ + /* =================================================================== */ ZSTD_p_compressLiterals=1000, /* control huffman compression of literals (enabled) by default. * disabling it improves speed and decreases compression ratio by a large amount. @@ -1055,7 +1055,6 @@ typedef enum { ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize, * even when referencing into Dictionary content (default:0) */ - } ZSTD_cParameter; From e5ddfaede829522d85869ea23e0191e097382f4e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 19 Mar 2018 16:02:51 -0700 Subject: [PATCH 4/5] add a test for unordered parameters --- tests/fuzzer.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/fuzzer.c b/tests/fuzzer.c index cae50970..cebb2655 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -1024,6 +1024,45 @@ static int basicUnitTests(U32 seed, double compressibility) ZSTD_freeCCtx(cctx); } + /* parameters order test */ + { size_t const inputSize = CNBuffSize / 2; + U64 xxh64; + + { ZSTD_CCtx* cctx = ZSTD_createCCtx(); + DISPLAYLEVEL(3, "test%3i : parameters in order : ", testNb++); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_p_compressionLevel, 2) ); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_p_enableLongDistanceMatching, 1) ); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_p_windowLog, 18) ); + { ZSTD_inBuffer in = { CNBuffer, inputSize, 0 }; + ZSTD_outBuffer out = { compressedBuffer, ZSTD_compressBound(inputSize), 0 }; + size_t const result = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end); + if (result != 0) goto _output_error; + if (in.pos != in.size) goto _output_error; + cSize = out.pos; + xxh64 = XXH64(out.dst, out.pos, 0); + } + DISPLAYLEVEL(3, "OK (compress : %u -> %u bytes)\n", (U32)inputSize, (U32)cSize); + ZSTD_freeCCtx(cctx); + } + + { ZSTD_CCtx* cctx = ZSTD_createCCtx(); + DISPLAYLEVEL(3, "test%3i : parameters disordered : ", testNb++); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_p_windowLog, 18) ); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_p_enableLongDistanceMatching, 1) ); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_p_compressionLevel, 2) ); + { ZSTD_inBuffer in = { CNBuffer, inputSize, 0 }; + ZSTD_outBuffer out = { compressedBuffer, ZSTD_compressBound(inputSize), 0 }; + size_t const result = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end); + if (result != 0) goto _output_error; + if (in.pos != in.size) goto _output_error; + if (out.pos != cSize) goto _output_error; /* must result in same compressed result, hence same size */ + if (XXH64(out.dst, out.pos, 0) != xxh64) goto _output_error; /* must result in exactly same content, hence same hash */ + DISPLAYLEVEL(3, "OK (compress : %u -> %u bytes)\n", (U32)inputSize, (U32)out.pos); + } + ZSTD_freeCCtx(cctx); + } + } + /* custom formats tests */ { ZSTD_CCtx* const cctx = ZSTD_createCCtx(); size_t const inputSize = CNBuffSize / 2; /* won't cause pb with small dict size */ From 878728dc26dc6c252344cf066acf3926f09a2932 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 20 Mar 2018 16:35:14 -0700 Subject: [PATCH 5/5] fixed several comments by @terrelln --- lib/compress/zstd_compress.c | 20 ++++++++++---------- lib/compress/zstd_ldm.h | 8 ++++---- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 7b45a971..52c03e74 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -359,43 +359,43 @@ size_t ZSTD_CCtxParam_setParameter( } case ZSTD_p_windowLog : - if (value) /* 0 => use default */ + if (value>0) /* 0 => use default */ CLAMPCHECK(value, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); CCtxParams->cParams.windowLog = value; return CCtxParams->cParams.windowLog; case ZSTD_p_hashLog : - if (value) /* 0 => use default */ + if (value>0) /* 0 => use default */ CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); CCtxParams->cParams.hashLog = value; return CCtxParams->cParams.hashLog; case ZSTD_p_chainLog : - if (value) /* 0 => use default */ + if (value>0) /* 0 => use default */ CLAMPCHECK(value, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); CCtxParams->cParams.chainLog = value; return CCtxParams->cParams.chainLog; case ZSTD_p_searchLog : - if (value) /* 0 => use default */ + if (value>0) /* 0 => use default */ CLAMPCHECK(value, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); CCtxParams->cParams.searchLog = value; return value; case ZSTD_p_minMatch : - if (value) /* 0 => use default */ + if (value>0) /* 0 => use default */ CLAMPCHECK(value, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); CCtxParams->cParams.searchLength = value; return CCtxParams->cParams.searchLength; case ZSTD_p_targetLength : - if (value) /* 0 => use default */ + if (value>0) /* 0 => use default */ CLAMPCHECK(value, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); CCtxParams->cParams.targetLength = value; return CCtxParams->cParams.targetLength; case ZSTD_p_compressionStrategy : - if (value) /* 0 => use default */ + if (value>0) /* 0 => use default */ CLAMPCHECK(value, (unsigned)ZSTD_fast, (unsigned)ZSTD_btultra); CCtxParams->cParams.strategy = (ZSTD_strategy)value; return (size_t)CCtxParams->cParams.strategy; @@ -426,7 +426,7 @@ size_t ZSTD_CCtxParam_setParameter( case ZSTD_p_nbWorkers : #ifndef ZSTD_MULTITHREAD - if (value > 0) return ERROR(parameter_unsupported); + if (value>0) return ERROR(parameter_unsupported); return 0; #else return ZSTDMT_CCtxParam_setNbWorkers(CCtxParams, value); @@ -2063,8 +2063,8 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) { - ZSTD_compressionParameters const cParams = - ZSTD_getCParamsFromCCtxParams(&(cctx->appliedParams), 0, 0); + ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams; + assert(!ZSTD_checkCParams(cParams)); return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog); } diff --git a/lib/compress/zstd_ldm.h b/lib/compress/zstd_ldm.h index 88ee6028..175a42cc 100644 --- a/lib/compress/zstd_ldm.h +++ b/lib/compress/zstd_ldm.h @@ -60,10 +60,10 @@ size_t ZSTD_ldm_generateSequences( * NOTE: This function does not return any errors. */ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - ZSTD_compressionParameters const* cParams, - void const* src, size_t srcSize, - int const extDict); + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_compressionParameters const* cParams, + void const* src, size_t srcSize, + int const extDict); /** ZSTD_ldm_getTableSize() :