diff --git a/Makefile b/Makefile index f2487c59..9f4b1fe4 100644 --- a/Makefile +++ b/Makefile @@ -77,7 +77,7 @@ check: shortest .PHONY: test shortest test shortest: - $(MAKE) -C $(PRGDIR) allVariants + $(MAKE) -C $(PRGDIR) allVariants MOREFLAGS="-g -DZSTD_DEBUG=1" $(MAKE) -C $(TESTDIR) $@ .PHONY: examples diff --git a/lib/common/mem.h b/lib/common/mem.h index 23335c31..2ec32873 100644 --- a/lib/common/mem.h +++ b/lib/common/mem.h @@ -123,20 +123,26 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } /* currently only defined for gcc and icc */ #if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) __pragma( pack(push, 1) ) - typedef union { U16 u16; U32 u32; U64 u64; size_t st; } unalign; + typedef struct { U16 v; } unalign16; + typedef struct { U32 v; } unalign32; + typedef struct { U64 v; } unalign64; + typedef struct { size_t v; } unalignArch; __pragma( pack(pop) ) #else - typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign; + typedef struct { U16 v; } __attribute__((packed)) unalign16; + typedef struct { U32 v; } __attribute__((packed)) unalign32; + typedef struct { U64 v; } __attribute__((packed)) unalign64; + typedef struct { size_t v; } __attribute__((packed)) unalignArch; #endif -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } -MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; } +MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; } +MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; } -MEM_STATIC void MEM_write16(void* 
memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; } +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; } #else diff --git a/lib/common/zstd_common.c b/lib/common/zstd_common.c index c2041053..bccc9488 100644 --- a/lib/common/zstd_common.c +++ b/lib/common/zstd_common.c @@ -31,21 +31,27 @@ const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } * ZSTD Error Management ******************************************/ /*! ZSTD_isError() : -* tells if a return value is an error code */ + * tells if a return value is an error code */ unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } /*! ZSTD_getErrorName() : -* provides error code string from function result (useful for debugging) */ + * provides error code string from function result (useful for debugging) */ const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } /*! ZSTD_getError() : -* convert a `size_t` function result into a proper ZSTD_errorCode enum */ + * convert a `size_t` function result into a proper ZSTD_errorCode enum */ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } /*! ZSTD_getErrorString() : -* provides error code string from enum */ + * provides error code string from enum */ const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } +/*! 
g_debuglog_enable : + * turn on/off debug traces (global switch) */ +#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 2) +int g_debuglog_enable = 1; +#endif + /*=************************************************************** * Custom allocator diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index 218cbc20..ce2b85b4 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -54,6 +54,7 @@ extern "C" { #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2) # include <stdio.h> +extern int g_debuglog_enable; /* recommended values for ZSTD_DEBUG display levels : * 1 : no display, enables assert() only * 2 : reserved for currently active debugging path @@ -61,14 +62,19 @@ extern "C" { * 4 : events once per frame * 5 : events once per block * 6 : events once per sequence (*very* verbose) */ -# define DEBUGLOG(l, ...) { \ - if (l<=ZSTD_DEBUG) { \ - fprintf(stderr, __FILE__ ": "); \ - fprintf(stderr, __VA_ARGS__); \ - fprintf(stderr, " \n"); \ +# define RAWLOG(l, ...) { \ + if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) { \ + fprintf(stderr, __VA_ARGS__); \ + } } +# define DEBUGLOG(l, ...) { \ + if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) { \ + fprintf(stderr, __FILE__ ": "); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, " \n"); \ } } #else -# define DEBUGLOG(l, ...) {} /* disabled */ +# define RAWLOG(l, ...) {} /* disabled */ +# define DEBUGLOG(l, ...) 
{} /* disabled */ #endif @@ -89,9 +95,7 @@ extern "C" { #define ZSTD_OPT_NUM (1<<12) #define ZSTD_REP_NUM 3 /* number of repcodes */ -#define ZSTD_REP_CHECK (ZSTD_REP_NUM) /* number of repcodes to check by the optimal parser */ #define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) -#define ZSTD_REP_MOVE_OPT (ZSTD_REP_NUM) static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; #define KB *(1 <<10) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 582e3e50..f0cebe7a 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -42,17 +42,6 @@ size_t ZSTD_compressBound(size_t srcSize) { } -/*-************************************* -* Sequence storage -***************************************/ -static void ZSTD_resetSeqStore(seqStore_t* ssPtr) -{ - ssPtr->lit = ssPtr->litStart; - ssPtr->sequences = ssPtr->sequencesStart; - ssPtr->longLengthID = 0; -} - - /*-************************************* * Context memory management ***************************************/ @@ -153,17 +142,17 @@ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStor #define ZSTD_CLEVEL_CUSTOM 999 static ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( - ZSTD_CCtx_params params, U64 srcSizeHint, size_t dictSize) + ZSTD_CCtx_params CCtxParams, U64 srcSizeHint, size_t dictSize) { - return (params.compressionLevel == ZSTD_CLEVEL_CUSTOM ? - params.cParams : - ZSTD_getCParams(params.compressionLevel, srcSizeHint, dictSize)); + return (CCtxParams.compressionLevel == ZSTD_CLEVEL_CUSTOM) ? 
+ CCtxParams.cParams : + ZSTD_getCParams(CCtxParams.compressionLevel, srcSizeHint, dictSize); } -static void ZSTD_cLevelToCCtxParams_srcSize(ZSTD_CCtx_params* params, U64 srcSize) +static void ZSTD_cLevelToCCtxParams_srcSize(ZSTD_CCtx_params* CCtxParams, U64 srcSize) { - params->cParams = ZSTD_getCParamsFromCCtxParams(*params, srcSize, 0); - params->compressionLevel = ZSTD_CLEVEL_CUSTOM; + CCtxParams->cParams = ZSTD_getCParamsFromCCtxParams(*CCtxParams, srcSize, 0); + CCtxParams->compressionLevel = ZSTD_CLEVEL_CUSTOM; } static void ZSTD_cLevelToCParams(ZSTD_CCtx* cctx) @@ -172,9 +161,9 @@ static void ZSTD_cLevelToCParams(ZSTD_CCtx* cctx) &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1); } -static void ZSTD_cLevelToCCtxParams(ZSTD_CCtx_params* params) +static void ZSTD_cLevelToCCtxParams(ZSTD_CCtx_params* CCtxParams) { - ZSTD_cLevelToCCtxParams_srcSize(params, 0); + ZSTD_cLevelToCCtxParams_srcSize(CCtxParams, 0); } static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( @@ -260,7 +249,6 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); case ZSTD_p_compressionLevel: - if (value == 0) return 0; /* special value : 0 means "don't change anything" */ if (cctx->cdict) return ERROR(stage_wrong); return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); @@ -271,7 +259,6 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v case ZSTD_p_minMatch: case ZSTD_p_targetLength: case ZSTD_p_compressionStrategy: - if (value == 0) return 0; /* special value : 0 means "don't change anything" */ if (cctx->cdict) return ERROR(stage_wrong); ZSTD_cLevelToCParams(cctx); /* Can optimize if srcSize is known */ return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); @@ -288,8 +275,6 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v return 
ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); case ZSTD_p_nbThreads: - if (value==0) return 0; - DEBUGLOG(5, " setting nbThreads : %u", value); if (value > 1 && cctx->staticSize) { return ERROR(parameter_unsupported); /* MT not compatible with static alloc */ } @@ -299,22 +284,15 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); case ZSTD_p_overlapSizeLog: - DEBUGLOG(5, " setting overlap with nbThreads == %u", cctx->requestedParams.nbThreads); return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); case ZSTD_p_enableLongDistanceMatching: if (cctx->cdict) return ERROR(stage_wrong); - if (value != 0) { - ZSTD_cLevelToCParams(cctx); - } + if (value>0) ZSTD_cLevelToCParams(cctx); return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); case ZSTD_p_ldmHashLog: case ZSTD_p_ldmMinMatch: - if (value == 0) return 0; /* special value : 0 means "don't change anything" */ - if (cctx->cdict) return ERROR(stage_wrong); - return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); - case ZSTD_p_ldmBucketSizeLog: case ZSTD_p_ldmHashEveryLog: if (cctx->cdict) return ERROR(stage_wrong); @@ -325,148 +303,157 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v } size_t ZSTD_CCtxParam_setParameter( - ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned value) + ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, unsigned value) { switch(param) { case ZSTD_p_format : if (value > (unsigned)ZSTD_f_zstd1_magicless) return ERROR(parameter_unsupported); - params->format = (ZSTD_format_e)value; - return 0; + CCtxParams->format = (ZSTD_format_e)value; + return (size_t)CCtxParams->format; case ZSTD_p_compressionLevel : if ((int)value > ZSTD_maxCLevel()) value = ZSTD_maxCLevel(); - if (value == 0) return 0; - params->compressionLevel = value; - return 0; + if (value) /* 0 : does not 
change current level */ + CCtxParams->compressionLevel = value; + return CCtxParams->compressionLevel; case ZSTD_p_windowLog : - if (value == 0) return 0; - CLAMPCHECK(value, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); - ZSTD_cLevelToCCtxParams(params); - params->cParams.windowLog = value; - return 0; + if (value) { /* 0 : does not change current windowLog */ + CLAMPCHECK(value, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); + ZSTD_cLevelToCCtxParams(CCtxParams); + CCtxParams->cParams.windowLog = value; + } + return CCtxParams->cParams.windowLog; case ZSTD_p_hashLog : - if (value == 0) return 0; - CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); - ZSTD_cLevelToCCtxParams(params); - params->cParams.hashLog = value; - return 0; + if (value) { /* 0 : does not change current hashLog */ + CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); + ZSTD_cLevelToCCtxParams(CCtxParams); + CCtxParams->cParams.hashLog = value; + } + return CCtxParams->cParams.hashLog; case ZSTD_p_chainLog : - if (value == 0) return 0; - CLAMPCHECK(value, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); - ZSTD_cLevelToCCtxParams(params); - params->cParams.chainLog = value; - return 0; + if (value) { /* 0 : does not change current chainLog */ + CLAMPCHECK(value, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); + ZSTD_cLevelToCCtxParams(CCtxParams); + CCtxParams->cParams.chainLog = value; + } + return CCtxParams->cParams.chainLog; case ZSTD_p_searchLog : - if (value == 0) return 0; - CLAMPCHECK(value, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); - ZSTD_cLevelToCCtxParams(params); - params->cParams.searchLog = value; - return 0; + if (value) { /* 0 : does not change current searchLog */ + CLAMPCHECK(value, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); + ZSTD_cLevelToCCtxParams(CCtxParams); + CCtxParams->cParams.searchLog = value; + } + return value; case ZSTD_p_minMatch : - if (value == 0) return 0; - CLAMPCHECK(value, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); - ZSTD_cLevelToCCtxParams(params); - params->cParams.searchLength 
= value; - return 0; + if (value) { /* 0 : does not change current minMatch length */ + CLAMPCHECK(value, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); + ZSTD_cLevelToCCtxParams(CCtxParams); + CCtxParams->cParams.searchLength = value; + } + return CCtxParams->cParams.searchLength; case ZSTD_p_targetLength : - if (value == 0) return 0; - CLAMPCHECK(value, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); - ZSTD_cLevelToCCtxParams(params); - params->cParams.targetLength = value; - return 0; + if (value) { /* 0 : does not change current sufficient_len */ + CLAMPCHECK(value, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); + ZSTD_cLevelToCCtxParams(CCtxParams); + CCtxParams->cParams.targetLength = value; + } + return CCtxParams->cParams.targetLength; case ZSTD_p_compressionStrategy : - if (value == 0) return 0; - CLAMPCHECK(value, (unsigned)ZSTD_fast, (unsigned)ZSTD_btultra); - ZSTD_cLevelToCCtxParams(params); - params->cParams.strategy = (ZSTD_strategy)value; - return 0; + if (value) { /* 0 : does not change currentstrategy */ + CLAMPCHECK(value, (unsigned)ZSTD_fast, (unsigned)ZSTD_btultra); + ZSTD_cLevelToCCtxParams(CCtxParams); + CCtxParams->cParams.strategy = (ZSTD_strategy)value; + } + return (size_t)CCtxParams->cParams.strategy; case ZSTD_p_contentSizeFlag : /* Content size written in frame header _when known_ (default:1) */ DEBUGLOG(4, "set content size flag = %u", (value>0)); - params->fParams.contentSizeFlag = value > 0; - return 0; + CCtxParams->fParams.contentSizeFlag = value > 0; + return CCtxParams->fParams.contentSizeFlag; case ZSTD_p_checksumFlag : /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */ - params->fParams.checksumFlag = value > 0; - return 0; + CCtxParams->fParams.checksumFlag = value > 0; + return CCtxParams->fParams.checksumFlag; case ZSTD_p_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */ DEBUGLOG(4, "set dictIDFlag = %u", (value>0)); - 
params->fParams.noDictIDFlag = (value == 0); - return 0; + CCtxParams->fParams.noDictIDFlag = (value == 0); + return !CCtxParams->fParams.noDictIDFlag; case ZSTD_p_forceMaxWindow : - params->forceWindow = value > 0; - return 0; + CCtxParams->forceWindow = (value > 0); + return CCtxParams->forceWindow; case ZSTD_p_nbThreads : - if (value == 0) return 0; + if (value == 0) return CCtxParams->nbThreads; #ifndef ZSTD_MULTITHREAD if (value > 1) return ERROR(parameter_unsupported); - return 0; + return 1; #else - return ZSTDMT_initializeCCtxParameters(params, value); + return ZSTDMT_CCtxParam_setNbThreads(CCtxParams, value); #endif case ZSTD_p_jobSize : #ifndef ZSTD_MULTITHREAD return ERROR(parameter_unsupported); #else - if (params->nbThreads <= 1) return ERROR(parameter_unsupported); - return ZSTDMT_CCtxParam_setMTCtxParameter(params, ZSTDMT_p_sectionSize, value); + if (CCtxParams->nbThreads <= 1) return ERROR(parameter_unsupported); + return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_sectionSize, value); #endif case ZSTD_p_overlapSizeLog : #ifndef ZSTD_MULTITHREAD return ERROR(parameter_unsupported); #else - if (params->nbThreads <= 1) return ERROR(parameter_unsupported); - return ZSTDMT_CCtxParam_setMTCtxParameter(params, ZSTDMT_p_overlapSectionLog, value); + if (CCtxParams->nbThreads <= 1) return ERROR(parameter_unsupported); + return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_overlapSectionLog, value); #endif case ZSTD_p_enableLongDistanceMatching : if (value != 0) { - ZSTD_cLevelToCCtxParams(params); - params->cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; + ZSTD_cLevelToCCtxParams(CCtxParams); + CCtxParams->cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; } - return ZSTD_ldm_initializeParameters(&params->ldmParams, value); + return ZSTD_ldm_initializeParameters(&CCtxParams->ldmParams, value); case ZSTD_p_ldmHashLog : - if (value == 0) return 0; - CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); - params->ldmParams.hashLog = value; - 
return 0; + if (value) { /* 0 : does not change current ldmHashLog */ + CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); + CCtxParams->ldmParams.hashLog = value; + } + return CCtxParams->ldmParams.hashLog; case ZSTD_p_ldmMinMatch : - if (value == 0) return 0; - CLAMPCHECK(value, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX); - params->ldmParams.minMatchLength = value; - return 0; + if (value) { /* 0 : does not change current ldmMinMatch */ + CLAMPCHECK(value, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX); + CCtxParams->ldmParams.minMatchLength = value; + } + return CCtxParams->ldmParams.minMatchLength; case ZSTD_p_ldmBucketSizeLog : if (value > ZSTD_LDM_BUCKETSIZELOG_MAX) { return ERROR(parameter_outOfBound); } - params->ldmParams.bucketSizeLog = value; - return 0; + CCtxParams->ldmParams.bucketSizeLog = value; + return value; case ZSTD_p_ldmHashEveryLog : if (value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) { return ERROR(parameter_outOfBound); } - params->ldmParams.hashEveryLog = value; - return 0; + CCtxParams->ldmParams.hashEveryLog = value; + return value; default: return ERROR(parameter_unsupported); } @@ -779,12 +766,29 @@ static U32 ZSTD_equivalentLdmParams(ldmParams_t ldmParams1, ldmParams1.hashEveryLog == ldmParams2.hashEveryLog); } +typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e; + +/* ZSTD_sufficientBuff() : + * check internal buffers exist for streaming if buffPol == ZSTDb_buffered . + * Note : they are assumed to be correctly sized if ZSTD_equivalentCParams()==1 */ +static U32 ZSTD_sufficientBuff(size_t bufferSize, ZSTD_buffered_policy_e buffPol, ZSTD_compressionParameters cParams2, U64 pledgedSrcSize) +{ + size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << cParams2.windowLog), pledgedSrcSize)); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); + size_t const neededBufferSize = (buffPol==ZSTDb_buffered) ? 
windowSize + blockSize : 0; + return (neededBufferSize <= bufferSize); +} + /** Equivalence for resetCCtx purposes */ static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1, - ZSTD_CCtx_params params2) + ZSTD_CCtx_params params2, + size_t buffSize1, + ZSTD_buffered_policy_e buffPol2, + U64 pledgedSrcSize) { return ZSTD_equivalentCParams(params1.cParams, params2.cParams) && - ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams); + ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams) && + ZSTD_sufficientBuff(buffSize1, buffPol2, params2.cParams, pledgedSrcSize); } /*! ZSTD_continueCCtx() : @@ -813,7 +817,6 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pl } typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e; -typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e; /*! ZSTD_resetCCtx_internal() : note : `params` are assumed fully validated at this stage */ @@ -826,8 +829,8 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); if (crp == ZSTDcrp_continue) { - if (ZSTD_equivalentParams(params, zc->appliedParams)) { - DEBUGLOG(4, "ZSTD_equivalentParams()==1"); + if (ZSTD_equivalentParams(params, zc->appliedParams, zc->inBuffSize, zbuff, pledgedSrcSize)) { + DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode"); assert(!(params.ldmParams.enableLdm && params.ldmParams.hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET)); zc->entropy->hufCTable_repeatMode = HUF_repeat_none; @@ -836,6 +839,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, zc->entropy->litlength_repeatMode = FSE_repeat_none; return ZSTD_continueCCtx(zc, params, pledgedSrcSize); } } + DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx"); if (params.ldmParams.enableLdm) { /* Adjust long distance matching parameters */ @@ -846,7 +850,8 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, 
ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength); } - { size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.cParams.windowLog); + { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); U32 const divider = (params.cParams.searchLength==3) ? 3 : 4; size_t const maxNbSeq = blockSize / divider; size_t const tokenSpace = blockSize + 11*maxNbSeq; @@ -858,7 +863,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, size_t const h3Size = ((size_t)1) << hashLog3; size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0; - size_t const buffInSize = (zbuff==ZSTDb_buffered) ? ((size_t)1 << params.cParams.windowLog) + blockSize : 0; + size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0; void* ptr; /* Check if workSpace is large enough, alloc a new one if needed */ @@ -874,11 +879,15 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, : 0; size_t const neededSpace = entropySpace + optSpace + ldmSpace + tableSpace + tokenSpace + bufferSpace; + DEBUGLOG(4, "Need %uKB workspace, including %uKB for tables, and %uKB for buffers", + (U32)(neededSpace>>10), (U32)(tableSpace>>10), (U32)(bufferSpace>>10)); + DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u - windowSize: %u", + (U32)chainSize, (U32)hSize, (U32)h3Size, (U32)windowSize); if (zc->workSpaceSize < neededSpace) { /* too small : resize */ - DEBUGLOG(5, "Need to update workSpaceSize from %uK to %uK \n", - (unsigned)zc->workSpaceSize>>10, - (unsigned)neededSpace>>10); + DEBUGLOG(4, "Need to update workSpaceSize from %uK to %uK", + (unsigned)(zc->workSpaceSize>>10), + (unsigned)(neededSpace>>10)); /* static cctx : no resize, error out */ if (zc->staticSize) return ERROR(memory_allocation); @@ -901,7 +910,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, 
zc->consumedSrcSize = 0; if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) zc->appliedParams.fParams.contentSizeFlag = 0; - DEBUGLOG(5, "pledged content size : %u ; flag : %u", + DEBUGLOG(4, "pledged content size : %u ; flag : %u", (U32)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag); zc->blockSize = blockSize; @@ -927,7 +936,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, /* opt parser space */ if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btultra)) { - DEBUGLOG(5, "reserving optimal parser space"); + DEBUGLOG(4, "reserving optimal parser space"); assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ zc->optState.litFreq = (U32*)ptr; zc->optState.litLengthFreq = zc->optState.litFreq + (1<hashTable = (U32*)(ptr); @@ -999,8 +1009,8 @@ void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) { /*! ZSTD_copyCCtx_internal() : * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. - * The "context", in this case, refers to the hash and chain tables, entropy - * tables, and dictionary offsets. + * The "context", in this case, refers to the hash and chain tables, + * entropy tables, and dictionary offsets. * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). * pledgedSrcSize=0 means "empty". 
* @return : 0, or an error code */ @@ -1397,7 +1407,7 @@ size_t ZSTD_encodeSequences( U32 const llBits = LL_bits[llCode]; U32 const ofBits = ofCode; U32 const mlBits = ML_bits[mlCode]; - DEBUGLOG(6, "encoding: litlen:%u - matchlen:%u - offCode:%u", + DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u", sequences[n].litLength, sequences[n].matchLength + MINMATCH, sequences[n].offset); /* 32b*/ /* 64b*/ @@ -1606,6 +1616,13 @@ static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, seqStorePtr->lit += lastLLSize; } +static void ZSTD_resetSeqStore(seqStore_t* ssPtr) +{ + ssPtr->lit = ssPtr->litStart; + ssPtr->sequences = ssPtr->sequencesStart; + ssPtr->longLengthID = 0; +} + static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { DEBUGLOG(5, "ZSTD_compressBlock_internal : dstCapacity = %u", (U32)dstCapacity); @@ -1757,7 +1774,6 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, !params.fParams.noDictIDFlag, dictID, dictIDSizeCode); if (params.format == ZSTD_f_zstd1) { - DEBUGLOG(4, "writing zstd magic number"); MEM_writeLE32(dst, ZSTD_MAGICNUMBER); pos = 4; } @@ -1791,8 +1807,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, const BYTE* const ip = (const BYTE*) src; size_t fhSize = 0; - DEBUGLOG(5, "ZSTD_compressContinue_internal"); - DEBUGLOG(5, "stage: %u", cctx->stage); + DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u", cctx->stage); if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */ if (frame && (cctx->stage==ZSTDcs_init)) { @@ -2021,7 +2036,7 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictMode_e dictMode) { - DEBUGLOG(5, "ZSTD_compress_insertDictionary"); + DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); if ((dict==NULL) || (dictSize<=8)) return 0; /* dict restricted modes */ @@ -2030,7 +2045,7 @@ 
static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* cctx, if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { if (dictMode == ZSTD_dm_auto) { - DEBUGLOG(5, "raw content dictionary detected"); + DEBUGLOG(4, "raw content dictionary detected"); return ZSTD_loadDictionaryContent(cctx, dict, dictSize); } if (dictMode == ZSTD_dm_fullDict) @@ -2057,6 +2072,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, assert(!((dict) && (cdict))); /* either dict or cdict, not both */ if (cdict && cdict->dictContentSize>0) { + cctx->requestedParams = params; return ZSTD_copyCCtx_internal(cctx, cdict->refContext, params.fParams, pledgedSrcSize, zbuff); @@ -2101,6 +2117,7 @@ size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t di ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize); ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); + DEBUGLOG(4, "ZSTD_compressBegin_usingDict"); return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dm_auto, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); } @@ -2183,6 +2200,7 @@ static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx, { ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params); + DEBUGLOG(4, "ZSTD_compress_internal"); return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, @@ -2196,6 +2214,7 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, const void* dict,size_t dictSize, ZSTD_parameters params) { + DEBUGLOG(4, "ZSTD_compress_advanced"); CHECK_F(ZSTD_checkCParams(params.cParams)); return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params); } @@ -2208,6 +2227,7 @@ size_t ZSTD_compress_advanced_internal( const void* dict,size_t dictSize, ZSTD_CCtx_params params) { + DEBUGLOG(4, "ZSTD_compress_advanced_internal"); CHECK_F( ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dm_auto, NULL, params, srcSize, 
ZSTDb_not_buffered) ); return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); @@ -2218,6 +2238,8 @@ size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, co { ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize ? srcSize : 1, dict ? dictSize : 0); params.fParams.contentSizeFlag = 1; + DEBUGLOG(4, "ZSTD_compress_usingDict (level=%i, srcSize=%u, dictSize=%u)", + compressionLevel, (U32)srcSize, (U32)dictSize); return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params); } @@ -2274,7 +2296,7 @@ static size_t ZSTD_initCDict_internal( ZSTD_dictMode_e dictMode, ZSTD_compressionParameters cParams) { - DEBUGLOG(5, "ZSTD_initCDict_internal, mode %u", (U32)dictMode); + DEBUGLOG(4, "ZSTD_initCDict_internal, mode %u", (U32)dictMode); if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { cdict->dictBuffer = NULL; cdict->dictContent = dictBuffer; @@ -2413,11 +2435,11 @@ size_t ZSTD_compressBegin_usingCDict_advanced( ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) { + DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); if (cdict==NULL) return ERROR(dictionary_wrong); { ZSTD_CCtx_params params = cctx->requestedParams; params.cParams = ZSTD_getCParamsFromCDict(cdict); params.fParams = fParams; - DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); return ZSTD_compressBegin_internal(cctx, NULL, 0, ZSTD_dm_auto, cdict, @@ -2497,9 +2519,9 @@ size_t ZSTD_CStreamOutSize(void) } static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, ZSTD_dictMode_e dictMode, - const ZSTD_CDict* cdict, - const ZSTD_CCtx_params params, unsigned long long pledgedSrcSize) + const void* const dict, size_t const dictSize, ZSTD_dictMode_e const dictMode, + const ZSTD_CDict* const cdict, + ZSTD_CCtx_params const params, unsigned long long const pledgedSrcSize) { DEBUGLOG(4, 
"ZSTD_resetCStream_internal"); /* params are supposed to be fully validated at this point */ @@ -2578,8 +2600,9 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize) -{ /* cannot handle NULL cdict (does not know what to do) */ - if (!cdict) return ERROR(dictionary_wrong); +{ + DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); + if (!cdict) return ERROR(dictionary_wrong); /* cannot handle NULL cdict (does not know what to do) */ { ZSTD_CCtx_params params = zcs->requestedParams; params.cParams = ZSTD_getCParamsFromCDict(cdict); params.fParams = fParams; @@ -2592,8 +2615,9 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, /* note : cdict must outlive compression session */ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) { - ZSTD_frameParameters const fParams = { 0 /* contentSize */, 0 /* checksum */, 0 /* hideDictID */ }; - return ZSTD_initCStream_usingCDict_advanced(zcs, cdict, fParams, 0); /* note : will check that cdict != NULL */ + ZSTD_frameParameters const fParams = { 0 /* contentSizeFlag */, 0 /* checksum */, 0 /* hideDictID */ }; + DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); + return ZSTD_initCStream_usingCDict_advanced(zcs, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); /* note : will check that cdict != NULL */ } /* ZSTD_initCStream_advanced() : @@ -2815,16 +2839,18 @@ size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, /* transparent initialization stage */ if (cctx->streamStage == zcss_init) { - ZSTD_prefixDict const prefixDict = cctx->prefixDict; ZSTD_CCtx_params params = cctx->requestedParams; - if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1; - params.cParams = ZSTD_getCParamsFromCCtxParams( - cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/); + ZSTD_prefixDict const prefixDict = cctx->prefixDict; memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ 
assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ DEBUGLOG(4, "ZSTD_compress_generic : transparent init stage"); + if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1; /* auto-fix pledgedSrcSize */ + params.cParams = ZSTD_getCParamsFromCCtxParams( + cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/); #ifdef ZSTD_MULTITHREAD + if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) + params.nbThreads = 1; /* do not invoke multi-threading when src size is too small */ if (params.nbThreads > 1) { if (cctx->mtctx == NULL || cctx->appliedParams.nbThreads != params.nbThreads) { ZSTDMT_freeCCtx(cctx->mtctx); @@ -2845,6 +2871,7 @@ size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, prefixDict.dictMode, cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) ); assert(cctx->streamStage == zcss_load); + assert(cctx->appliedParams.nbThreads <= 1); } } /* compression stage */ @@ -2901,8 +2928,7 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; size_t const checksumSize = zcs->frameEnded ? 
0 : zcs->appliedParams.fParams.checksumFlag * 4; size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize + lastBlockSize + checksumSize; - DEBUGLOG(5, "ZSTD_endStream : remaining to flush : %u", - (unsigned)toFlush); + DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (U32)toFlush); return toFlush; } } diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index e29a61b7..efcbc450 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -235,11 +235,11 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v { #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 6) static const BYTE* g_start = NULL; - U32 const pos = (U32)((const BYTE*)literals - g_start); if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ - if ((pos > 0) && (pos < 1000000000)) - DEBUGLOG(6, "Cpos %6u :%5u literals & match %3u bytes at distance %6u", + { U32 const pos = (U32)((const BYTE*)literals - g_start); + DEBUGLOG(6, "Cpos%7u :%3u literals, match%3u bytes at dist.code%7u", pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode); + } #endif /* copy Literals */ assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + 128 KB); diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index d80e06f7..cd3d1530 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -18,8 +18,9 @@ /** ZSTD_insertBt1() : add one or multiple positions to tree. * ip : assumed <= iend-8 . 
* @return : nb of positions added */ -static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares, - U32 extDict) +static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iend, + U32 nbCompares, U32 const mls, U32 const extDict) { U32* const hashTable = zc->hashTable; U32 const hashLog = zc->appliedParams.cParams.hashLog; @@ -50,12 +51,15 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co predictedLarge += (predictedLarge>0); #endif /* ZSTD_C_PREDICT */ + DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current); + assert(ip <= iend-8); /* required for h calculation */ hashTable[h] = current; /* Update Hash Table */ while (nbCompares-- && (matchIndex > windowLow)) { U32* const nextPtr = bt + 2*(matchIndex & btMask); size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(matchIndex < current); #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ @@ -77,7 +81,9 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co continue; } #endif + if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* might be wrong if extDict is incorrectly set to 0 */ match = base + matchIndex; if (match[matchLength] == ip[matchLength]) matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; @@ -94,16 +100,17 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co matchEndIdx = matchIndex + (U32)matchLength; } - if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } if 
(match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ - /* match+1 is smaller than current */ + /* match is smaller than current */ *smallerPtr = matchIndex; /* update smaller idx */ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ - smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ } else { /* match is larger than current */ *largerPtr = matchIndex; @@ -119,6 +126,36 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co return 1; } +FORCE_INLINE_TEMPLATE +void ZSTD_updateTree_internal(ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iend, + const U32 nbCompares, const U32 mls, const U32 extDict) +{ + const BYTE* const base = zc->base; + U32 const target = (U32)(ip - base); + U32 idx = zc->nextToUpdate; + DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (extDict:%u)", + idx, target, extDict); + + while(idx < target) + idx += ZSTD_insertBt1(zc, base+idx, iend, nbCompares, mls, extDict); + zc->nextToUpdate = target; +} + +void ZSTD_updateTree(ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iend, + const U32 nbCompares, const U32 mls) +{ + ZSTD_updateTree_internal(zc, ip, iend, nbCompares, mls, 0 /*extDict*/); +} + +void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iend, + const U32 nbCompares, const U32 mls) +{ + ZSTD_updateTree_internal(zc, ip, iend, nbCompares, mls, 1 /*extDict*/); +} + static size_t ZSTD_insertBtAndFindBestMatch ( ZSTD_CCtx* zc, @@ -173,8 +210,9 @@ static size_t ZSTD_insertBtAndFindBestMatch ( matchEndIdx 
= matchIndex + (U32)matchLength; if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; - if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } } if (match[matchLength] < ip[matchLength]) { @@ -195,21 +233,11 @@ static size_t ZSTD_insertBtAndFindBestMatch ( *smallerPtr = *largerPtr = 0; - zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1; + zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1; /* skip repetitive patterns */ return bestLength; } -void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) -{ - const BYTE* const base = zc->base; - const U32 target = (U32)(ip - base); - U32 idx = zc->nextToUpdate; - - while(idx < target) - idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0); -} - /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ static size_t ZSTD_BtFindBestMatch ( ZSTD_CCtx* zc, @@ -240,16 +268,6 @@ static size_t ZSTD_BtFindBestMatch_selectMLS ( } -void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) -{ - const BYTE* const base = zc->base; - const U32 target = (U32)(ip - base); - U32 idx = zc->nextToUpdate; - - while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1); -} - - /** Tree updater, providing best match */ static size_t ZSTD_BtFindBestMatch_extDict ( ZSTD_CCtx* zc, diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 26a1a1a8..aebe55f8 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -17,6 +17,7 @@ #define ZSTD_FREQ_DIV 4 /* log factor when using previous 
stats to init next stats */ #define ZSTD_MAX_PRICE (1<<30) + /*-************************************* * Price functions for optimal parser ***************************************/ @@ -95,7 +96,7 @@ static void ZSTD_rescaleFreqs(optState_t* optPtr, const BYTE* src, size_t srcSiz } -static U32 ZSTD_getLiteralPrice(optState_t* optPtr, U32 litLength, const BYTE* literals) +static U32 ZSTD_getLiteralPrice(optState_t* optPtr, U32 const litLength, const BYTE* literals) { U32 price; @@ -143,18 +144,20 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_getPrice(optState_t* optPtr, U32 litLength, const U32 const mlBase = matchLength - MINMATCH; U32 price; - if (optPtr->staticPrices) + if (optPtr->staticPrices) /* fixed scheme, do not use statistics */ return ZSTD_getLiteralPrice(optPtr, litLength, literals) + ZSTD_highbit32((U32)mlBase+1) + 16 + offCode; price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1); - if (!ultra && offCode >= 20) price += (offCode-19)*2; /* handicap for long matches, to favor decompression speed */ + if (!ultra /*static*/ && offCode >= 20) price += (offCode-19)*2; /* handicap for long distance offsets, favor decompression speed */ /* match Length */ { U32 const mlCode = ZSTD_MLcode(mlBase); price += ML_bits[mlCode] + optPtr->log2matchLengthSum - ZSTD_highbit32(optPtr->matchLengthFreq[mlCode]+1); } - return price + ZSTD_getLiteralPrice(optPtr, litLength, literals) + optPtr->factor; + price += ZSTD_getLiteralPrice(optPtr, litLength, literals) + optPtr->factor; + DEBUGLOG(8, "ZSTD_getPrice(ll:%u, ml:%u) = %u", litLength, matchLength, price); + return price; } @@ -190,19 +193,8 @@ static void ZSTD_updatePrice(optState_t* optPtr, U32 litLength, const BYTE* lite } -/* update opt[pos] and last_pos */ -#define SET_PRICE(pos, mlen_, offset_, litlen_, price_) \ - { \ - while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } \ - opt[pos].mlen = mlen_; \ - opt[pos].off = offset_; \ - opt[pos].litlen = litlen_; \ - 
opt[pos].price = price_; \ - } - - /* function safe only for comparisons */ -static U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) +MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) { switch (length) { @@ -239,71 +231,113 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip) /*-************************************* * Binary Tree search ***************************************/ -static U32 ZSTD_insertBtAndGetAllMatches ( +FORCE_INLINE_TEMPLATE +U32 ZSTD_insertBtAndGetAllMatches ( ZSTD_CCtx* zc, - const BYTE* const ip, const BYTE* const iLimit, - U32 nbCompares, const U32 mls, - U32 extDict, ZSTD_match_t* matches, const U32 minMatchLen) + const BYTE* const ip, const BYTE* const iLimit, const int extDict, + U32 nbCompares, U32 const mls, + U32 rep[ZSTD_REP_NUM], U32 const ll0, + ZSTD_match_t* matches, const U32 minMatchLen) { const BYTE* const base = zc->base; - const U32 current = (U32)(ip-base); - const U32 hashLog = zc->appliedParams.cParams.hashLog; - const size_t h = ZSTD_hashPtr(ip, hashLog, mls); + U32 const current = (U32)(ip-base); + U32 const hashLog = zc->appliedParams.cParams.hashLog; + U32 const minMatch = (mls==3) ? 3 : 4; U32* const hashTable = zc->hashTable; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); U32 matchIndex = hashTable[h]; U32* const bt = zc->chainTable; - const U32 btLog = zc->appliedParams.cParams.chainLog - 1; - const U32 btMask= (1U << btLog) - 1; + U32 const btLog = zc->appliedParams.cParams.chainLog - 1; + U32 const btMask= (1U << btLog) - 1; size_t commonLengthSmaller=0, commonLengthLarger=0; const BYTE* const dictBase = zc->dictBase; - const U32 dictLimit = zc->dictLimit; + U32 const dictLimit = zc->dictLimit; const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const prefixStart = base + dictLimit; - const U32 btLow = btMask >= current ? 0 : current - btMask; - const U32 windowLow = zc->lowLimit; + U32 const btLow = btMask >= current ? 
0 : current - btMask; + U32 const windowLow = zc->lowLimit; U32* smallerPtr = bt + 2*(current&btMask); U32* largerPtr = bt + 2*(current&btMask) + 1; - U32 matchEndIdx = current+8; + U32 matchEndIdx = current+8; /* farthest referenced position of any match => detects repetitive patterns */ U32 dummy32; /* to be nullified at the end */ U32 mnum = 0; - const U32 minMatch = (mls == 3) ? 3 : 4; size_t bestLength = minMatchLen-1; + DEBUGLOG(7, "ZSTD_insertBtAndGetAllMatches"); - if (minMatch == 3) { /* HC3 match finder */ + /* check repCode */ + { U32 const lastR = ZSTD_REP_NUM + ll0; + U32 repCode; + for (repCode = ll0; repCode < lastR; repCode++) { + U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + U32 const repIndex = current - repOffset; + U32 repLen = 0; + assert(current >= dictLimit); + if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */ + if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) { + repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch; + } + } else { /* repIndex < dictLimit || repIndex >= current */ + const BYTE* const repMatch = dictBase + repIndex; + assert(current >= windowLow); + if ( extDict /* this case only valid in extDict mode */ + && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */ + & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */) + && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { + repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch; + } } + /* save longer solution */ + if (repLen > bestLength) { + DEBUGLOG(8, "found rep-match %u of length %u", + repCode - ll0, (U32)repLen); + bestLength = repLen; + matches[mnum].off = repCode - ll0; + 
matches[mnum].len = (U32)repLen; + mnum++; + if ( (repLen > ZSTD_OPT_NUM) + | (ip+repLen == iLimit) ) { /* best possible */ + return mnum; + } } } } + + /* HC3 match finder */ + if ((mls == 3) /*static*/ && (bestLength < mls)) { U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip); - if (matchIndex3>windowLow && (current - matchIndex3 < (1<<18))) { - const BYTE* match; - size_t currentMl=0; - if ((!extDict) || matchIndex3 >= dictLimit) { - match = base + matchIndex3; - if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit); + if ((matchIndex3 > windowLow) + & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { + size_t mlen; + if ((!extDict) /*static*/ || (matchIndex3 >= dictLimit)) { + const BYTE* const match = base + matchIndex3; + mlen = ZSTD_count(ip, match, iLimit); } else { - match = dictBase + matchIndex3; - if (ZSTD_readMINMATCH(match, MINMATCH) == ZSTD_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH; + const BYTE* const match = dictBase + matchIndex3; + mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart); } /* save best solution */ - if (currentMl > bestLength) { - bestLength = currentMl; - matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex3; - matches[mnum].len = (U32)currentMl; - mnum++; - if (currentMl > ZSTD_OPT_NUM) goto update; - if (ip+currentMl == iLimit) goto update; /* best possible, and avoid read overflow*/ - } - } - } + if (mlen >= mls /* == 3 > bestLength */) { + DEBUGLOG(8, "found small match with hlog3, of length %u", + (U32)mlen); + bestLength = mlen; + assert(current > matchIndex3); + assert(mnum==0); /* no prior solution */ + matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE; + matches[0].len = (U32)mlen; + mnum = 1; + if ( (mlen > ZSTD_OPT_NUM) + | (ip+mlen == iLimit) ) { 
/* best possible */ + return 1; + } } } } hashTable[h] = current; /* Update Hash Table */ while (nbCompares-- && (matchIndex > windowLow)) { - U32* nextPtr = bt + 2*(matchIndex & btMask); + U32* const nextPtr = bt + 2*(matchIndex & btMask); size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ const BYTE* match; + assert(current > matchIndex); if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */ match = base + matchIndex; if (match[matchLength] == ip[matchLength]) { matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1; @@ -312,29 +346,33 @@ static U32 ZSTD_insertBtAndGetAllMatches ( match = dictBase + matchIndex; matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + match = base + matchIndex; /* prepare for match[matchLength] */ } if (matchLength > bestLength) { - if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; + DEBUGLOG(8, "found match of length %u at distance %u", + (U32)matchLength, current - matchIndex); + assert(matchEndIdx > matchIndex); + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; bestLength = matchLength; - matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex; + matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE; matches[mnum].len = (U32)matchLength; mnum++; if (matchLength > ZSTD_OPT_NUM) break; - if (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */ - break; /* drop, to guarantee consistency (miss a little bit of compression) */ + if (ip+matchLength == iLimit) { /* equal : no way to know if inf or sup */ + break; /* drop, to preserve bt consistency (miss a little bit of 
compression) */ + } } if (match[matchLength] < ip[matchLength]) { - /* match is smaller than current */ + /* match smaller than current */ *smallerPtr = matchIndex; /* update smaller idx */ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */ } else { - /* match is larger than current */ *largerPtr = matchIndex; commonLengthLarger = matchLength; if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ @@ -344,65 +382,30 @@ static U32 ZSTD_insertBtAndGetAllMatches ( *smallerPtr = *largerPtr = 0; -update: - zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1; + zc->nextToUpdate = (matchEndIdx > current + 8) ? 
matchEndIdx - 8 : current+1; /* skip repetitive patterns */ return mnum; } -/** Tree updater, providing best match */ -static U32 ZSTD_BtGetAllMatches ( - ZSTD_CCtx* zc, - const BYTE* const ip, const BYTE* const iLimit, - const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen) -{ - if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen); -} - - -static U32 ZSTD_BtGetAllMatches_selectMLS ( +FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* ip, const BYTE* const iHighLimit, - const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen) + const BYTE* ip, const BYTE* const iHighLimit, const int extDict, + const U32 maxNbAttempts, const U32 matchLengthSearch, + U32 rep[ZSTD_REP_NUM], U32 const ll0, + ZSTD_match_t* matches, const U32 minMatchLen) { + DEBUGLOG(7, "ZSTD_BtGetAllMatches"); + if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ + if (extDict) ZSTD_updateTree_extDict(zc, ip, iHighLimit, maxNbAttempts, matchLengthSearch); + else ZSTD_updateTree(zc, ip, iHighLimit, maxNbAttempts, matchLengthSearch); switch(matchLengthSearch) { - case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); + case 3 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 3, rep, ll0, matches, minMatchLen); default : - case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); - case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); + case 4 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 4, rep, ll0, matches, minMatchLen); + case 5 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, 
extDict, maxNbAttempts, 5, rep, ll0, matches, minMatchLen); case 7 : - case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); - } -} - -/** Tree updater, providing best match */ -static U32 ZSTD_BtGetAllMatches_extDict ( - ZSTD_CCtx* zc, - const BYTE* const ip, const BYTE* const iLimit, - const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen) -{ - if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen); -} - - -static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( - ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* ip, const BYTE* const iHighLimit, - const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen) -{ - switch(matchLengthSearch) - { - case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); - default : - case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); - case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); - case 7 : - case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); + case 6 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 6, rep, ll0, matches, minMatchLen); } } @@ -410,9 +413,46 @@ static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( /*-******************************* * Optimal parser *********************************/ +typedef struct repcodes_s { + U32 rep[3]; +} repcodes_t; + +repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0) +{ + repcodes_t newReps; + if (offset >= ZSTD_REP_NUM) { /* full offset */ + newReps.rep[2] = rep[1]; + newReps.rep[1] = rep[0]; + newReps.rep[0] = offset - 
ZSTD_REP_MOVE; + } else { /* repcode */ + U32 const repCode = offset + ll0; + if (repCode > 0) { /* note : if repCode==0, no change */ + U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2]; + newReps.rep[1] = rep[0]; + newReps.rep[0] = currentOffset; + } else { /* repCode == 0 */ + memcpy(&newReps, rep, sizeof(newReps)); + } + } + return newReps; +} + +/* update opt[pos] and last_pos */ +#define SET_PRICE(pos, mlen_, offset_, litlen_, price_, rep_) \ + { \ + while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } \ + opt[pos].mlen = mlen_; \ + opt[pos].off = offset_; \ + opt[pos].litlen = litlen_; \ + opt[pos].price = price_; \ + memcpy(opt[pos].rep, &rep_, sizeof(rep_)); \ + } + FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, const int ultra) + const void* src, size_t srcSize, + const int ultra, const int extDict) { seqStore_t* const seqStorePtr = &(ctx->seqStore); optState_t* const optStatePtr = &(ctx->optState); @@ -434,6 +474,7 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, U32 rep[ZSTD_REP_NUM]; /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_opt_generic"); ctx->nextToUpdate3 = ctx->nextToUpdate; ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize); ip += (ip==prefixStart); @@ -443,63 +484,50 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, while (ip < ilimit) { U32 cur, last_pos = 0; U32 best_mlen, best_off; - U32 const initLL = (U32)(ip - anchor); - memset(opt, 0, sizeof(ZSTD_optimal_t)); - /* check repCode */ - { U32 const ll0 = !initLL; - U32 const lastR = ZSTD_REP_CHECK + ll0; - U32 repCode; - for (repCode = ll0; repCode < lastR; repCode++) { - S32 const repOffset = (repCode==ZSTD_REP_MOVE_OPT) ? 
(rep[0] - 1) : rep[repCode]; - if ( (repOffset > 0) - && (repOffset < (S32)(ip-prefixStart)) /* within current mem segment */ - && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) { - U32 repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iend) + minMatch; - if (repLen > sufficient_len) { - /* large repMatch => immediate encoding */ - best_mlen = repLen; best_off = repCode; cur = 0; last_pos = 1; - goto _shortestPath; - } - do { - U32 const repPrice = ZSTD_getPrice(optStatePtr, initLL, anchor, repCode - ll0, repLen, ultra); - if (repLen > last_pos || repPrice < opt[repLen].price) - SET_PRICE(repLen, repLen, repCode, initLL, repPrice); /* note : macro modifies last_pos */ - repLen--; - } while (repLen >= minMatch); - } } } + /* find first match */ + { U32 const litlen = (U32)(ip - anchor); + U32 const ll0 = !litlen; + U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, ip, iend, extDict, maxSearches, mls, rep, ll0, matches, minMatch); + if (!nbMatches) { ip++; continue; } - { U32 const nb_matches = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch); + /* initialize opt[0] */ + { U32 i ; for (i=0; i immediate encoding */ + { U32 const maxML = matches[nbMatches-1].len; + DEBUGLOG(7, "found %u matches of maxLength=%u and offset=%u at cPos=%u => start new serie", + nbMatches, maxML, matches[nbMatches-1].off, (U32)(ip-prefixStart)); - if (nb_matches && (matches[nb_matches-1].len > sufficient_len)) { - /* large match => immediate encoding */ - best_mlen = matches[nb_matches-1].len; - best_off = matches[nb_matches-1].off; - cur = 0; - last_pos = 1; - goto _shortestPath; - } + if (maxML > sufficient_len) { + best_mlen = maxML; + best_off = matches[nbMatches-1].off; + DEBUGLOG(7, "large match (%u>%u), immediate encoding", + best_mlen, sufficient_len); + cur = 0; + last_pos = 1; + goto _shortestPath; + } } - /* set prices for first matches from position == 0 */ - { U32 matchNb; - U32 pos = last_pos /*some 
repCode (assumed cheaper)*/ ? last_pos : minMatch; - for (matchNb = 0; matchNb < nb_matches; matchNb++) { + /* set prices for first matches starting position == 0 */ + { U32 pos = minMatch; + U32 matchNb; + for (matchNb = 0; matchNb < nbMatches; matchNb++) { + U32 const offset = matches[matchNb].off; U32 const end = matches[matchNb].len; + repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0); while (pos <= end) { - U32 const matchPrice = ZSTD_getPrice(optStatePtr, initLL, anchor, matches[matchNb].off-1, pos, ultra); - if (pos > last_pos || matchPrice < opt[pos].price) - SET_PRICE(pos, pos, matches[matchNb].off, initLL, matchPrice); /* note : macro modifies last_pos */ + U32 const matchPrice = ZSTD_getPrice(optStatePtr, litlen, anchor, offset, pos, ultra); + if (pos > last_pos || matchPrice < opt[pos].price) { + DEBUGLOG(7, "rPos:%u => set initial price : %u", + pos, matchPrice); + SET_PRICE(pos, pos, offset, litlen, matchPrice, repHistory); /* note : macro modifies last_pos */ + } pos++; - } } } } - - if (last_pos < minMatch) { ip++; continue; } - - /* initialize opt[0] */ - { U32 i ; for (i=0; i last_pos || price <= opt[cur].price) - SET_PRICE(cur, 1, 0, litlen, price); /* note : macro modifies last_pos */ - } + if (price <= opt[cur].price) { + DEBUGLOG(7, "rPos:%u : better price (%u<%u) using literal", + cur, price, opt[cur].price); + SET_PRICE(cur, 1/*mlen*/, 0/*offset*/, litlen, price, opt[cur-1].rep); + } } if (cur == last_pos) break; /* last match must start at a minimum distance of 8 from oend */ if (inr > ilimit) continue; - /* update repcodes */ - { U32 const mlen = opt[cur].mlen; - if (opt[cur].off > ZSTD_REP_MOVE_OPT) { - opt[cur].rep[2] = opt[cur-mlen].rep[1]; - opt[cur].rep[1] = opt[cur-mlen].rep[0]; - opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; - } else { - opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; - opt[cur].rep[1] = (opt[cur].off > 0) ? 
opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; - /* If opt[cur].off == ZSTD_REP_MOVE_OPT, then mlen != 1. - * offset ZSTD_REP_MOVE_OPT is used for the special case - * litLength == 0, where offset 0 means something special. - * mlen == 1 means the previous byte was stored as a literal, - * so they are mutually exclusive. - */ - assert(!(opt[cur].off == ZSTD_REP_MOVE_OPT && mlen == 1)); - opt[cur].rep[0] = (opt[cur].off == ZSTD_REP_MOVE_OPT) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]); - } } - - best_mlen = minMatch; { U32 const ll0 = (opt[cur].mlen != 1); - U32 const lastR = ZSTD_REP_CHECK + ll0; - U32 repCode4; /* universal referential */ - for (repCode4=ll0; repCode4 0) && (repCur < (S32)(inr-prefixStart)) /* within current mem segment */ - && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(inr - repCur, minMatch))) { - U32 matchLength = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch; - U32 const repCode3 = repCode4 - ll0; /* contextual referential, depends on ll0 */ - assert(repCode3 < 3); - - if (matchLength > sufficient_len || cur + matchLength >= ZSTD_OPT_NUM) { - best_mlen = matchLength; - best_off = repCode4; - last_pos = cur + 1; - goto _shortestPath; - } - - if (matchLength > best_mlen) best_mlen = matchLength; - - do { - U32 const litlen = ll0 ? 
0 : opt[cur].litlen; - U32 price; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, inr-litlen, repCode3, matchLength, ultra); - } else { - price = ZSTD_getPrice(optStatePtr, litlen, anchor, repCode3, matchLength, ultra); - } - - if (cur + matchLength > last_pos || price <= opt[cur + matchLength].price) - SET_PRICE(cur + matchLength, matchLength, repCode4, litlen, price); /* note : macro modifies last_pos */ - matchLength--; - } while (matchLength >= minMatch); - } } } - - { U32 const nb_matches = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen /*largest repLength*/); /* search for matches larger than repcodes */ + U32 const litlen = (opt[cur].mlen == 1) ? opt[cur].litlen : 0; + U32 const basePrice = (cur > litlen) ? opt[cur-litlen].price : 0; + const BYTE* const baseLiterals = ip + cur - litlen; + U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, inr, iend, extDict, maxSearches, mls, opt[cur].rep, ll0, matches, minMatch); U32 matchNb; + if (!nbMatches) continue; + assert(baseLiterals >= prefixStart); - if (nb_matches > 0 && (matches[nb_matches-1].len > sufficient_len || cur + matches[nb_matches-1].len >= ZSTD_OPT_NUM)) { - best_mlen = matches[nb_matches-1].len; - best_off = matches[nb_matches-1].off; - last_pos = cur + 1; - goto _shortestPath; + { U32 const maxML = matches[nbMatches-1].len; + DEBUGLOG(7, "rPos:%u, found %u matches, of maxLength=%u", + cur, nbMatches, maxML); + + if ( (maxML > sufficient_len) + | (cur + maxML >= ZSTD_OPT_NUM) ) { + best_mlen = maxML; + best_off = matches[nbMatches-1].off; + last_pos = cur + 1; + goto _shortestPath; + } } - /* set prices using matches at position = cur */ - for (matchNb = 0; matchNb < nb_matches; matchNb++) { - U32 mlen = (matchNb>0) ? matches[matchNb-1].len+1 : best_mlen; + /* set prices using matches found at position == cur */ + for (matchNb = 0; matchNb < nbMatches; matchNb++) { + U32 mlen = (matchNb>0) ? 
matches[matchNb-1].len+1 : minMatch; U32 const lastML = matches[matchNb].len; + U32 const offset = matches[matchNb].off; + repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0); + + DEBUGLOG(7, "testing match %u => offCode=%u, mlen=%u, llen=%u", + matchNb, matches[matchNb].off, lastML, litlen); while (mlen <= lastML) { - U32 const litlen = (opt[cur].mlen == 1) ? opt[cur].litlen : 0; - U32 price; - if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, ip+cur-litlen, matches[matchNb].off-1, mlen, ultra); - else - price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[matchNb].off-1, mlen, ultra); + U32 const pos = cur + mlen; + U32 const price = basePrice + ZSTD_getPrice(optStatePtr, litlen, baseLiterals, offset, mlen, ultra); + assert(pos < ZSTD_OPT_NUM); - if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) - SET_PRICE(cur + mlen, mlen, matches[matchNb].off, litlen, price); /* note : macro modifies last_pos */ + if ((pos > last_pos) | (price < opt[pos].price)) { + DEBUGLOG(7, "rPos:%u => new better price (%u<%u)", + pos, price, opt[pos].price); + SET_PRICE(pos, mlen, offset, litlen, price, repHistory); /* note : macro modifies last_pos */ + } mlen++; } } } } @@ -612,18 +604,20 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, cur = last_pos - best_mlen; _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ - opt[0].mlen = 1; + assert(opt[0].mlen == 1); /* reverse traversal */ - { U32 selected_matchLength = best_mlen; + DEBUGLOG(7, "start reverse traversal (last_pos:%u, cur:%u)", + last_pos, cur); + { U32 selectedMatchLength = best_mlen; U32 selectedOffset = best_off; U32 pos = cur; while (1) { U32 const mlen = opt[pos].mlen; U32 const off = opt[pos].off; - opt[pos].mlen = selected_matchLength; + opt[pos].mlen = selectedMatchLength; opt[pos].off = selectedOffset; - selected_matchLength = mlen; + selectedMatchLength = mlen; selectedOffset = off; if (mlen > pos) break; pos -= 
mlen; @@ -632,31 +626,30 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ /* save sequences */ { U32 pos; for (pos=0; pos < last_pos; ) { - U32 const litLength = (U32)(ip - anchor); + U32 const llen = (U32)(ip - anchor); U32 const mlen = opt[pos].mlen; - U32 offset = opt[pos].off; - if (mlen == 1) { ip++; pos++; continue; } - pos += mlen; + U32 const offset = opt[pos].off; + if (mlen == 1) { ip++; pos++; continue; } /* literal position => move on */ + pos += mlen; ip += mlen; - /* repcodes update */ - if (offset > ZSTD_REP_MOVE_OPT) { /* full offset */ + /* repcodes update : like ZSTD_updateRep(), but update in place */ + if (offset >= ZSTD_REP_NUM) { /* full offset */ rep[2] = rep[1]; rep[1] = rep[0]; - rep[0] = offset - ZSTD_REP_MOVE_OPT; - offset--; + rep[0] = offset - ZSTD_REP_MOVE; } else { /* repcode */ - if (offset != 0) { - U32 const currentOffset = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[offset]; - if (offset != 1) rep[2] = rep[1]; + U32 const repCode = offset + (llen==0); + if (repCode) { /* note : if repCode==0, no change */ + U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? 
(rep[0] - 1) : rep[repCode]; + if (repCode >= 2) rep[2] = rep[1]; rep[1] = rep[0]; rep[0] = currentOffset; } - if (litLength==0) offset--; } - ZSTD_updatePrice(optStatePtr, litLength, anchor, offset, mlen); - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); - anchor = ip = ip + mlen; + ZSTD_updatePrice(optStatePtr, llen, anchor, offset, mlen); + ZSTD_storeSeq(seqStorePtr, llen, anchor, offset, mlen-MINMATCH); + anchor = ip; } } } /* for (cur=0; cur < last_pos; ) */ @@ -670,293 +663,21 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0); + DEBUGLOG(5, "ZSTD_compressBlock_btopt"); + return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*ultra*/, 0 /*extDict*/); } size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1); + return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1 /*ultra*/, 0 /*extDict*/); } - -FORCE_INLINE_TEMPLATE -size_t ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, const int ultra) -{ - seqStore_t* seqStorePtr = &(ctx->seqStore); - optState_t* optStatePtr = &(ctx->optState); - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ctx->base; - const U32 lowestIndex = ctx->lowLimit; - const U32 dictLimit = ctx->dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* const dictBase = ctx->dictBase; - const BYTE* const dictEnd = dictBase + dictLimit; - - const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog; - const U32 sufficient_len = ctx->appliedParams.cParams.targetLength; - const U32 mls = ctx->appliedParams.cParams.searchLength; - 
const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4; - - ZSTD_optimal_t* opt = optStatePtr->priceTable; - ZSTD_match_t* matches = optStatePtr->matchTable; - const BYTE* inr; - - /* init */ - U32 offset, rep[ZSTD_REP_NUM]; - { U32 i; for (i=0; irep[i]; } - - ctx->nextToUpdate3 = ctx->nextToUpdate; - ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize); - ip += (ip==prefixStart); - - /* Match Loop */ - while (ip < ilimit) { - U32 cur, match_num, last_pos, litlen, price; - U32 u, mlen, best_mlen, best_off, litLength; - U32 current = (U32)(ip-base); - memset(opt, 0, sizeof(ZSTD_optimal_t)); - last_pos = 0; - opt[0].litlen = (U32)(ip - anchor); - - /* check repCode */ - { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); - for (i = (ip==anchor); i 0 && repCur <= (S32)current) - && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ - && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { - /* repcode detected we should take it */ - const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; - mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; - - if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; - goto _storeSequence; - } - - best_off = i - (ip==anchor); - litlen = opt[0].litlen; - do { - price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen, ultra); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ - mlen--; - } while (mlen >= minMatch); - } } } - - match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */ - - if (!last_pos && !match_num) { ip++; continue; } - - { U32 i; for (i=0; i sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) { - best_mlen = matches[match_num-1].len; - best_off = matches[match_num-1].off; - cur = 0; - last_pos = 1; - goto _storeSequence; - } - - best_mlen = (last_pos) ? last_pos : minMatch; - - /* set prices using matches at position = 0 */ - for (u = 0; u < match_num; u++) { - mlen = (u>0) ? 
matches[u-1].len+1 : best_mlen; - best_mlen = matches[u].len; - litlen = opt[0].litlen; - while (mlen <= best_mlen) { - price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen, ultra); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, matches[u].off, litlen, price); - mlen++; - } } - - if (last_pos < minMatch) { - ip++; continue; - } - - /* check further positions */ - for (cur = 1; cur <= last_pos; cur++) { - inr = ip + cur; - - if (opt[cur-1].mlen == 1) { - litlen = opt[cur-1].litlen + 1; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-litlen); - } else - price = ZSTD_getLiteralPrice(optStatePtr, litlen, anchor); - } else { - litlen = 1; - price = opt[cur - 1].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-1); - } - - if (cur > last_pos || price <= opt[cur].price) - SET_PRICE(cur, 1, 0, litlen, price); - - if (cur == last_pos) break; - - if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */ - continue; - - mlen = opt[cur].mlen; - if (opt[cur].off > ZSTD_REP_MOVE_OPT) { - opt[cur].rep[2] = opt[cur-mlen].rep[1]; - opt[cur].rep[1] = opt[cur-mlen].rep[0]; - opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; - } else { - opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; - opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; - assert(!(opt[cur].off == ZSTD_REP_MOVE_OPT && mlen == 1)); - opt[cur].rep[0] = (opt[cur].off == ZSTD_REP_MOVE_OPT) ? 
(opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]); - } - - best_mlen = minMatch; - { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); - for (i = (mlen != 1); i 0 && repCur <= (S32)(current+cur)) - && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ - && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { - /* repcode detected */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; - - if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; best_off = i; last_pos = cur + 1; - goto _storeSequence; - } - - best_off = i - (opt[cur].mlen != 1); - if (mlen > best_mlen) best_mlen = mlen; - - do { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, inr-litlen, best_off, mlen, ultra); - } else - price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, best_off, mlen, ultra); - } - - if (cur + mlen > last_pos || price <= opt[cur + mlen].price) - SET_PRICE(cur + mlen, mlen, i, litlen, price); - mlen--; - } while (mlen >= minMatch); - } } } - - match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch); - - if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) { - best_mlen = matches[match_num-1].len; - best_off = matches[match_num-1].off; - last_pos = cur + 1; - goto _storeSequence; - } - - /* set prices using matches at position = cur */ - for (u = 0; u < match_num; u++) { - mlen = (u>0) ? 
matches[u-1].len+1 : best_mlen; - best_mlen = matches[u].len; - - while (mlen <= best_mlen) { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen, ultra); - else - price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, matches[u].off-1, mlen, ultra); - } - - if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) - SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); - - mlen++; - } } } /* for (cur = 1; cur <= last_pos; cur++) */ - - best_mlen = opt[last_pos].mlen; - best_off = opt[last_pos].off; - cur = last_pos - best_mlen; - - /* store sequence */ -_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ - opt[0].mlen = 1; - - while (1) { - mlen = opt[cur].mlen; - offset = opt[cur].off; - opt[cur].mlen = best_mlen; - opt[cur].off = best_off; - best_mlen = mlen; - best_off = offset; - if (mlen > cur) break; - cur -= mlen; - } - - for (u = 0; u <= last_pos; ) { - u += opt[u].mlen; - } - - for (cur=0; cur < last_pos; ) { - mlen = opt[cur].mlen; - if (mlen == 1) { ip++; cur++; continue; } - offset = opt[cur].off; - cur += mlen; - litLength = (U32)(ip - anchor); - - if (offset > ZSTD_REP_MOVE_OPT) { - rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = offset - ZSTD_REP_MOVE_OPT; - offset--; - } else { - if (offset != 0) { - best_off = (offset==ZSTD_REP_MOVE_OPT) ? 
(rep[0] - 1) : (rep[offset]); - if (offset != 1) rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = best_off; - } - - if (litLength==0) offset--; - } - - ZSTD_updatePrice(optStatePtr, litLength, anchor, offset, mlen); - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); - anchor = ip = ip + mlen; - } } /* for (cur=0; cur < last_pos; ) */ - - /* Save reps for next block */ - { int i; for (i=0; irepToConfirm[i] = rep[i]; } - - /* Return the last literals size */ - return iend - anchor; -} - - size_t ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - return ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0); + return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*ultra*/, 1 /*extDict*/); } size_t ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { - return ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1); + return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1 /*ultra*/, 1 /*extDict*/); } diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 0b9173f3..2926d3b4 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -412,13 +412,14 @@ struct ZSTDMT_CCtx_s { ZSTDMT_CCtxPool* cctxPool; ZSTD_pthread_mutex_t jobCompleted_mutex; ZSTD_pthread_cond_t jobCompleted_cond; + ZSTD_CCtx_params params; size_t targetSectionSize; size_t inBuffSize; size_t dictSize; size_t targetDictSize; inBuff_t inBuff; - ZSTD_CCtx_params params; XXH64_state_t xxhState; + unsigned singleThreaded; unsigned jobIDMask; unsigned doneJobID; unsigned nextJobID; @@ -439,20 +440,23 @@ static ZSTDMT_jobDescription* ZSTDMT_allocJobsTable(U32* nbJobsPtr, ZSTD_customM nbJobs * sizeof(ZSTDMT_jobDescription), cMem); } -/* Internal only */ -size_t ZSTDMT_initializeCCtxParameters(ZSTD_CCtx_params* params, unsigned nbThreads) +/* ZSTDMT_CCtxParam_setNbThreads(): + * Internal use only */ +size_t ZSTDMT_CCtxParam_setNbThreads(ZSTD_CCtx_params* 
params, unsigned nbThreads) { + if (nbThreads > ZSTDMT_NBTHREADS_MAX) nbThreads = ZSTDMT_NBTHREADS_MAX; + if (nbThreads < 1) nbThreads = 1; params->nbThreads = nbThreads; params->overlapSizeLog = ZSTDMT_OVERLAPLOG_DEFAULT; params->jobSize = 0; - return 0; + return nbThreads; } ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem) { ZSTDMT_CCtx* mtctx; U32 nbJobs = nbThreads + 2; - DEBUGLOG(3, "ZSTDMT_createCCtx_advanced"); + DEBUGLOG(3, "ZSTDMT_createCCtx_advanced (nbThreads = %u)", nbThreads); if (nbThreads < 1) return NULL; nbThreads = MIN(nbThreads , ZSTDMT_NBTHREADS_MAX); @@ -462,7 +466,7 @@ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem) mtctx = (ZSTDMT_CCtx*) ZSTD_calloc(sizeof(ZSTDMT_CCtx), cMem); if (!mtctx) return NULL; - ZSTDMT_initializeCCtxParameters(&mtctx->params, nbThreads); + ZSTDMT_CCtxParam_setNbThreads(&mtctx->params, nbThreads); mtctx->cMem = cMem; mtctx->allJobsCompleted = 1; mtctx->factory = POOL_create_advanced(nbThreads, 0, cMem); @@ -559,12 +563,16 @@ size_t ZSTDMT_CCtxParam_setMTCtxParameter( switch(parameter) { case ZSTDMT_p_sectionSize : + if ( (value > 0) /* value==0 => automatic job size */ + & (value < ZSTDMT_JOBSIZE_MIN) ) + value = ZSTDMT_JOBSIZE_MIN; params->jobSize = value; - return 0; + return value; case ZSTDMT_p_overlapSectionLog : + if (value > 9) value = 9; DEBUGLOG(4, "ZSTDMT_p_overlapSectionLog : %u", value); params->overlapSizeLog = (value >= 9) ? 
9 : value; - return 0; + return value; default : return ERROR(parameter_unsupported); } @@ -763,13 +771,14 @@ size_t ZSTDMT_initCStream_internal( const ZSTD_CDict* cdict, ZSTD_CCtx_params params, unsigned long long pledgedSrcSize) { - DEBUGLOG(4, "ZSTDMT_initCStream_internal"); + DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u)", (U32)pledgedSrcSize); /* params are supposed to be fully validated at this point */ assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); assert(!((dict) && (cdict))); /* either dict or cdict, not both */ assert(zcs->cctxPool->totalCCtx == params.nbThreads); + zcs->singleThreaded = (params.nbThreads==1) | (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */ - if (params.nbThreads==1) { + if (zcs->singleThreaded) { ZSTD_CCtx_params const singleThreadParams = ZSTDMT_makeJobCCtxParams(params); DEBUGLOG(4, "single thread mode"); assert(singleThreadParams.nbThreads == 0); @@ -777,7 +786,7 @@ size_t ZSTDMT_initCStream_internal( dict, dictSize, cdict, singleThreadParams, pledgedSrcSize); } - DEBUGLOG(4, "multi - threading mode"); + DEBUGLOG(4, "multi-threading mode (%u threads)", params.nbThreads); if (zcs->allJobsCompleted == 0) { /* previous compression not correctly finished */ ZSTDMT_waitForAllJobsCompleted(zcs); @@ -800,13 +809,13 @@ size_t ZSTDMT_initCStream_internal( zcs->cdict = cdict; } + assert(params.overlapSizeLog <= 9); zcs->targetDictSize = (params.overlapSizeLog==0) ? 0 : (size_t)1 << (params.cParams.windowLog - (9 - params.overlapSizeLog)); - DEBUGLOG(4, "overlapLog : %u ", params.overlapSizeLog); - DEBUGLOG(4, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10)); + DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(zcs->targetDictSize>>10)); zcs->targetSectionSize = params.jobSize ? 
params.jobSize : (size_t)1 << (params.cParams.windowLog + 2); - zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize); - zcs->targetSectionSize = MAX(zcs->targetDictSize, zcs->targetSectionSize); /* one job size must be at least overlap */ - DEBUGLOG(4, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10)); + if (zcs->targetSectionSize < ZSTDMT_JOBSIZE_MIN) zcs->targetSectionSize = ZSTDMT_JOBSIZE_MIN; + if (zcs->targetSectionSize < zcs->targetDictSize) zcs->targetSectionSize = zcs->targetDictSize; /* job size must be >= overlap size */ + DEBUGLOG(4, "Job Size : %u KB", (U32)(zcs->targetSectionSize>>10)); zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize; DEBUGLOG(4, "inBuff Size : %u KB", (U32)(zcs->inBuffSize>>10)); ZSTDMT_setBufferSize(zcs->bufPool, MAX(zcs->inBuffSize, ZSTD_compressBound(zcs->targetSectionSize)) ); @@ -826,7 +835,7 @@ size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize) { ZSTD_CCtx_params cctxParams = mtctx->params; - DEBUGLOG(5, "ZSTDMT_initCStream_advanced"); + DEBUGLOG(5, "ZSTDMT_initCStream_advanced (pledgedSrcSize=%u)", (U32)pledgedSrcSize); cctxParams.cParams = params.cParams; cctxParams.fParams = params.fParams; return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, ZSTD_dm_auto, NULL, @@ -1008,13 +1017,15 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, DEBUGLOG(5, "ZSTDMT_compressStream_generic"); assert(output->pos <= output->size); assert(input->pos <= input->size); + + if (mtctx->singleThreaded) { /* delegate to single-thread (synchronous) */ + return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp); + } + if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) { /* current frame being ended. 
Only flush/end are allowed */ return ERROR(stage_wrong); } - if (mtctx->params.nbThreads==1) { /* delegate to single-thread (synchronous) */ - return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp); - } /* single-pass shortcut (note : synchronous-mode) */ if ( (mtctx->nextJobID == 0) /* just started */ @@ -1083,32 +1094,32 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu } -static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned endFrame) +static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, unsigned endFrame) { - size_t const srcSize = zcs->inBuff.filled - zcs->dictSize; + size_t const srcSize = mtctx->inBuff.filled - mtctx->dictSize; - if ( ((srcSize > 0) || (endFrame && !zcs->frameEnded)) - && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) { - CHECK_F( ZSTDMT_createCompressionJob(zcs, srcSize, endFrame) ); + if ( ((srcSize > 0) || (endFrame && !mtctx->frameEnded)) + && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) { + CHECK_F( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) ); } /* check if there is any data available to flush */ - return ZSTDMT_flushNextJob(zcs, output, 1 /* blockToFlush */); + return ZSTDMT_flushNextJob(mtctx, output, 1 /* blockToFlush */); } -size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output) +size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output) { DEBUGLOG(5, "ZSTDMT_flushStream"); - if (zcs->params.nbThreads==1) - return ZSTD_flushStream(zcs->cctxPool->cctx[0], output); - return ZSTDMT_flushStream_internal(zcs, output, 0 /* endFrame */); + if (mtctx->singleThreaded) + return ZSTD_flushStream(mtctx->cctxPool->cctx[0], output); + return ZSTDMT_flushStream_internal(mtctx, output, 0 /* endFrame */); } -size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output) +size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output) { DEBUGLOG(4, 
"ZSTDMT_endStream"); - if (zcs->params.nbThreads==1) - return ZSTD_endStream(zcs->cctxPool->cctx[0], output); - return ZSTDMT_flushStream_internal(zcs, output, 1 /* endFrame */); + if (mtctx->singleThreaded) + return ZSTD_endStream(mtctx->cctxPool->cctx[0], output); + return ZSTDMT_flushStream_internal(mtctx, output, 1 /* endFrame */); } diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h index a7ce7c7d..269c54b1 100644 --- a/lib/compress/zstdmt_compress.h +++ b/lib/compress/zstdmt_compress.h @@ -60,8 +60,8 @@ ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /* === Advanced functions and parameters === */ -#ifndef ZSTDMT_SECTION_SIZE_MIN -# define ZSTDMT_SECTION_SIZE_MIN (1U << 20) /* 1 MB - Minimum size of each compression job */ +#ifndef ZSTDMT_JOBSIZE_MIN +# define ZSTDMT_JOBSIZE_MIN (1U << 20) /* 1 MB - Minimum size of each compression job */ #endif ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, @@ -112,7 +112,10 @@ ZSTDLIB_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, unsigned value); -size_t ZSTDMT_initializeCCtxParameters(ZSTD_CCtx_params* params, unsigned nbThreads); +/* ZSTDMT_CCtxParam_setNbThreads() + * Set nbThreads, and clamp it correctly, + * but also reset jobSize and overlapLog */ +size_t ZSTDMT_CCtxParam_setNbThreads(ZSTD_CCtx_params* params, unsigned nbThreads); /*! ZSTDMT_initCStream_internal() : * Private use only. Init streaming operation. diff --git a/lib/zstd.h b/lib/zstd.h index 6b7bfd03..f07c9eed 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1059,7 +1059,8 @@ typedef enum { /*! ZSTD_CCtx_setParameter() : * Set one compression parameter, selected by enum ZSTD_cParameter. * Note : when `value` is an enum, cast it to unsigned for proper type checking. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). 
*/ + * @result : informational value (typically, the one being set, possibly corrected), + * or an error code (which can be tested with ZSTD_isError()). */ ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value); /*! ZSTD_CCtx_setPledgedSrcSize() : diff --git a/programs/bench.c b/programs/bench.c index 5093f110..6525f337 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -416,10 +416,12 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, DISPLAY("(sample %u, block %u, pos %u) \n", segNb, bNb, pos); if (u>5) { int n; + DISPLAY("origin: "); for (n=-5; n<0; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]); DISPLAY(" :%02X: ", ((const BYTE*)srcBuffer)[u]); for (n=1; n<3; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]); DISPLAY(" \n"); + DISPLAY("decode: "); for (n=-5; n<0; n++) DISPLAY("%02X ", ((const BYTE*)resultBuffer)[u+n]); DISPLAY(" :%02X: ", ((const BYTE*)resultBuffer)[u]); for (n=1; n<3; n++) DISPLAY("%02X ", ((const BYTE*)resultBuffer)[u+n]); diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 384858ed..4cdd6235 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -473,6 +473,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo DISPLAYLEVEL(3, "test%3i : digested dictionary : ", testNb++); { ZSTD_CDict* const cdict = ZSTD_createCDict(dictionary.start, dictionary.filled, 1 /*byRef*/ ); size_t const initError = ZSTD_initCStream_usingCDict(zc, cdict); + DISPLAYLEVEL(5, "ZSTD_initCStream_usingCDict result : %u ", (U32)initError); if (ZSTD_isError(initError)) goto _output_error; cSize = 0; outBuff.dst = compressedBuffer; @@ -481,10 +482,13 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo inBuff.src = CNBuffer; inBuff.size = CNBufferSize; inBuff.pos = 0; + DISPLAYLEVEL(5, "- starting ZSTD_compressStream "); CHECK_Z( ZSTD_compressStream(zc, &outBuff, &inBuff) ); if (inBuff.pos != inBuff.size) goto _output_error; 
/* entire input should be consumed */ - { size_t const r = ZSTD_endStream(zc, &outBuff); - if (r != 0) goto _output_error; } /* error, or some data not flushed */ + { size_t const r = ZSTD_endStream(zc, &outBuff); + DISPLAYLEVEL(5, "- ZSTD_endStream result : %u ", (U32)r); + if (r != 0) goto _output_error; /* error, or some data not flushed */ + } cSize = outBuff.pos; ZSTD_freeCDict(cdict); DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/CNBufferSize*100);