diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index 74856823..9b743c34 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -801,10 +801,12 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long /* These parameters are only useful if multi-threading is enabled (ZSTD_MULTITHREAD). * They return an error otherwise. */ ZSTD_p_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. - * Triggers asynchronous mode, even with nbWorkers = 1. - * Can only be set to a value >= 1 if ZSTD_MULTITHREAD is enabled. - * More threads improve speed, but also increase memory usage. - * Default value is `0`, aka "blocking mode" : no worker is spawned, compression is performed inside Caller's thread */ + * When nbWorkers >= 1, triggers asynchronous mode : + * ZSTD_compress_generic() consumes some input, flush some output if possible, and immediately gives back control to caller, + * while compression work is performed in parallel, within worker threads. + * (note : a strong exception to this rule is when first invocation sets ZSTD_e_end : it becomes a blocking call). + * More workers improve speed, but also increase memory usage. + * Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */ ZSTD_p_jobSize, /* Size of a compression job. This value is only enforced in streaming (non-blocking) mode. * Each compression job is completed in parallel, so indirectly controls the nb of active threads. * 0 means default, which is dynamically determined based on compression parameters. @@ -854,8 +856,10 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value);
 

Set one compression parameter, selected by enum ZSTD_cParameter. + Setting a parameter is generally only possible during frame initialization (before starting compression), + except for a few parameters which can be updated during compression: compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. Note : when `value` is an enum, cast it to unsigned for proper type checking. - @result : informational value (typically, the one being set, possibly corrected), + @result : informational value (typically, the value being set, possibly clamped), or an error code (which can be tested with ZSTD_isError()).


@@ -1025,9 +1029,10 @@ size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t
size_t ZSTD_CCtx_setParametersUsingCCtxParams(
         ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params);
 

Apply a set of ZSTD_CCtx_params to the compression context. - This must be done before the dictionary is loaded. - The pledgedSrcSize is treated as unknown. - Multithreading parameters are applied only if nbWorkers >= 1. + This can be done even after compression is started. + If nbWorkers==0, this will have no impact until a new compression is started. + If nbWorkers>=1, new parameters will be picked up at the next job, + with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated).


diff --git a/lib/common/fse_decompress.c b/lib/common/fse_decompress.c index 8e3f0035..4c66c3b7 100644 --- a/lib/common/fse_decompress.c +++ b/lib/common/fse_decompress.c @@ -139,8 +139,8 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned { U32 u; for (u=0; uappliedParams.cParams.strategy != ZSTD_fast) { U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog; if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2) - ZSTD_preserveUnsortedMark(ms->chainTable, chainSize, reducerValue); - ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); + ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); + else + ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); } if (ms->hashLog3) { diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 22213c22..9da2cbfe 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -33,6 +33,12 @@ extern "C" { #define kSearchStrength 8 #define HASH_READ_SIZE 8 #define ZSTD_CLEVEL_CUSTOM 999 +#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted". It could be confused for a real successor at index "1", if sorted as larger than its predecessor. It's not a big deal though : candidate will just be sorted again. Additionally, candidate position 1 will be lost. But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. 
+ The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be misdhandled after table re-use with a different strategy */ /*-************************************* diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 7cb53b2e..0dab117a 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -15,35 +15,6 @@ /*-************************************* * Binary Tree search ***************************************/ -#define ZSTD_DUBT_UNSORTED_MARK 1 /* note : index 1 will now be confused with "unsorted" if sorted as larger than its predecessor. - It's not a big deal though : the candidate will just be considered unsorted, and be sorted again. - Additionnally, candidate position 1 will be lost. - But candidate 1 cannot hide a large tree of candidates, so it's a moderate loss. - The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be misdhandled by a table re-use using a different strategy */ - -/*! ZSTD_preserveUnsortedMark() : - * pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK before ZSTD_reduceTable() - * so that combined operation preserves its value. - * Without it, ZSTD_DUBT_UNSORTED_MARK==1 would be squashed to 0. - * As a consequence, the list of unsorted elements would stop on the first element, - * removing candidates, resulting in a negligible loss to compression ratio - * (since overflow protection with ZSTD_reduceTable() is relatively rare). - * Another potential risk is that a position will be promoted from *unsorted* - * to *sorted=>smaller:0*, meaning the next candidate will be considered smaller. - * This could be wrong, and result in data corruption. - * On second thought, this corruption might be impossible, - * because unsorted elements are always at the beginning of the list, - * and squashing to zero reduce the list to a single element, - * which needs to be sorted anyway. 
- * I haven't spent much thoughts into this possible scenario, - * and just felt it was safer to implement ZSTD_preserveUnsortedMark() */ -void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue) -{ - U32 u; - for (u=0; udtable; return 0; case set_repeat: if (!flagRepeatTable) return ERROR(corruption_detected); @@ -936,13 +940,14 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); assert(ofBits <= MaxOff); - if (MEM_32bits() && longOffsets) { - U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1); + if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { + U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); - if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream); + BIT_reloadDStream(&seqState->DStream); if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); + assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ } else { - offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); } } @@ -955,7 +960,7 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; seqState->prevOffset[1] = seqState->prevOffset[0]; seqState->prevOffset[0] = offset = temp; - } else { + } else { /* offset == 0 */ offset = seqState->prevOffset[0]; } } else { @@ -967,16 +972,16 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l } seq.matchLength = 
ML_base[mlCode] - + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ + + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */ if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) BIT_reloadDStream(&seqState->DStream); if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) BIT_reloadDStream(&seqState->DStream); - /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */ + /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); seq.litLength = LL_base[llCode] - + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ + + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); @@ -1364,13 +1369,13 @@ static size_t ZSTD_decompressSequencesLong( FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); /* prepare in advance */ - for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNbfParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))); - /* windowSize could be any value at this point, since it is only validated - * in the streaming API. 
- */ DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); @@ -1429,7 +1430,9 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, ip += litCSize; srcSize -= litCSize; } - if (frame && dctx->fParams.windowSize > (1<<23)) + if ( frame /* windowSize exists */ + && (dctx->fParams.windowSize > (1<<24)) + && MEM_64bits() /* x86 benefits less from long mode than x64 */ ) return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, isLongOffset); return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, isLongOffset); }