Merge pull request #1041 from facebook/fasterFast

Negative compression levels
2018-03-13 21:32:46 -07:00 · 2018-03-13 21:32:46 -07:00 · 4c5cbac179
parent bd7bb94361 50f763ec44
commit 4c5cbac179
12 changed files with 293 additions and 192 deletions
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@ -753,7 +753,9 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
    </b>/* compression parameters */<b>
    ZSTD_p_compressionLevel=100, </b>/* Update all compression parameters according to pre-defined cLevel table<b>
                              * Default level is ZSTD_CLEVEL_DEFAULT==3.
-                              * Special: value 0 means "do not change cLevel". */
+                              * Special: value 0 means "do not change cLevel".
+                              * Note 1 : it's possible to pass a negative compression level by casting it to unsigned type.
+                              * Note 2 : setting compressionLevel automatically updates ZSTD_p_compressLiterals. */
    ZSTD_p_windowLog,        </b>/* Maximum allowed back-reference distance, expressed as power of 2.<b>
                              * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
                              * Special: value 0 means "do not change windowLog".
@ -780,9 +782,13 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
                              * Note that currently, for all strategies < btopt, effective minimum is 4.
                              * Note that currently, for all strategies > fast, effective maximum is 6.
                              * Special: value 0 means "do not change minMatchLength". */
-    ZSTD_p_targetLength,     </b>/* Only useful for strategies >= btopt.<b>
-                              * Length of Match considered "good enough" to stop search.
-                              * Larger values make compression stronger and slower.
+    ZSTD_p_targetLength,     </b>/* Impact of this field depends on strategy.<b>
+                              * For strategies btopt & btultra:
+                              *     Length of Match considered "good enough" to stop search.
+                              *     Larger values make compression stronger, and slower.
+                              * For strategy fast:
+                              *     Distance between match sampling.
+                              *     Larger values make compression faster, and weaker.
                              * Special: value 0 means "do not change targetLength". */
    ZSTD_p_compressionStrategy, </b>/* See ZSTD_strategy enum definition.<b>
                              * Cast selected strategy as unsigned for ZSTD_CCtx_setParameter() compatibility.
@ -807,17 +813,25 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
                              * (note : a strong exception to this rule is when first invocation sets ZSTD_e_end : it becomes a blocking call).
                              * More workers improve speed, but also increase memory usage.
                              * Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */
-    ZSTD_p_jobSize,          </b>/* Size of a compression job. This value is only enforced in streaming (non-blocking) mode.<b>
-                              * Each compression job is completed in parallel, so indirectly controls the nb of active threads.
+    ZSTD_p_jobSize,          </b>/* Size of a compression job. This value is enforced only in non-blocking mode.<b>
+                              * Each compression job is completed in parallel, so this value indirectly controls the nb of active threads.
                              * 0 means default, which is dynamically determined based on compression parameters.
-                              * Job size must be a minimum of overlapSize, or 1 KB, whichever is largest
+                              * Job size must be a minimum of overlapSize, or 1 MB, whichever is largest.
                              * The minimum size is automatically and transparently enforced */
    ZSTD_p_overlapSizeLog,   </b>/* Size of previous input reloaded at the beginning of each job.<b>
                              * 0 => no overlap, 6(default) => use 1/8th of windowSize, >=9 => use full windowSize */

    </b>/* advanced parameters - may not remain available after API update */<b>
+
+    ZSTD_p_compressLiterals=1000, </b>/* control huffman compression of literals (enabled) by default.<b>
+                              * disabling it improves speed and decreases compression ratio by a large amount.
+                              * note : this setting is updated when changing compression level.
+                              *        positive compression levels set literalCompression to 1.
+                              *        negative compression levels set literalCompression to 0. */
+
    ZSTD_p_forceMaxWindow=1100, </b>/* Force back-reference distances to remain < windowSize,<b>
                              * even when referencing into Dictionary content (default:0) */
+
    ZSTD_p_enableLongDistanceMatching=1200, </b>/* Enable long distance matching.<b>
                                         * This parameter is designed to improve the compression
                                         * ratio for large inputs with long distance matches.
@ -877,23 +891,21 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
 <pre><b>size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
 size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
 size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode);
-</b><p>  Create an internal CDict from dict buffer.
-  Decompression will have to use same buffer.
+</b><p>  Create an internal CDict from `dict` buffer.
+  Decompression will have to use same dictionary.
 @result : 0, or an error code (which can be tested with ZSTD_isError()).
-  Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
-            meaning "return to no-dictionary mode".
-  Note 1 : `dict` content will be copied internally. Use
-            ZSTD_CCtx_loadDictionary_byReference() to reference dictionary
-            content instead. The dictionary buffer must then outlive its
-            users.
+  Special: Adding a NULL (or 0-size) dictionary invalidates previous dictionary,
+           meaning "return to no-dictionary mode".
+  Note 1 : Dictionary will be used for all future compression jobs.
+           To return to "no-dictionary" situation, load a NULL dictionary
  Note 2 : Loading a dictionary involves building tables, which are dependent on compression parameters.
           For this reason, compression parameters cannot be changed anymore after loading a dictionary.
-           It's also a CPU-heavy operation, with non-negligible impact on latency.
-  Note 3 : Dictionary will be used for all future compression jobs.
-           To return to "no-dictionary" situation, load a NULL dictionary
-  Note 5 : Use ZSTD_CCtx_loadDictionary_advanced() to select how dictionary
-           content will be interpreted.
- 
+           It's also a CPU consuming operation, with non-negligible impact on latency.
+  Note 3 :`dict` content will be copied internally.
+           Use ZSTD_CCtx_loadDictionary_byReference() to reference dictionary content instead.
+           In such a case, dictionary buffer must outlive its users.
+  Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
+           to precisely select how dictionary content must be interpreted. 
 </p></pre><BR>

 <pre><b>size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
@ -905,8 +917,7 @@ size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size
  Special : adding a NULL CDict means "return to no-dictionary mode".
  Note 1 : Currently, only one dictionary can be managed.
           Adding a new dictionary effectively "discards" any previous one.
-  Note 2 : CDict is just referenced, its lifetime must outlive CCtx.
- 
+  Note 2 : CDict is just referenced, its lifetime must outlive CCtx. 
 </p></pre><BR>

 <pre><b>size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize);
@ -917,13 +928,12 @@ size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t
  Subsequent compression jobs will be done without prefix (if none is explicitly referenced).
  If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_CDict instead.
 @result : 0, or an error code (which can be tested with ZSTD_isError()).
-  Special : Adding any prefix (including NULL) invalidates any previous prefix or dictionary
+  Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
  Note 1 : Prefix buffer is referenced. It must outlive compression job.
  Note 2 : Referencing a prefix involves building tables, which are dependent on compression parameters.
-           It's a CPU-heavy operation, with non-negligible impact on latency.
-  Note 3 : By default, the prefix is treated as raw content
-           (ZSTD_dm_rawContent). Use ZSTD_CCtx_refPrefix_advanced() to alter
-           dictMode. 
+           It's a CPU consuming operation, with non-negligible impact on latency.
+  Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent).
+           Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode. 
 </p></pre><BR>

 <pre><b>typedef enum {
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@ -146,7 +146,7 @@ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStor
 static ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
        ZSTD_CCtx_params CCtxParams, U64 srcSizeHint, size_t dictSize)
 {
-    DEBUGLOG(4, "ZSTD_getCParamsFromCCtxParams: srcSize = %u, dictSize = %u",
+    DEBUGLOG(5, "ZSTD_getCParamsFromCCtxParams: srcSize = %u, dictSize = %u",
                (U32)srcSizeHint, (U32)dictSize);
    return (CCtxParams.compressionLevel == ZSTD_CLEVEL_CUSTOM) ?
                CCtxParams.cParams :
@ -252,6 +252,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
    case ZSTD_p_minMatch:
    case ZSTD_p_targetLength:
    case ZSTD_p_compressionStrategy:
+    case ZSTD_p_compressLiterals:
        return 1;

    case ZSTD_p_format:
@ -305,6 +306,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v
        }
        return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);

+    case ZSTD_p_compressLiterals:
    case ZSTD_p_contentSizeFlag:
    case ZSTD_p_checksumFlag:
    case ZSTD_p_dictIDFlag:
@ -354,11 +356,16 @@ size_t ZSTD_CCtxParam_setParameter(
        CCtxParams->format = (ZSTD_format_e)value;
        return (size_t)CCtxParams->format;

-    case ZSTD_p_compressionLevel :
-        if ((int)value > ZSTD_maxCLevel()) value = ZSTD_maxCLevel();
-        if (value)  /* 0 : does not change current level */
-            CCtxParams->compressionLevel = value;
-        return CCtxParams->compressionLevel;
+    case ZSTD_p_compressionLevel : {
+        int cLevel = (int)value;  /* cast expected to restore negative sign */
+        if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel();
+        if (cLevel) {  /* 0 : does not change current level */
+            CCtxParams->disableLiteralCompression = (cLevel<0);  /* negative levels disable huffman */
+            CCtxParams->compressionLevel = cLevel;
+        }
+        if (CCtxParams->compressionLevel >= 0) return CCtxParams->compressionLevel;
+        return 0;  /* return type (size_t) cannot represent negative values */
+    }

    case ZSTD_p_windowLog :
        DEBUGLOG(4, "ZSTD_CCtxParam_setParameter: set windowLog=%u", value);
@ -417,6 +424,10 @@ size_t ZSTD_CCtxParam_setParameter(
        }
        return (size_t)CCtxParams->cParams.strategy;

+    case ZSTD_p_compressLiterals:
+        CCtxParams->disableLiteralCompression = !value;
+        return !!value;
+
    case ZSTD_p_contentSizeFlag :
        /* Content size written in frame header _when known_ (default:1) */
        DEBUGLOG(4, "set content size flag = %u", (value>0));
@ -1376,7 +1387,7 @@ static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; }

 static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t const* prevEntropy,
                                     ZSTD_entropyCTables_t* nextEntropy,
-                                     ZSTD_strategy strategy,
+                                     ZSTD_strategy strategy, int disableLiteralCompression,
                                     void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                                     U32* workspace, const int bmi2)
@ -1388,15 +1399,22 @@ static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t const* prevEntropy,
    symbolEncodingType_e hType = set_compressed;
    size_t cLitSize;

+    DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)",
+                disableLiteralCompression);
+
    /* Prepare nextEntropy assuming reusing the existing table */
    nextEntropy->hufCTable_repeatMode = prevEntropy->hufCTable_repeatMode;
    memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable,
           sizeof(prevEntropy->hufCTable));

+    if (disableLiteralCompression)
+        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+
    /* small ? don't even attempt compression (speed opt) */
 #   define LITERAL_NOENTROPY 63
-    {   size_t const minLitSize = prevEntropy->hufCTable_repeatMode == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
-        if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+    {   size_t const minLitSize = (prevEntropy->hufCTable_repeatMode == HUF_repeat_valid) ? 6 : LITERAL_NOENTROPY;
+        if (srcSize <= minLitSize)
+            return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
    }

    if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall);   /* not enough space for compression */
@ -1604,11 +1622,11 @@ size_t ZSTD_encodeSequences(
 MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
                              ZSTD_entropyCTables_t const* prevEntropy,
                              ZSTD_entropyCTables_t* nextEntropy,
-                              ZSTD_compressionParameters const* cParams,
+                              ZSTD_CCtx_params const* cctxParams,
                              void* dst, size_t dstCapacity, U32* workspace,
                              const int bmi2)
 {
-    const int longOffsets = cParams->windowLog > STREAM_ACCUMULATOR_MIN;
+    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
    U32 count[MaxSeq+1];
    FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
    FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
@ -1629,9 +1647,12 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
    /* Compress literals */
    {   const BYTE* const literals = seqStorePtr->litStart;
        size_t const litSize = seqStorePtr->lit - literals;
-        size_t const cSize = ZSTD_compressLiterals(prevEntropy, nextEntropy,
-                cParams->strategy, op, dstCapacity, literals, litSize,
-                workspace, bmi2);
+        size_t const cSize = ZSTD_compressLiterals(
+                                    prevEntropy, nextEntropy,
+                                    cctxParams->cParams.strategy, cctxParams->disableLiteralCompression,
+                                    op, dstCapacity,
+                                    literals, litSize,
+                                    workspace, bmi2);
        if (ZSTD_isError(cSize))
          return cSize;
        assert(cSize <= dstCapacity);
@ -1722,28 +1743,24 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
 MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr,
                              ZSTD_entropyCTables_t const* prevEntropy,
                              ZSTD_entropyCTables_t* nextEntropy,
-                              ZSTD_compressionParameters const* cParams,
+                              ZSTD_CCtx_params const* cctxParams,
                              void* dst, size_t dstCapacity,
                              size_t srcSize, U32* workspace, int bmi2)
 {
    size_t const cSize = ZSTD_compressSequences_internal(
-            seqStorePtr, prevEntropy, nextEntropy, cParams, dst, dstCapacity,
+            seqStorePtr, prevEntropy, nextEntropy, cctxParams, dst, dstCapacity,
            workspace, bmi2);
-    /* If the srcSize <= dstCapacity, then there is enough space to write a
-     * raw uncompressed block. Since we ran out of space, the block must not
-     * be compressible, so fall back to a raw uncompressed block.
+    /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
+     * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
     */
-    int const uncompressibleError = (cSize == ERROR(dstSize_tooSmall)) && (srcSize <= dstCapacity);
-    if (ZSTD_isError(cSize) && !uncompressibleError)
-        return cSize;
+    if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
+        return 0;  /* block not compressed */
+    if (ZSTD_isError(cSize)) return cSize;

    /* Check compressibility */
-    {   size_t const minGain = ZSTD_minGain(srcSize);  /* note : fixed formula, maybe should depend on compression level, or strategy */
-        size_t const maxCSize = srcSize - minGain;
-        if (cSize >= maxCSize || uncompressibleError) {
-            return 0;  /* block not compressed */
-    }   }
-    assert(!ZSTD_isError(cSize));
+    {   size_t const maxCSize = srcSize - ZSTD_minGain(srcSize);  /* note : fixed formula, maybe should depend on compression level, or strategy */
+        if (cSize >= maxCSize) return 0;  /* block not compressed */
+    }

    /* We check that dictionaries have offset codes available for the first
     * block. After the first block, the offcode table might not have large
@ -1813,7 +1830,10 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
    /* select and store sequences */
    {   U32 const extDict = ZSTD_window_hasExtDict(ms->window);
        size_t lastLLSize;
-        { int i; for (i = 0; i < ZSTD_REP_NUM; ++i) zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; }
+        {   int i;
+            for (i = 0; i < ZSTD_REP_NUM; ++i)
+                zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
+        }
        if (zc->appliedParams.ldmParams.enableLdm) {
            size_t const nbSeq =
                ZSTD_ldm_generateSequences(&zc->ldmState, zc->ldmSequences,
@ -1834,7 +1854,11 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
    }   }

    /* encode sequences and literals */
-    {   size_t const cSize = ZSTD_compressSequences(&zc->seqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, &zc->appliedParams.cParams, dst, dstCapacity, srcSize, zc->entropyWorkspace, zc->bmi2);
+    {   size_t const cSize = ZSTD_compressSequences(&zc->seqStore,
+                                &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
+                                &zc->appliedParams,
+                                dst, dstCapacity,
+                                srcSize, zc->entropyWorkspace, zc->bmi2);
        if (ZSTD_isError(cSize) || cSize == 0) return cSize;
        /* confirm repcodes and entropy tables */
        {   ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
@ -2422,20 +2446,20 @@ size_t ZSTD_compress_advanced_internal(
    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
 }

-size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize,
+size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize,
                               const void* dict, size_t dictSize, int compressionLevel)
 {
-    ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize ? srcSize : 1, dict ? dictSize : 0);
-    params.fParams.contentSizeFlag = 1;
-    DEBUGLOG(4, "ZSTD_compress_usingDict (level=%i, srcSize=%u, dictSize=%u)",
-                compressionLevel, (U32)srcSize, (U32)dictSize);
-    return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
+    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize ? srcSize : 1, dict ? dictSize : 0);
+    ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
+    assert(params.fParams.contentSizeFlag == 1);
+    ZSTD_CCtxParam_setParameter(&cctxParams, ZSTD_p_compressLiterals, compressionLevel>=0);
+    return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, cctxParams);
 }

-size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
+size_t ZSTD_compressCCtx (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
 {
    DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (U32)srcSize);
-    return ZSTD_compress_usingDict(ctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
+    return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
 }

 size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
@ -2743,29 +2767,30 @@ size_t ZSTD_CStreamOutSize(void)
    return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ;
 }

-static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs,
+static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx,
                    const void* const dict, size_t const dictSize, ZSTD_dictMode_e const dictMode,
                    const ZSTD_CDict* const cdict,
                    ZSTD_CCtx_params const params, unsigned long long const pledgedSrcSize)
 {
-    DEBUGLOG(4, "ZSTD_resetCStream_internal");
+    DEBUGLOG(4, "ZSTD_resetCStream_internal (disableLiteralCompression=%i)",
+                params.disableLiteralCompression);
    /* params are supposed to be fully validated at this point */
    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */

-    CHECK_F( ZSTD_compressBegin_internal(zcs,
+    CHECK_F( ZSTD_compressBegin_internal(cctx,
                                         dict, dictSize, dictMode,
                                         cdict,
                                         params, pledgedSrcSize,
                                         ZSTDb_buffered) );

-    zcs->inToCompress = 0;
-    zcs->inBuffPos = 0;
-    zcs->inBuffTarget = zcs->blockSize
-                      + (zcs->blockSize == pledgedSrcSize);   /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */
-    zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
-    zcs->streamStage = zcss_load;
-    zcs->frameEnded = 0;
+    cctx->inToCompress = 0;
+    cctx->inBuffPos = 0;
+    cctx->inBuffTarget = cctx->blockSize
+                      + (cctx->blockSize == pledgedSrcSize);   /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */
+    cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
+    cctx->streamStage = zcss_load;
+    cctx->frameEnded = 0;
    return 0;   /* ready to go */
 }

@ -3099,10 +3124,10 @@ size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
            cctx->appliedParams.nbWorkers = params.nbWorkers;
        } else
 #endif
-        {   CHECK_F( ZSTD_resetCStream_internal(
-                             cctx, prefixDict.dict, prefixDict.dictSize,
-                             prefixDict.dictMode, cctx->cdict, params,
-                             cctx->pledgedSrcSizePlusOne-1) );
+        {   CHECK_F( ZSTD_resetCStream_internal(cctx,
+                            prefixDict.dict, prefixDict.dictSize, prefixDict.dictMode,
+                            cctx->cdict,
+                            params, cctx->pledgedSrcSizePlusOne-1) );
            assert(cctx->streamStage == zcss_load);
            assert(cctx->appliedParams.nbWorkers == 0);
    }   }
@ -3178,11 +3203,11 @@ int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
 static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
 {   /* "default" - guarantees a monotonically increasing memory budget */
    /* W,  C,  H,  S,  L, TL, strat */
-    { 18, 12, 12,  1,  7, 16, ZSTD_fast    },  /* level  0 - never used */
-    { 19, 13, 14,  1,  7, 16, ZSTD_fast    },  /* level  1 */
-    { 19, 15, 16,  1,  6, 16, ZSTD_fast    },  /* level  2 */
-    { 20, 16, 17,  1,  5, 16, ZSTD_dfast   },  /* level  3 */
-    { 20, 17, 18,  1,  5, 16, ZSTD_dfast   },  /* level  4 */
+    { 19, 12, 13,  1,  6,  1, ZSTD_fast    },  /* base for negative levels */
+    { 19, 13, 14,  1,  7,  1, ZSTD_fast    },  /* level  1 */
+    { 19, 15, 16,  1,  6,  1, ZSTD_fast    },  /* level  2 */
+    { 20, 16, 17,  1,  5,  8, ZSTD_dfast   },  /* level  3 */
+    { 20, 17, 18,  1,  5,  8, ZSTD_dfast   },  /* level  4 */
    { 20, 17, 18,  2,  5, 16, ZSTD_greedy  },  /* level  5 */
    { 21, 17, 19,  2,  5, 16, ZSTD_lazy    },  /* level  6 */
    { 21, 18, 19,  3,  5, 16, ZSTD_lazy    },  /* level  7 */
@ -3191,9 +3216,9 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
    { 21, 19, 21,  4,  5, 16, ZSTD_lazy2   },  /* level 10 */
    { 22, 20, 22,  4,  5, 16, ZSTD_lazy2   },  /* level 11 */
    { 22, 20, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 12 */
-    { 22, 21, 22,  4,  5, 16, ZSTD_btlazy2 },  /* level 13 */
-    { 22, 21, 22,  5,  5, 16, ZSTD_btlazy2 },  /* level 14 */
-    { 22, 22, 22,  6,  5, 16, ZSTD_btlazy2 },  /* level 15 */
+    { 22, 21, 22,  4,  5, 32, ZSTD_btlazy2 },  /* level 13 */
+    { 22, 21, 22,  5,  5, 32, ZSTD_btlazy2 },  /* level 14 */
+    { 22, 22, 22,  6,  5, 32, ZSTD_btlazy2 },  /* level 15 */
    { 22, 21, 22,  4,  5, 48, ZSTD_btopt   },  /* level 16 */
    { 23, 22, 22,  4,  4, 48, ZSTD_btopt   },  /* level 17 */
    { 23, 22, 22,  5,  3, 64, ZSTD_btopt   },  /* level 18 */
@ -3204,8 +3229,8 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
 },
 {   /* for srcSize <= 256 KB */
    /* W,  C,  H,  S,  L,  T, strat */
-    {  0,  0,  0,  0,  0,  0, ZSTD_fast    },  /* level  0 - not used */
-    { 18, 13, 14,  1,  6,  8, ZSTD_fast    },  /* level  1 */
+    { 18, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
+    { 18, 13, 14,  1,  6,  1, ZSTD_fast    },  /* level  1 */
    { 18, 14, 13,  1,  5,  8, ZSTD_dfast   },  /* level  2 */
    { 18, 16, 15,  1,  5,  8, ZSTD_dfast   },  /* level  3 */
    { 18, 15, 17,  1,  5,  8, ZSTD_greedy  },  /* level  4.*/
@ -3230,9 +3255,9 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
 },
 {   /* for srcSize <= 128 KB */
    /* W,  C,  H,  S,  L,  T, strat */
-    { 17, 12, 12,  1,  7,  8, ZSTD_fast    },  /* level  0 - not used */
-    { 17, 12, 13,  1,  6,  8, ZSTD_fast    },  /* level  1 */
-    { 17, 13, 16,  1,  5,  8, ZSTD_fast    },  /* level  2 */
+    { 17, 12, 12,  1,  5,  1, ZSTD_fast    },  /* level  0 - not used */
+    { 17, 12, 13,  1,  6,  1, ZSTD_fast    },  /* level  1 */
+    { 17, 13, 16,  1,  5,  1, ZSTD_fast    },  /* level  2 */
    { 17, 16, 16,  2,  5,  8, ZSTD_dfast   },  /* level  3 */
    { 17, 13, 15,  3,  4,  8, ZSTD_greedy  },  /* level  4 */
    { 17, 15, 17,  4,  4,  8, ZSTD_greedy  },  /* level  5 */
@ -3256,9 +3281,9 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
 },
 {   /* for srcSize <= 16 KB */
    /* W,  C,  H,  S,  L,  T, strat */
-    { 14, 12, 12,  1,  7,  6, ZSTD_fast    },  /* level  0 - not used */
-    { 14, 14, 14,  1,  6,  6, ZSTD_fast    },  /* level  1 */
-    { 14, 14, 14,  1,  4,  6, ZSTD_fast    },  /* level  2 */
+    { 14, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
+    { 14, 14, 14,  1,  6,  1, ZSTD_fast    },  /* level  1 */
+    { 14, 14, 14,  1,  4,  1, ZSTD_fast    },  /* level  2 */
    { 14, 14, 14,  1,  4,  6, ZSTD_dfast   },  /* level  3.*/
    { 14, 14, 14,  4,  4,  6, ZSTD_greedy  },  /* level  4.*/
    { 14, 14, 14,  3,  4,  6, ZSTD_lazy    },  /* level  5.*/
@ -3290,12 +3315,14 @@ ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long l
    size_t const addedSize = srcSizeHint ? 0 : 500;
    U64 const rSize = srcSizeHint+dictSize ? srcSizeHint+dictSize+addedSize : (U64)-1;
    U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);   /* intentional underflow for srcSizeHint == 0 */
-    DEBUGLOG(4, "ZSTD_getCParams: cLevel=%i, srcSize=%u, dictSize=%u => table %u",
-                compressionLevel, (U32)srcSizeHint, (U32)dictSize, tableID);
-    if (compressionLevel <= 0) compressionLevel = ZSTD_CLEVEL_DEFAULT;   /* 0 == default; no negative compressionLevel yet */
-    if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
-    { ZSTD_compressionParameters const cp = ZSTD_defaultCParameters[tableID][compressionLevel];
-      return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); }
+    int row = compressionLevel;
+    DEBUGLOG(5, "ZSTD_getCParams (cLevel=%i)", compressionLevel);
+    if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT;   /* 0 == default */
+    if (compressionLevel < 0) row = 0;   /* entry 0 is baseline for fast mode */
+    if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
+    {   ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
+        if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel);   /* acceleration factor */
+        return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); }

 }

@ -3305,6 +3332,7 @@ ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long l
 ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
    ZSTD_parameters params;
    ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize);
+    DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
    memset(&params, 0, sizeof(params));
    params.cParams = cParams;
    params.fParams.contentSizeFlag = 1;
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@ -158,6 +158,7 @@ struct ZSTD_CCtx_params_s {
    ZSTD_frameParameters fParams;

    int compressionLevel;
+    int disableLiteralCompression;
    int forceWindow;           /* force back-references to respect limit of
                                * 1<<wLog, even for dictionary */

@ -395,10 +396,12 @@ MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* co
 }

 /** ZSTD_count_2segments() :
-*   can count match length with `ip` & `match` in 2 different segments.
-*   convention : on reaching mEnd, match count continue starting from iStart
-*/
-MEM_STATIC size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
+ *  can count match length with `ip` & `match` in 2 different segments.
+ *  convention : on reaching mEnd, match count continue starting from iStart
+ */
+MEM_STATIC size_t
+ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
+                     const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
 {
    const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
    size_t const matchLength = ZSTD_count(ip, match, vEnd);
@ -408,8 +411,8 @@ MEM_STATIC size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const


 /*-*************************************
-*  Hashes
-***************************************/
+ *  Hashes
+ ***************************************/
 static const U32 prime3bytes = 506832829U;
 static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
 MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
@ -453,9 +456,9 @@ MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)

 #define ZSTD_LOWLIMIT_MAX (3U << 29) /* Max lowLimit allowed */
 /* Maximum chunk size before overflow correction needs to be called again */
-#define ZSTD_CHUNKSIZE_MAX                                                     \
-    ( ((U32)-1)                  /* Maximum ending current index */            \
-    - (1U << ZSTD_WINDOWLOG_MAX) /* Max distance from lowLimit to current */   \
+#define ZSTD_CHUNKSIZE_MAX                                                   \
+    ( ((U32)-1)                  /* Maximum ending current index */          \
+    - (1U << ZSTD_WINDOWLOG_MAX) /* Max distance from lowLimit to current */ \
    - ZSTD_LOWLIMIT_MAX)         /* Maximum beginning lowLimit */

 /**
--- a/lib/compress/zstd_fast.c
+++ b/lib/compress/zstd_fast.c
@ -42,7 +42,7 @@ FORCE_INLINE_TEMPLATE
 size_t ZSTD_compressBlock_fast_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
-        U32 const hlog, U32 const mls)
+        U32 const hlog, U32 const stepSize, U32 const mls)
 {
    U32* const hashTable = ms->hashTable;
    const BYTE* const base = ms->window.base;
@ -77,19 +77,19 @@ size_t ZSTD_compressBlock_fast_generic(
            ip++;
            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
        } else {
-            U32 offset;
-            if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) {
-                ip += ((ip-anchor) >> kSearchStrength) + 1;
+            if ( (matchIndex <= lowestIndex)
+              || (MEM_read32(match) != MEM_read32(ip)) ) {
+                assert(stepSize >= 1);
+                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
                continue;
            }
            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
-            offset = (U32)(ip-match);
-            while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
-            offset_2 = offset_1;
-            offset_1 = offset;
-
-            ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
-        }
+            {   U32 const offset = (U32)(ip-match);
+                while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
+                offset_2 = offset_1;
+                offset_1 = offset;
+                ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+        }   }

        /* match found */
        ip += mLength;
@ -128,17 +128,18 @@ size_t ZSTD_compressBlock_fast(
 {
    U32 const hlog = cParams->hashLog;
    U32 const mls = cParams->searchLength;
+    U32 const stepSize = cParams->targetLength;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, 4);
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4);
    case 5 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, 5);
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5);
    case 6 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, 6);
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6);
    case 7 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, 7);
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7);
    }
 }

@ -146,7 +147,7 @@ size_t ZSTD_compressBlock_fast(
 static size_t ZSTD_compressBlock_fast_extDict_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
-        U32 const hlog, U32 const mls)
+        U32 const hlog, U32 const stepSize, U32 const mls)
 {
    U32* hashTable = ms->hashTable;
    const BYTE* const base = ms->window.base;
@ -185,7 +186,8 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
        } else {
            if ( (matchIndex < lowestIndex) ||
                 (MEM_read32(match) != MEM_read32(ip)) ) {
-                ip += ((ip-anchor) >> kSearchStrength) + 1;
+                assert(stepSize >= 1);
+                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
                continue;
            }
            {   const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
@ -241,16 +243,17 @@ size_t ZSTD_compressBlock_fast_extDict(
 {
    U32 const hlog = cParams->hashLog;
    U32 const mls = cParams->searchLength;
+    U32 const stepSize = cParams->targetLength;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
-        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, 4);
+        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4);
    case 5 :
-        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, 5);
+        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5);
    case 6 :
-        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, 6);
+        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6);
    case 7 :
-        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, 7);
+        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7);
    }
 }
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@ -660,6 +660,7 @@ static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
    jobParams.cParams = params.cParams;
    jobParams.fParams = params.fParams;
    jobParams.compressionLevel = params.compressionLevel;
+    jobParams.disableLiteralCompression = params.disableLiteralCompression;

    jobParams.ldmParams = params.ldmParams;
    return jobParams;
@ -767,6 +768,7 @@ static size_t ZSTDMT_compress_advanced_internal(

    if ((nbJobs==1) | (params.nbWorkers<=1)) {   /* fallback to single-thread mode : this is a blocking invocation anyway */
        ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
+        DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
        if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
        return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams);
    }
@ -909,8 +911,8 @@ size_t ZSTDMT_initCStream_internal(
        const ZSTD_CDict* cdict, ZSTD_CCtx_params params,
        unsigned long long pledgedSrcSize)
 {
-    DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u)",
-                (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx);
+    DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u, disableLiteralCompression=%i)",
+                (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx, params.disableLiteralCompression);
    /* params are supposed to be fully validated at this point */
    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
@ -928,7 +930,7 @@ size_t ZSTDMT_initCStream_internal(
    mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN);  /* do not trigger multi-threading when srcSize is too small */
    if (mtctx->singleBlockingThread) {
        ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params);
-        DEBUGLOG(4, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
+        DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
        assert(singleThreadParams.nbWorkers == 0);
        return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
                                         dict, dictSize, cdict,
--- a/lib/zstd.h
+++ b/lib/zstd.h
@ -390,8 +390,8 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void);   /*!< recommended size for output
 #define ZSTD_SEARCHLOG_MIN       1
 #define ZSTD_SEARCHLENGTH_MAX    7   /* only for ZSTD_fast, other strategies are limited to 6 */
 #define ZSTD_SEARCHLENGTH_MIN    3   /* only for ZSTD_btopt, other strategies are limited to 4 */
-#define ZSTD_TARGETLENGTH_MIN    4   /* only useful for btopt */
-#define ZSTD_TARGETLENGTH_MAX  999   /* only useful for btopt */
+#define ZSTD_TARGETLENGTH_MIN    1   /* only used by btopt, btultra and btfast */
+#define ZSTD_TARGETLENGTH_MAX  999   /* only used by btopt, btultra and btfast */
 #define ZSTD_LDM_MINMATCH_MIN    4
 #define ZSTD_LDM_MINMATCH_MAX 4096
 #define ZSTD_LDM_BUCKETSIZELOG_MAX 8
@ -947,7 +947,9 @@ typedef enum {
    /* compression parameters */
    ZSTD_p_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table
                              * Default level is ZSTD_CLEVEL_DEFAULT==3.
-                              * Special: value 0 means "do not change cLevel". */
+                              * Special: value 0 means "do not change cLevel".
+                              * Note 1 : it's possible to pass a negative compression level by casting it to unsigned type.
+                              * Note 2 : setting compressionLevel automatically updates ZSTD_p_compressLiterals. */
    ZSTD_p_windowLog,        /* Maximum allowed back-reference distance, expressed as power of 2.
                              * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
                              * Special: value 0 means "do not change windowLog".
@ -974,9 +976,13 @@ typedef enum {
                              * Note that currently, for all strategies < btopt, effective minimum is 4.
                              * Note that currently, for all strategies > fast, effective maximum is 6.
                              * Special: value 0 means "do not change minMatchLength". */
-    ZSTD_p_targetLength,     /* Only useful for strategies >= btopt.
-                              * Length of Match considered "good enough" to stop search.
-                              * Larger values make compression stronger and slower.
+    ZSTD_p_targetLength,     /* Impact of this field depends on strategy.
+                              * For strategies btopt & btultra:
+                              *     Length of Match considered "good enough" to stop search.
+                              *     Larger values make compression stronger, and slower.
+                              * For strategy fast:
+                              *     Distance between match sampling.
+                              *     Larger values make compression faster, and weaker.
                              * Special: value 0 means "do not change targetLength". */
    ZSTD_p_compressionStrategy, /* See ZSTD_strategy enum definition.
                              * Cast selected strategy as unsigned for ZSTD_CCtx_setParameter() compatibility.
@ -1001,17 +1007,25 @@ typedef enum {
                              * (note : a strong exception to this rule is when first invocation sets ZSTD_e_end : it becomes a blocking call).
                              * More workers improve speed, but also increase memory usage.
                              * Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */
-    ZSTD_p_jobSize,          /* Size of a compression job. This value is only enforced in streaming (non-blocking) mode.
-                              * Each compression job is completed in parallel, so indirectly controls the nb of active threads.
+    ZSTD_p_jobSize,          /* Size of a compression job. This value is enforced only in non-blocking mode.
+                              * Each compression job is completed in parallel, so this value indirectly controls the nb of active threads.
                              * 0 means default, which is dynamically determined based on compression parameters.
-                              * Job size must be a minimum of overlapSize, or 1 KB, whichever is largest
+                              * Job size must be a minimum of overlapSize, or 1 MB, whichever is largest.
                              * The minimum size is automatically and transparently enforced */
    ZSTD_p_overlapSizeLog,   /* Size of previous input reloaded at the beginning of each job.
                              * 0 => no overlap, 6(default) => use 1/8th of windowSize, >=9 => use full windowSize */

    /* advanced parameters - may not remain available after API update */
+
+    ZSTD_p_compressLiterals=1000, /* control huffman compression of literals (enabled) by default.
+                              * disabling it improves speed and decreases compression ratio by a large amount.
+                              * note : this setting is automatically updated when changing compression level.
+                              *        positive compression levels set ZSTD_p_compressLiterals to 1.
+                              *        negative compression levels set ZSTD_p_compressLiterals to 0. */
+
    ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize,
                              * even when referencing into Dictionary content (default:0) */
+
    ZSTD_p_enableLongDistanceMatching=1200, /* Enable long distance matching.
                                         * This parameter is designed to improve the compression
                                         * ratio for large inputs with long distance matches.
@ -1070,23 +1084,21 @@ ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param
 ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);

 /*! ZSTD_CCtx_loadDictionary() :
- *  Create an internal CDict from dict buffer.
- *  Decompression will have to use same buffer.
+ *  Create an internal CDict from `dict` buffer.
+ *  Decompression will have to use same dictionary.
 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
- *  Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
- *            meaning "return to no-dictionary mode".
- *  Note 1 : `dict` content will be copied internally. Use
- *            ZSTD_CCtx_loadDictionary_byReference() to reference dictionary
- *            content instead. The dictionary buffer must then outlive its
- *            users.
+ *  Special: Adding a NULL (or 0-size) dictionary invalidates previous dictionary,
+ *           meaning "return to no-dictionary mode".
+ *  Note 1 : Dictionary will be used for all future compression jobs.
+ *           To return to "no-dictionary" situation, load a NULL dictionary
 *  Note 2 : Loading a dictionary involves building tables, which are dependent on compression parameters.
 *           For this reason, compression parameters cannot be changed anymore after loading a dictionary.
- *           It's also a CPU-heavy operation, with non-negligible impact on latency.
- *  Note 3 : Dictionary will be used for all future compression jobs.
- *           To return to "no-dictionary" situation, load a NULL dictionary
- *  Note 5 : Use ZSTD_CCtx_loadDictionary_advanced() to select how dictionary
- *           content will be interpreted.
- */
+ *           It's also a CPU consuming operation, with non-negligible impact on latency.
+ *  Note 3 :`dict` content will be copied internally.
+ *           Use ZSTD_CCtx_loadDictionary_byReference() to reference dictionary content instead.
+ *           In such a case, dictionary buffer must outlive its users.
+ *  Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
+ *           to precisely select how dictionary content must be interpreted. */
 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode);
@ -1101,8 +1113,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void
 *  Special : adding a NULL CDict means "return to no-dictionary mode".
 *  Note 1 : Currently, only one dictionary can be managed.
 *           Adding a new dictionary effectively "discards" any previous one.
- *  Note 2 : CDict is just referenced, its lifetime must outlive CCtx.
- */
+ *  Note 2 : CDict is just referenced, its lifetime must outlive CCtx. */
 ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);

 /*! ZSTD_CCtx_refPrefix() :
@ -1112,13 +1123,12 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
 *  Subsequent compression jobs will be done without prefix (if none is explicitly referenced).
 *  If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_CDict instead.
 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
- *  Special : Adding any prefix (including NULL) invalidates any previous prefix or dictionary
+ *  Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
 *  Note 1 : Prefix buffer is referenced. It must outlive compression job.
 *  Note 2 : Referencing a prefix involves building tables, which are dependent on compression parameters.
- *           It's a CPU-heavy operation, with non-negligible impact on latency.
- *  Note 3 : By default, the prefix is treated as raw content
- *           (ZSTD_dm_rawContent). Use ZSTD_CCtx_refPrefix_advanced() to alter
- *           dictMode. */
+ *           It's a CPU consuming operation, with non-negligible impact on latency.
+ *  Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent).
+ *           Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode. */
 ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize);
 ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictMode_e dictMode);

--- a/programs/bench.c
+++ b/programs/bench.c
@ -214,6 +214,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,

    /* init */
    if (strlen(displayName)>17) displayName += strlen(displayName)-17;   /* display last 17 characters */
+    if (g_nbWorkers==1) g_nbWorkers=0;   /* prefer synchronous mode */

    if (g_decodeOnly) {  /* benchmark only decompression : source must be already compressed */
        const char* srcPtr = (const char*)srcBuffer;
@ -533,9 +534,8 @@ static void BMK_benchCLevel(const void* srcBuffer, size_t benchedSize,
    if (g_displayLevel == 1 && !g_additionalParam)
        DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", ZSTD_VERSION_STRING, ZSTD_GIT_COMMIT_STRING, (U32)benchedSize, g_nbSeconds, (U32)(g_blockSize>>10));

-    if (cLevelLast < cLevel) cLevelLast = cLevel;
-
    for (l=cLevel; l <= cLevelLast; l++) {
+        if (l==0) continue;  /* skip level 0 */
        BMK_benchMem(srcBuffer, benchedSize,
                     displayName, l,
                     fileSizes, nbFiles,
@ -545,8 +545,8 @@ static void BMK_benchCLevel(const void* srcBuffer, size_t benchedSize,


 /*! BMK_loadFiles() :
-    Loads `buffer` with content of files listed within `fileNamesTable`.
-    At most, fills `buffer` entirely */
+ *  Loads `buffer` with content of files listed within `fileNamesTable`.
+ *  At most, fills `buffer` entirely. */
 static void BMK_loadFiles(void* buffer, size_t bufferSize,
                          size_t* fileSizes,
                          const char* const * const fileNamesTable, unsigned nbFiles)
@ -677,7 +677,6 @@ int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles,
 {
    double const compressibility = (double)g_compressibilityDefault / 100;

-    if (cLevel < 1) cLevel = 1;   /* minimum compression level */
    if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel();
    if (cLevelLast > ZSTD_maxCLevel()) cLevelLast = ZSTD_maxCLevel();
    if (cLevelLast < cLevel) cLevelLast = cLevel;
--- a/programs/fileio.c
+++ b/programs/fileio.c
@ -420,6 +420,7 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
    cRess_t ress;
    memset(&ress, 0, sizeof(ress));

+    DISPLAYLEVEL(6, "FIO_createCResources \n");
    ress.cctx = ZSTD_createCCtx();
    if (ress.cctx == NULL)
        EXM_THROW(30, "allocation error : can't create ZSTD_CCtx");
@ -440,7 +441,7 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_dictIDFlag, g_dictIDFlag) );
        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_checksumFlag, g_checksumFlag) );
        /* compression level */
-        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, cLevel) );
+        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, (unsigned)cLevel) );
        /* long distance matching */
        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_enableLongDistanceMatching, g_ldmFlag) );
        CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_ldmHashLog, g_ldmHashLog) );
@ -908,6 +909,7 @@ static int FIO_compressFilename_dstFile(cRess_t ress,
    stat_t statbuf;
    int stat_result = 0;

+    DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s", dstFileName);
    ress.dstFile = FIO_openDstFile(dstFileName);
    if (ress.dstFile==NULL) return 1;  /* could not open dstFileName */
    /* Must ony be added after FIO_openDstFile() succeeds.
--- a/programs/zstd.1.md
+++ b/programs/zstd.1.md
@ -115,6 +115,14 @@ the last one takes effect.

    Note: If `windowLog` is set to larger than 27, `--long=windowLog` or
    `--memory=windowSize` needs to be passed to the decompressor.
+* `--fast[=#]`:
+    switch to ultra-fast compression levels.
+    If `=#` is not present, it defaults to `1`.
+    The higher the value, the faster the compression speed,
+    at the cost of some compression ratio.
+    This setting overwrites compression level if one was set previously.
+    Similarly, if a compression level is set after `--fast`, it overrides it.
+
 * `-T#`, `--threads=#`:
    Compress using `#` working threads (default: 1).
    If `#` is 0, attempt to detect and use the number of physical CPU cores.
@ -339,14 +347,21 @@ The list of available _options_:
    The minimum _slen_ is 3 and the maximum is 7.

 - `targetLen`=_tlen_, `tlen`=_tlen_:
-    Specify the minimum match length that causes a match finder to stop
-    searching for better matches.
+    The impact of this field vary depending on selected strategy.

-    A larger minimum match length usually improves compression ratio but
-    decreases compression speed.
-    This option is only used with strategies ZSTD_btopt and ZSTD_btultra.
+    For ZSTD\_btopt and ZSTD\_btultra, it specifies the minimum match length
+    that causes match finder to stop searching for better matches.
+    A larger `targetLen` usually improves compression ratio
+    but decreases compression speed.

-    The minimum _tlen_ is 4 and the maximum is 999.
+    For ZSTD\_fast, it specifies
+    the amount of data skipped between match sampling.
+    Impact is reversed : a larger `targetLen` increases compression speed
+    but decreases compression ratio.
+
+    For all other strategies, this field has no impact.
+
+    The minimum _tlen_ is 1 and the maximum is 999.

 - `overlapLog`=_ovlog_,  `ovlog`=_ovlog_:
    Determine `overlapSize`, amount of data reloaded from previous job.
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@ -133,7 +133,8 @@ static int usage_advanced(const char* programName)
    DISPLAY( " -l     : print information about zstd compressed files \n");
 #ifndef ZSTD_NOCOMPRESS
    DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel());
-    DISPLAY( "--long[=#]  : enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
+    DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
+    DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1);
 #ifdef ZSTD_MULTITHREAD
    DISPLAY( " -T#    : spawns # compression threads (default: 1, 0==# cores) \n");
    DISPLAY( " -B#    : select size of each job (default: 0==automatic) \n");
@ -219,10 +220,10 @@ static int exeNameMatch(const char* exeName, const char* test)
 }

 /*! readU32FromChar() :
-    @return : unsigned integer value read from input in `char` format
-    allows and interprets K, KB, KiB, M, MB and MiB suffix.
-    Will also modify `*stringPtr`, advancing it to position where it stopped reading.
-    Note : function result can overflow if digit string > MAX_UINT */
+ * @return : unsigned integer value read from input in `char` format.
+ *  allows and interprets K, KB, KiB, M, MB and MiB suffix.
+ *  Will also modify `*stringPtr`, advancing it to position where it stopped reading.
+ *  Note : function result can overflow if digit string > MAX_UINT */
 static unsigned readU32FromChar(const char** stringPtr)
 {
    unsigned result = 0;
@ -241,7 +242,7 @@ static unsigned readU32FromChar(const char** stringPtr)
 /** longCommandWArg() :
 *  check if *stringPtr is the same as longCommand.
 *  If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
- *  @return 0 and doesn't modify *stringPtr otherwise.
+ * @return 0 and doesn't modify *stringPtr otherwise.
 */
 static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
 {
@ -387,7 +388,7 @@ int main(int argCount, const char* argv[])
    zstd_operation_mode operation = zom_compress;
    ZSTD_compressionParameters compressionParams;
    int cLevel = ZSTDCLI_CLEVEL_DEFAULT;
-    int cLevelLast = 1;
+    int cLevelLast = -10000;
    unsigned recursive = 0;
    unsigned memLimit = 0;
    const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));   /* argCount >= 1 */
@ -543,6 +544,21 @@ int main(int argCount, const char* argv[])
                            compressionParams.windowLog = ldmWindowLog;
                        continue;
                    }
+                    if (longCommandWArg(&argument, "--fast")) {
+                        /* Parse optional window log */
+                        if (*argument == '=') {
+                            U32 fastLevel;
+                            ++argument;
+                            fastLevel = readU32FromChar(&argument);
+                            if (fastLevel) cLevel = - (int)fastLevel;
+                        } else if (*argument != 0) {
+                            /* Invalid character following --fast */
+                            CLEAN_RETURN(badusage(programName));
+                        } else {
+                            cLevel = -1;  /* default for --fast */
+                        }
+                        continue;
+                    }
                    /* fall-through, will trigger bad_usage() later on */
                }

@ -813,16 +829,22 @@ int main(int argCount, const char* argv[])
    }

 #ifndef ZSTD_NODECOMPRESS
-    if (operation==zom_test) { outFileName=nulmark; FIO_setRemoveSrcFile(0); } /* test mode */
+    if (operation==zom_test) { outFileName=nulmark; FIO_setRemoveSrcFile(0); }  /* test mode */
 #endif

    /* No input filename ==> use stdin and stdout */
    filenameIdx += !filenameIdx;   /* filenameTable[0] is stdin by default */
-    if (!strcmp(filenameTable[0], stdinmark) && !outFileName) outFileName = stdoutmark;   /* when input is stdin, default output is stdout */
+    if (!strcmp(filenameTable[0], stdinmark) && !outFileName)
+        outFileName = stdoutmark;  /* when input is stdin, default output is stdout */

    /* Check if input/output defined as console; trigger an error in this case */
-    if (!strcmp(filenameTable[0], stdinmark) && IS_CONSOLE(stdin) ) CLEAN_RETURN(badusage(programName));
-    if (outFileName && !strcmp(outFileName, stdoutmark) && IS_CONSOLE(stdout) && !strcmp(filenameTable[0], stdinmark) && !forceStdout && operation!=zom_decompress)
+    if (!strcmp(filenameTable[0], stdinmark) && IS_CONSOLE(stdin) )
+        CLEAN_RETURN(badusage(programName));
+    if ( outFileName && !strcmp(outFileName, stdoutmark)
+      && IS_CONSOLE(stdout)
+      && !strcmp(filenameTable[0], stdinmark)
+      && !forceStdout
+      && operation!=zom_decompress )
        CLEAN_RETURN(badusage(programName));

 #ifndef ZSTD_NOCOMPRESS
--- a/tests/fuzzer.c
+++ b/tests/fuzzer.c
@ -34,7 +34,7 @@
 #include "zdict.h"        /* ZDICT_trainFromBuffer */
 #include "datagen.h"      /* RDG_genBuffer */
 #include "mem.h"
-#define XXH_STATIC_LINKING_ONLY
+#define XXH_STATIC_LINKING_ONLY   /* XXH64_state_t */
 #include "xxhash.h"       /* XXH64 */
 #include "util.h"

@ -1382,10 +1382,13 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD
        crcOrig = XXH64(sampleBuffer, sampleSize, 0);

        /* compression tests */
-        {   int const cLevel =
+        {   int const cLevelPositive =
                    ( FUZ_rand(&lseed) %
                     (ZSTD_maxCLevel() - (FUZ_highbit32((U32)sampleSize) / cLevelLimiter)) )
                    + 1;
+            int const cLevel = ((FUZ_rand(&lseed) & 15) == 3) ?
+                             - (int)((FUZ_rand(&lseed) & 7) + 1) :   /* test negative cLevel */
+                             cLevelPositive;
            DISPLAYLEVEL(5, "fuzzer t%u: Simple compression test (level %i) \n", testNb, cLevel);
            cSize = ZSTD_compressCCtx(ctx, cBuffer, cBufferSize, sampleBuffer, sampleSize, cLevel);
            CHECK(ZSTD_isError(cSize), "ZSTD_compressCCtx failed : %s", ZSTD_getErrorName(cSize));
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@ -101,8 +101,11 @@ $ECHO "test : basic compression "
 $ZSTD -f tmp                      # trivial compression case, creates tmp.zst
 $ECHO "test : basic decompression"
 $ZSTD -df tmp.zst                 # trivial decompression case (overwrites tmp)
-$ECHO "test : too large compression level (must fail)"
+$ECHO "test : too large compression level => auto-fix"
 $ZSTD -99 -f tmp  # too large compression level, automatic sized down
+$ECHO "test : --fast aka negative compression levels"
+$ZSTD --fast -f tmp  # == -1
+$ZSTD --fast=3 -f tmp  # == -3
 $ECHO "test : compress to stdout"
 $ZSTD tmp -c > tmpCompressed
 $ZSTD tmp --stdout > tmpCompressed       # long command format
@ -201,7 +204,6 @@ fi
 rm tmp*


-
 $ECHO "\n===>  Advanced compression parameters "
 $ECHO "Hello world!" | $ZSTD --zstd=windowLog=21,      - -o tmp.zst && die "wrong parameters not detected!"
 $ECHO "Hello world!" | $ZSTD --zstd=windowLo=21        - -o tmp.zst && die "wrong parameters not detected!"
@ -474,6 +476,8 @@ $ECHO "bench one file"
 $ZSTD -bi0 tmp1
 $ECHO "bench multiple levels"
 $ZSTD -i0b0e3 tmp1
+$ECHO "bench negative level"
+$ZSTD -bi0 --fast tmp1
 $ECHO "with recursive and quiet modes"
 $ZSTD -rqi1b1e2 tmp1