diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index 9697ebd5..e50f4666 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -111,7 +111,7 @@ unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); @return : content size to be decompressed, as a 64-bits value _if known and not empty_, 0 otherwise.


-

Helper functions

#define ZSTD_COMPRESSBOUND(srcSize)   ((srcSize) + ((srcSize)>>8) + (((srcSize) < 128 KB) ? ((128 KB - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0))  /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
+

Helper functions

#define ZSTD_COMPRESSBOUND(srcSize)   ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0))  /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
 size_t      ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case scenario */
 unsigned    ZSTD_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
 const char* ZSTD_getErrorName(size_t code);     /*!< provides readable string from an error code */
@@ -906,7 +906,8 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
 
size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value);
 

Set one compression parameter, selected by enum ZSTD_cParameter. Note : when `value` is an enum, cast it to unsigned for proper type checking. - @result : 0, or an error code (which can be tested with ZSTD_isError()). + @result : informational value (typically, the one being set, possibly corrected), + or an error code (which can be tested with ZSTD_isError()).


size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index efcbc450..4fd55c43 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -72,26 +72,27 @@ typedef struct {
 } ZSTD_optimal_t;
 
 typedef struct {
-    U32* litFreq;
-    U32* litLengthFreq;
-    U32* matchLengthFreq;
-    U32* offCodeFreq;
-    ZSTD_match_t* matchTable;
-    ZSTD_optimal_t* priceTable;
+    /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
+    U32* litFreq;               /* table of literals statistics, of size 256 */
+    U32* litLengthFreq;         /* table of litLength statistics, of size (MaxLL+1) */
+    U32* matchLengthFreq;       /* table of matchLength statistics, of size (MaxML+1) */
+    U32* offCodeFreq;           /* table of offCode statistics, of size (MaxOff+1) */
+    ZSTD_match_t* matchTable;   /* list of found matches, of size ZSTD_OPT_NUM+1 */
+    ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
 
     U32  litSum;                 /* nb of literals */
     U32  litLengthSum;           /* nb of litLength codes */
     U32  matchLengthSum;         /* nb of matchLength codes */
-    U32  matchSum;               /* one argument to calculate `factor` */
+    U32  matchSum;               /* a strange argument used in calculating `factor` */
     U32  offCodeSum;             /* nb of offset codes */
     /* begin updated by ZSTD_setLog2Prices */
     U32  log2litSum;             /* pow2 to compare log2(litfreq) to */
     U32  log2litLengthSum;       /* pow2 to compare log2(llfreq) to */
     U32  log2matchLengthSum;     /* pow2 to compare log2(mlfreq) to */
     U32  log2offCodeSum;         /* pow2 to compare log2(offreq) to */
-    U32  factor;                 /* added to calculate ZSTD_getPrice() (but why?) */
+    U32  factor;                 /* fixed cost added when calculating ZSTD_getPrice() (but why ? seems to favor less sequences) */
     /* end : updated by ZSTD_setLog2Prices */
-    U32  staticPrices;           /* prices follow a static cost structure, statistics are irrelevant */
+    U32  staticPrices;           /* prices follow a pre-defined cost structure, statistics are irrelevant */
     U32  cachedPrice;
     U32  cachedLitLength;
     const BYTE* cachedLiterals;
@@ -346,13 +347,17 @@ MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* co
     const BYTE* const pStart = pIn;
     const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
 
-    while (pIn < pInLoopLimit) {
-        size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
-        if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
-        pIn += ZSTD_NbCommonBytes(diff);
-        return (size_t)(pIn - pStart);
-    }
-    if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
+    if (pIn < pInLoopLimit) {
+        { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
+          if (diff) return ZSTD_NbCommonBytes(diff); }
+        pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
+        while (pIn < pInLoopLimit) {
+            size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
+            if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
+            pIn += ZSTD_NbCommonBytes(diff);
+            return (size_t)(pIn - pStart);
+    }   }
+    if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
     if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
     if ((pInlowLimit;
-    U32 matchEndIdx = current+8;
+    U32 matchEndIdx = current+8+1;
     size_t bestLength = 8;
 #ifdef ZSTD_C_PREDICT
     U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
@@ -85,8 +85,7 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc,
         if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
             assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if extDict is incorrectly set to 0 */
             match = base + matchIndex;
-            if (match[matchLength] == ip[matchLength])
-                matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
+            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
         } else {
             match = dictBase + matchIndex;
             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
@@ -122,8 +121,8 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc,
 
     *smallerPtr = *largerPtr = 0;
     if (bestLength > 384) return MIN(192, (U32)(bestLength - 384));   /* speed optimization */
-    if (matchEndIdx > current + 8) return matchEndIdx - (current + 8);
-    return 1;
+    assert(matchEndIdx > current + 8);
+    return matchEndIdx - (current + 8);
 }
 
 FORCE_INLINE_TEMPLATE
@@ -182,7 +181,7 @@ static size_t ZSTD_insertBtAndFindBestMatch (
     const U32 windowLow = zc->lowLimit;
     U32* smallerPtr = bt + 2*(current&btMask);
     U32* largerPtr  = bt + 2*(current&btMask) + 1;
-    U32 matchEndIdx = current+8;
+    U32 matchEndIdx = current+8+1;
     U32 dummy32;   /* to be nullified at the end */
     size_t bestLength = 0;
 
@@ -196,8 +195,7 @@ static size_t ZSTD_insertBtAndFindBestMatch (
 
         if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
             match = base + matchIndex;
-            if (match[matchLength] == ip[matchLength])
-                matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
+            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
         } else {
             match = dictBase + matchIndex;
             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
@@ -233,7 +231,8 @@ static size_t ZSTD_insertBtAndFindBestMatch (
 
     *smallerPtr = *largerPtr = 0;
 
-    zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1;   /* skip repetitive patterns */
+    assert(matchEndIdx > current+8);
+    zc->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
     return bestLength;
 }
 
@@ -354,14 +353,14 @@ size_t ZSTD_HcFindBestMatch_generic (
     U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls);
 
     for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
-        const BYTE* match;
         size_t currentMl=0;
         if ((!extDict) || matchIndex >= dictLimit) {
-            match = base + matchIndex;
+            const BYTE* const match = base + matchIndex;
             if (match[ml] == ip[ml])   /* potentially better */
                 currentMl = ZSTD_count(ip, match, iLimit);
         } else {
-            match = dictBase + matchIndex;
+            const BYTE* const match = dictBase + matchIndex;
+            assert(match+4 <= dictEnd);
             if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
                 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
         }
@@ -399,10 +398,10 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
 
 
 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
-                        ZSTD_CCtx* zc,
+                        ZSTD_CCtx* const zc,
                         const BYTE* ip, const BYTE* const iLimit,
-                        size_t* offsetPtr,
-                        const U32 maxNbAttempts, const U32 matchLengthSearch)
+                        size_t* const offsetPtr,
+                        U32 const maxNbAttempts, U32 const matchLengthSearch)
 {
     switch(matchLengthSearch)
     {
@@ -521,9 +520,8 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
          */
         /* catch up */
         if (offset) {
-            while ( (start > anchor)
-                 && (start > base+offset-ZSTD_REP_MOVE)
-                 && (start[-1] == (start-offset+ZSTD_REP_MOVE)[-1]) )  /* only search for offset within prefix */
+            while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > base))
+                 && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) )  /* only search for offset within prefix */
                 { start--; matchLength++; }
             offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
         }
@@ -535,9 +533,8 @@ _storeSequence:
         }
 
         /* check immediate repcode */
-        while ( (ip <= ilimit)
-             && ((offset_2>0)
-             & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
+        while ( ((ip <= ilimit) & (offset_2>0))
+             && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
             /* store sequence */
             matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
             offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 04f03ebc..66a7da19 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -13,7 +13,7 @@
 #include "zstd_lazy.h"   /* ZSTD_updateTree, ZSTD_updateTree_extDict */
 
 
-#define ZSTD_LITFREQ_ADD    2   /* sort of scaling factor for litSum and litFreq (why the need ?), but also used for matchSum ? */
+#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats. Also used for matchSum (?) */
 #define ZSTD_FREQ_DIV       4   /* log factor when using previous stats to init next stats */
 #define ZSTD_MAX_PRICE     (1<<30)
 
@@ -96,7 +96,8 @@ static void ZSTD_rescaleFreqs(optState_t* optPtr, const BYTE* src, size_t srcSiz
 }
 
 
-static U32 ZSTD_getLiteralPrice(optState_t* optPtr, U32 const litLength, const BYTE* literals)
+static U32 ZSTD_getLiteralPrice(optState_t* const optPtr,
+                                U32 const litLength, const BYTE* const literals)
 {
     U32 price;
 
@@ -138,17 +139,22 @@ static U32 ZSTD_getLiteralPrice(optState_t* optPtr, U32 const litLength, const B
 }
 
 
-FORCE_INLINE_TEMPLATE U32 ZSTD_getPrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra)
+/* ZSTD_getPrice() :
+ * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
+FORCE_INLINE_TEMPLATE U32 ZSTD_getPrice(optState_t* optPtr,
+                                    U32 const litLength, const BYTE* const literals, U32 const offset, U32 const matchLength,
+                                    int const optLevel)
 {
-    BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
-    U32 const mlBase = matchLength - MINMATCH;
     U32 price;
+    U32 const offCode = ZSTD_highbit32(offset+1);
+    U32 const mlBase = matchLength - MINMATCH;
+    assert(matchLength >= MINMATCH);
 
     if (optPtr->staticPrices)  /* fixed scheme, do not use statistics */
         return ZSTD_getLiteralPrice(optPtr, litLength, literals) + ZSTD_highbit32((U32)mlBase+1) + 16 + offCode;
 
     price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1);
-    if (!ultra /*static*/ && offCode >= 20) price += (offCode-19)*2; /* handicap for long distance offsets, favor decompression speed */
+    if ((optLevel<2) /*static*/ && offCode >= 20) price += (offCode-19)*2; /* handicap for long distance offsets, favor decompression speed */
 
     /* match Length */
     {   U32 const mlCode = ZSTD_MLcode(mlBase);
@@ -161,25 +167,26 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_getPrice(optState_t* optPtr, U32 litLength, const
 }
 
 
-static void ZSTD_updatePrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
+static void ZSTD_updateStats(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offsetCode, U32 matchLength)
 {
-    U32 u;
-
     /* literals */
-    optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
-    for (u=0; u < litLength; u++)
-        optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
+    {   U32 u;
+        for (u=0; u < litLength; u++)
+            optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
+        optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
+    }
 
     /* literal Length */
-    {   const U32 llCode = ZSTD_LLcode(litLength);
+    {   U32 const llCode = ZSTD_LLcode(litLength);
         optPtr->litLengthFreq[llCode]++;
         optPtr->litLengthSum++;
     }
 
-    /* match offset */
-    {   BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
-        optPtr->offCodeSum++;
+    /* match offset code (0-2=>repCode; 3+=>offset+2) */
+    {   U32 const offCode = ZSTD_highbit32(offsetCode+1);
+        assert(offCode <= MaxOff);
         optPtr->offCodeFreq[offCode]++;
+        optPtr->offCodeSum++;
     }
 
     /* match Length */
@@ -188,8 +195,6 @@ static void ZSTD_updatePrice(optState_t* optPtr, U32 litLength, const BYTE* lite
         optPtr->matchLengthFreq[mlCode]++;
         optPtr->matchLengthSum++;
     }
-
-    ZSTD_setLog2Prices(optPtr);
 }
 
 
@@ -235,9 +240,9 @@ FORCE_INLINE_TEMPLATE
 U32 ZSTD_insertBtAndGetAllMatches (
                         ZSTD_CCtx* zc,
                         const BYTE* const ip, const BYTE* const iLimit, const int extDict,
-                        U32 nbCompares, U32 const mls,
+                        U32 nbCompares, U32 const mls, U32 const sufficient_len,
                         U32 rep[ZSTD_REP_NUM], U32 const ll0,
-                        ZSTD_match_t* matches, const U32 minMatchLen)
+                        ZSTD_match_t* matches, const U32 lengthToBeat)
 {
     const BYTE* const base = zc->base;
     U32 const current = (U32)(ip-base);
@@ -258,11 +263,11 @@ U32 ZSTD_insertBtAndGetAllMatches (
     U32 const windowLow = zc->lowLimit;
     U32* smallerPtr = bt + 2*(current&btMask);
     U32* largerPtr  = bt + 2*(current&btMask) + 1;
-    U32 matchEndIdx = current+8;   /* farthest referenced position of any match => detects repetitive patterns */
+    U32 matchEndIdx = current+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
     U32 dummy32;   /* to be nullified at the end */
     U32 mnum = 0;
 
-    size_t bestLength = minMatchLen-1;
+    size_t bestLength = lengthToBeat-1;
     DEBUGLOG(7, "ZSTD_insertBtAndGetAllMatches");
 
     /* check repCode */
@@ -294,7 +299,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
                 matches[mnum].off = repCode - ll0;
                 matches[mnum].len = (U32)repLen;
                 mnum++;
-                if ( (repLen > ZSTD_OPT_NUM)
+                if ( (repLen > sufficient_len)
                    | (ip+repLen == iLimit) ) {  /* best possible */
                     return mnum;
     }   }   }   }
@@ -323,8 +328,9 @@ U32 ZSTD_insertBtAndGetAllMatches (
                 matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE;
                 matches[0].len = (U32)mlen;
                 mnum = 1;
-                if ( (mlen > ZSTD_OPT_NUM)
-                   | (ip+mlen == iLimit) ) {  /* best possible */
+                if ( (mlen > sufficient_len) |
+                     (ip+mlen == iLimit) ) {  /* best possible length */
+                    zc->nextToUpdate = current+1;  /* skip insertion */
                     return 1;
     }   }   }   }
 
@@ -339,9 +345,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
         if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
             assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
             match = base + matchIndex;
-            if (match[matchLength] == ip[matchLength]) {
-                matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1;
-            }
+            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
         } else {
             match = dictBase + matchIndex;
             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
@@ -382,17 +386,18 @@ U32 ZSTD_insertBtAndGetAllMatches (
 
     *smallerPtr = *largerPtr = 0;
 
-    zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1;  /* skip repetitive patterns */
+    assert(matchEndIdx > current+8);
+    zc->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
     return mnum;
 }
 
 
 FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
                         ZSTD_CCtx* zc,   /* Index table will be updated */
-                        const BYTE* ip, const BYTE* const iHighLimit, const int extDict,
-                        const U32 maxNbAttempts, const U32 matchLengthSearch,
+                        const BYTE* ip, const BYTE* const iHighLimit, int const extDict,
+                        U32 const maxNbAttempts, U32 const matchLengthSearch, U32 const sufficient_len,
                         U32 rep[ZSTD_REP_NUM], U32 const ll0,
-                        ZSTD_match_t* matches, const U32 minMatchLen)
+                        ZSTD_match_t* matches, U32 const lengthToBeat)
 {
     DEBUGLOG(7, "ZSTD_BtGetAllMatches");
     if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */
@@ -400,12 +405,12 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
     else ZSTD_updateTree(zc, ip, iHighLimit, maxNbAttempts, matchLengthSearch);
     switch(matchLengthSearch)
     {
-    case 3 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 3, rep, ll0, matches, minMatchLen);
+    case 3 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 3, sufficient_len, rep, ll0, matches, lengthToBeat);
     default :
-    case 4 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 4, rep, ll0, matches, minMatchLen);
-    case 5 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 5, rep, ll0, matches, minMatchLen);
+    case 4 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 4, sufficient_len, rep, ll0, matches, lengthToBeat);
+    case 5 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 5, sufficient_len, rep, ll0, matches, lengthToBeat);
     case 7 :
-    case 6 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 6, rep, ll0, matches, minMatchLen);
+    case 6 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 6, sufficient_len, rep, ll0, matches, lengthToBeat);
     }
 }
 
@@ -452,7 +457,7 @@ repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
 FORCE_INLINE_TEMPLATE
 size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                                       const void* src, size_t srcSize,
-                                      const int ultra, const int extDict)
+                                      const int optLevel, const int extDict)
 {
     seqStore_t* const seqStorePtr = &(ctx->seqStore);
     optState_t* const optStatePtr = &(ctx->optState);
@@ -488,7 +493,7 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
         /* find first match */
         {   U32 const litlen = (U32)(ip - anchor);
             U32 const ll0 = !litlen;
-            U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, ip, iend, extDict, maxSearches, mls, rep, ll0, matches, minMatch);
+            U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, ip, iend, extDict, maxSearches, mls, sufficient_len, rep, ll0, matches, minMatch);
             if (!nbMatches) { ip++; continue; }
 
             /* initialize opt[0] */
@@ -519,7 +524,7 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                     U32 const end = matches[matchNb].len;
                     repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
                     while (pos <= end) {
-                        U32 const matchPrice = ZSTD_getPrice(optStatePtr, litlen, anchor, offset, pos, ultra);
+                        U32 const matchPrice = ZSTD_getPrice(optStatePtr, litlen, anchor, offset, pos, optLevel);
                         if (pos > last_pos || matchPrice < opt[pos].price) {
                             DEBUGLOG(7, "rPos:%u => set initial price : %u",
                                         pos, matchPrice);
@@ -548,16 +553,20 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                     SET_PRICE(cur, 1/*mlen*/, 0/*offset*/, litlen, price, opt[cur-1].rep);
             }   }
 
-            if (cur == last_pos) break;
-
             /* last match must start at a minimum distance of 8 from oend */
             if (inr > ilimit) continue;
 
+            if (cur == last_pos) break;
+
+             if ( (optLevel==0) /*static*/
+               && (opt[cur+1].price <= opt[cur].price) )
+                continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
+
             {   U32 const ll0 = (opt[cur].mlen != 1);
                 U32 const litlen = (opt[cur].mlen == 1) ? opt[cur].litlen : 0;
                 U32 const basePrice = (cur > litlen) ? opt[cur-litlen].price : 0;
                 const BYTE* const baseLiterals = ip + cur - litlen;
-                U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, inr, iend, extDict, maxSearches, mls, opt[cur].rep, ll0, matches, minMatch);
+                U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, inr, iend, extDict, maxSearches, mls, sufficient_len, opt[cur].rep, ll0, matches, minMatch);
                 U32 matchNb;
                 if (!nbMatches) continue;
                 assert(baseLiterals >= prefixStart);
@@ -577,26 +586,28 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
 
                 /* set prices using matches found at position == cur */
                 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
-                    U32 mlen = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
-                    U32 const lastML = matches[matchNb].len;
                     U32 const offset = matches[matchNb].off;
                     repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0);
+                    U32 const lastML = matches[matchNb].len;
+                    U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
+                    U32 mlen;
 
                     DEBUGLOG(7, "testing match %u => offCode=%u, mlen=%u, llen=%u",
                                 matchNb, matches[matchNb].off, lastML, litlen);
 
-                    while (mlen <= lastML) {
+                    for (mlen = lastML; mlen >= startML; mlen--) {
                         U32 const pos = cur + mlen;
-                        U32 const price = basePrice + ZSTD_getPrice(optStatePtr, litlen, baseLiterals, offset, mlen, ultra);
+                        U32 const price = basePrice + ZSTD_getPrice(optStatePtr, litlen, baseLiterals, offset, mlen, optLevel);
 
                         if ((pos > last_pos) || (price < opt[pos].price)) {
                             DEBUGLOG(7, "rPos:%u => new better price (%u<%u)",
                                         pos, price, opt[pos].price);
                             SET_PRICE(pos, mlen, offset, litlen, price, repHistory);  /* note : macro modifies last_pos */
+                        } else {
+                            if (optLevel==0) break;  /* gets ~+10% speed for about -0.01 ratio loss */
                         }
-
-                        mlen++;
-        }   }   }   }
+            }   }   }
+        }  /* for (cur = 1; cur <= last_pos; cur++) */
 
         best_mlen = opt[last_pos].mlen;
         best_off = opt[last_pos].off;
@@ -646,11 +657,12 @@ _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
                     }
                 }
 
-                ZSTD_updatePrice(optStatePtr, llen, anchor, offset, mlen);
+                ZSTD_updateStats(optStatePtr, llen, anchor, offset, mlen);
                 ZSTD_storeSeq(seqStorePtr, llen, anchor, offset, mlen-MINMATCH);
                 anchor = ip;
         }   }
-    }   /* for (cur=0; cur < last_pos; ) */
+        ZSTD_setLog2Prices(optStatePtr);
+    }   /* while (ip < ilimit) */
 
     /* Save reps for next block */
     { int i; for (i=0; irepToConfirm[i] = rep[i]; }
@@ -663,20 +675,20 @@ _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
 size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
     DEBUGLOG(5, "ZSTD_compressBlock_btopt");
-    return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*ultra*/, 0 /*extDict*/);
+    return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*optLevel*/, 0 /*extDict*/);
 }
 
 size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1 /*ultra*/, 0 /*extDict*/);
+    return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 2 /*optLevel*/, 0 /*extDict*/);
 }
 
 size_t ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*ultra*/, 1 /*extDict*/);
+    return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*optLevel*/, 1 /*extDict*/);
 }
 
 size_t ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1 /*ultra*/, 1 /*extDict*/);
+    return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 2 /*optLevel*/, 1 /*extDict*/);
 }
diff --git a/tests/paramgrill.c b/tests/paramgrill.c
index a387a44c..081a284e 100644
--- a/tests/paramgrill.c
+++ b/tests/paramgrill.c
@@ -310,14 +310,10 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
 }
 
 
-const char* g_stratName[] = { "ZSTD_fast    ",
-                              "ZSTD_dfast   ",
-                              "ZSTD_greedy  ",
-                              "ZSTD_lazy    ",
-                              "ZSTD_lazy2   ",
-                              "ZSTD_btlazy2 ",
-                              "ZSTD_btopt   ",
-                              "ZSTD_btultra "};
+const char* g_stratName[ZSTD_btultra+1] = {
+                "(none)       ", "ZSTD_fast    ", "ZSTD_dfast   ",
+                "ZSTD_greedy  ", "ZSTD_lazy    ", "ZSTD_lazy2   ",
+                "ZSTD_btlazy2 ", "ZSTD_btopt   ", "ZSTD_btultra "};
 
 static void BMK_printWinner(FILE* f, U32 cLevel, BMK_result_t result, ZSTD_compressionParameters params, size_t srcSize)
 {