Merge pull request #924 from facebook/opt2

faster btopt variant
Yann Collet 2017-11-21 16:34:37 -08:00 committed by GitHub
commit 4451b213c0
5 changed files with 115 additions and 104 deletions

View File

@ -111,7 +111,7 @@ unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
@return : content size to be decompressed, as a 64-bits value _if known and not empty_, 0 otherwise.
</p></pre><BR>
<h3>Helper functions</h3><pre></pre><b><pre>#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < 128 KB) ? ((128 KB - (srcSize)) >> 11) </b>/* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */<b>
<h3>Helper functions</h3><pre></pre><b><pre>#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) </b>/* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */<b>
size_t ZSTD_compressBound(size_t srcSize); </b>/*!< maximum compressed size in worst case scenario */<b>
unsigned ZSTD_isError(size_t code); </b>/*!< tells if a `size_t` function result is an error code */<b>
const char* ZSTD_getErrorName(size_t code); </b>/*!< provides readable string from an error code */<b>
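For reference, a minimal, self-contained usage sketch of the three helpers documented above. compress_once() and the compression level 3 are illustrative choices, not part of this changeset.

    #include <stdio.h>
    #include <stdlib.h>
    #include <zstd.h>

    static int compress_once(const void* src, size_t srcSize)
    {
        size_t const dstCapacity = ZSTD_compressBound(srcSize);   /* worst-case compressed size */
        void* const dst = malloc(dstCapacity);
        if (dst == NULL) return 1;
        {   size_t const cSize = ZSTD_compress(dst, dstCapacity, src, srcSize, 3);
            if (ZSTD_isError(cSize)) {                             /* size_t results double as error codes */
                fprintf(stderr, "compression error: %s\n", ZSTD_getErrorName(cSize));
                free(dst);
                return 1;
            }
            printf("%u -> %u bytes\n", (unsigned)srcSize, (unsigned)cSize);
        }
        free(dst);
        return 0;
    }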
@ -906,7 +906,8 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
<pre><b>size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value);
</b><p> Set one compression parameter, selected by enum ZSTD_cParameter.
Note : when `value` is an enum, cast it to unsigned for proper type checking.
@result : 0, or an error code (which can be tested with ZSTD_isError()).
@result : informational value (typically, the one being set, possibly corrected),
or an error code (which can be tested with ZSTD_isError()).
</p></pre><BR>
<pre><b>size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
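For context, a small sketch of consuming the informational return value described above. The parameter name below uses the present-day spelling ZSTD_c_windowLog (the experimental API of this era used ZSTD_p_* names); the rest simply follows the documented contract.

    #include <stdio.h>
    #include <zstd.h>

    static void set_window_log(ZSTD_CCtx* cctx, int wlog)
    {
        size_t const r = ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, wlog);
        if (ZSTD_isError(r)) {
            fprintf(stderr, "rejected: %s\n", ZSTD_getErrorName(r));   /* e.g. out-of-bound value */
        } else {
            /* per the documentation above, r is the value actually applied, possibly corrected */
            printf("windowLog set to %u\n", (unsigned)r);
        }
    }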

View File

@ -72,26 +72,27 @@ typedef struct {
} ZSTD_optimal_t;
typedef struct {
U32* litFreq;
U32* litLengthFreq;
U32* matchLengthFreq;
U32* offCodeFreq;
ZSTD_match_t* matchTable;
ZSTD_optimal_t* priceTable;
/* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
U32* litFreq; /* table of literals statistics, of size 256 */
U32* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
U32* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
U32* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */
ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
U32 litSum; /* nb of literals */
U32 litLengthSum; /* nb of litLength codes */
U32 matchLengthSum; /* nb of matchLength codes */
U32 matchSum; /* one argument to calculate `factor` */
U32 matchSum; /* a strange argument used in calculating `factor` */
U32 offCodeSum; /* nb of offset codes */
/* begin updated by ZSTD_setLog2Prices */
U32 log2litSum; /* pow2 to compare log2(litfreq) to */
U32 log2litLengthSum; /* pow2 to compare log2(llfreq) to */
U32 log2matchLengthSum; /* pow2 to compare log2(mlfreq) to */
U32 log2offCodeSum; /* pow2 to compare log2(offreq) to */
U32 factor; /* added to calculate ZSTD_getPrice() (but why?) */
U32 factor; /* fixed cost added when calculating ZSTD_getPrice() (but why ? seems to favor less sequences) */
/* end : updated by ZSTD_setLog2Prices */
U32 staticPrices; /* prices follow a static cost structure, statistics are irrelevant */
U32 staticPrices; /* prices follow a pre-defined cost structure, statistics are irrelevant */
U32 cachedPrice;
U32 cachedLitLength;
const BYTE* cachedLiterals;
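As background on these counters: the optimal parser prices a literal byte at roughly log2(litSum) - log2(litFreq[byte]) bits, which is why each frequency table sits next to its running sum and a cached log2 of that sum. A self-contained sketch of that idea; approx_literal_price() and ilog2() are illustrative, not zstd functions.

    #include <stdint.h>

    static unsigned ilog2(uint32_t v) { unsigned n = 0; while (v >>= 1) n++; return n; }   /* like ZSTD_highbit32 */

    /* rarer bytes cost more bits; the +1 avoids log2(0) for never-seen bytes */
    static unsigned approx_literal_price(const uint32_t litFreq[256], unsigned log2litSum, unsigned char byte)
    {
        return log2litSum - ilog2(litFreq[byte] + 1);
    }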
@ -346,13 +347,17 @@ MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* co
const BYTE* const pStart = pIn;
const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
while (pIn < pInLoopLimit) {
size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
pIn += ZSTD_NbCommonBytes(diff);
return (size_t)(pIn - pStart);
}
if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
if (pIn < pInLoopLimit) {
{ size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
if (diff) return ZSTD_NbCommonBytes(diff); }
pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
while (pIn < pInLoopLimit) {
size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
pIn += ZSTD_NbCommonBytes(diff);
return (size_t)(pIn - pStart);
} }
if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
return (size_t)(pIn - pStart);
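For context, the trick behind this loop: XORing two machine words clears every byte that matches, so counting trailing zero bits (on a little-endian target) gives the number of leading bytes the words have in common, which is what ZSTD_NbCommonBytes() returns. Self-contained sketch; nb_common_bytes_le() is illustrative and assumes the GCC/Clang __builtin_ctzll builtin.

    #include <stdint.h>
    #include <string.h>

    static unsigned nb_common_bytes_le(const void* a, const void* b)
    {
        uint64_t wa, wb;
        memcpy(&wa, a, sizeof wa);   /* unaligned-safe loads, like MEM_readST() */
        memcpy(&wb, b, sizeof wb);
        {   uint64_t const diff = wa ^ wb;
            if (diff == 0) return 8;                          /* all 8 bytes equal */
            return (unsigned)(__builtin_ctzll(diff) >> 3);    /* index of the first differing byte */
        }
    }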

View File

@ -16,8 +16,8 @@
* Binary Tree search
***************************************/
/** ZSTD_insertBt1() : add one or multiple positions to tree.
* ip : assumed <= iend-8 .
* @return : nb of positions added */
* ip : assumed <= iend-8 .
* @return : nb of positions added */
static U32 ZSTD_insertBt1(ZSTD_CCtx* zc,
const BYTE* const ip, const BYTE* const iend,
U32 nbCompares, U32 const mls, U32 const extDict)
@ -42,7 +42,7 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc,
U32* largerPtr = smallerPtr + 1;
U32 dummy32; /* to be nullified at the end */
U32 const windowLow = zc->lowLimit;
U32 matchEndIdx = current+8;
U32 matchEndIdx = current+8+1;
size_t bestLength = 8;
#ifdef ZSTD_C_PREDICT
U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
@ -85,8 +85,7 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc,
if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
assert(matchIndex+matchLength >= dictLimit); /* might be wrong if extDict is incorrectly set to 0 */
match = base + matchIndex;
if (match[matchLength] == ip[matchLength])
matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
} else {
match = dictBase + matchIndex;
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
@ -122,8 +121,8 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc,
*smallerPtr = *largerPtr = 0;
if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */
if (matchEndIdx > current + 8) return matchEndIdx - (current + 8);
return 1;
assert(matchEndIdx > current + 8);
return matchEndIdx - (current + 8);
}
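For context, how the return value is consumed: ZSTD_updateTree() inserts every position up to the current one, and the number returned by ZSTD_insertBt1() lets it jump over long repetitive stretches instead of re-inserting them byte by byte. Initializing matchEndIdx to current+8+1 keeps that return value >= 1, so the loop always advances, which is what the new assert expresses. A self-contained sketch of the caller shape; update_tree_sketch() and insert_fn are illustrative stand-ins.

    #include <stdint.h>

    typedef uint32_t (*insert_fn)(void* ctx, uint32_t pos);   /* stand-in for ZSTD_insertBt1(); returns nb of positions covered (>= 1) */

    static uint32_t update_tree_sketch(void* ctx, insert_fn insert_position,
                                       uint32_t nextToUpdate, uint32_t target)
    {
        uint32_t idx = nextToUpdate;
        while (idx < target)
            idx += insert_position(ctx, idx);   /* may jump ahead after a long match */
        return target;                          /* becomes the new nextToUpdate */
    }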
FORCE_INLINE_TEMPLATE
@ -182,7 +181,7 @@ static size_t ZSTD_insertBtAndFindBestMatch (
const U32 windowLow = zc->lowLimit;
U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = bt + 2*(current&btMask) + 1;
U32 matchEndIdx = current+8;
U32 matchEndIdx = current+8+1;
U32 dummy32; /* to be nullified at the end */
size_t bestLength = 0;
@ -196,8 +195,7 @@ static size_t ZSTD_insertBtAndFindBestMatch (
if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
match = base + matchIndex;
if (match[matchLength] == ip[matchLength])
matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
} else {
match = dictBase + matchIndex;
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
@ -233,7 +231,8 @@ static size_t ZSTD_insertBtAndFindBestMatch (
*smallerPtr = *largerPtr = 0;
zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1; /* skip repetitive patterns */
assert(matchEndIdx > current+8);
zc->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
return bestLength;
}
@ -354,14 +353,14 @@ size_t ZSTD_HcFindBestMatch_generic (
U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls);
for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
const BYTE* match;
size_t currentMl=0;
if ((!extDict) || matchIndex >= dictLimit) {
match = base + matchIndex;
const BYTE* const match = base + matchIndex;
if (match[ml] == ip[ml]) /* potentially better */
currentMl = ZSTD_count(ip, match, iLimit);
} else {
match = dictBase + matchIndex;
const BYTE* const match = dictBase + matchIndex;
assert(match+4 <= dictEnd);
if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
}
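As background on the search being tightened here: a hash-chain match finder keeps, for each hash bucket, the most recent position, and links every position to the previous one sharing its hash; walking that chain yields candidate matches newest-first, each measured with ZSTD_count(). Self-contained sketch of the insert step; hc_tables and hc_insert_and_get_head() are illustrative names.

    #include <stdint.h>

    typedef struct {
        uint32_t* hashTable;    /* size 1 << hashLog : head of each chain */
        uint32_t* chainTable;   /* size 1 << chainLog : previous position with the same hash */
        uint32_t  chainMask;    /* (1 << chainLog) - 1 */
    } hc_tables;

    static uint32_t hc_insert_and_get_head(hc_tables* t, uint32_t hash, uint32_t pos)
    {
        uint32_t const head = t->hashTable[hash];
        t->chainTable[pos & t->chainMask] = head;   /* link the new position behind the current head */
        t->hashTable[hash] = pos;                   /* the new position becomes the head */
        return head;                                /* first candidate to test */
    }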
@ -399,10 +398,10 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
ZSTD_CCtx* zc,
ZSTD_CCtx* const zc,
const BYTE* ip, const BYTE* const iLimit,
size_t* offsetPtr,
const U32 maxNbAttempts, const U32 matchLengthSearch)
size_t* const offsetPtr,
U32 const maxNbAttempts, U32 const matchLengthSearch)
{
switch(matchLengthSearch)
{
@ -521,9 +520,8 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
*/
/* catch up */
if (offset) {
while ( (start > anchor)
&& (start > base+offset-ZSTD_REP_MOVE)
&& (start[-1] == (start-offset+ZSTD_REP_MOVE)[-1]) ) /* only search for offset within prefix */
while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > base))
&& (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
{ start--; matchLength++; }
offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
}
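For context, the "catch up" step in isolation: once a match has been selected at `start` with a given distance, it is extended backwards while the preceding bytes also agree, without crossing the last emitted sequence or the start of the prefix. Self-contained sketch; catch_up() is illustrative.

    #include <stddef.h>

    static void catch_up(const unsigned char** startPtr, size_t* matchLengthPtr,
                         const unsigned char* anchor, const unsigned char* base,
                         size_t distance)
    {
        const unsigned char* start = *startPtr;
        while ( (start > anchor)                            /* don't cross the previous sequence */
             && (start - distance > base)                   /* stay inside the prefix */
             && (start[-1] == *(start - 1 - distance)) )    /* previous byte matches too */
        { start--; (*matchLengthPtr)++; }
        *startPtr = start;
    }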
@ -535,9 +533,8 @@ _storeSequence:
}
/* check immediate repcode */
while ( (ip <= ilimit)
&& ((offset_2>0)
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
while ( ((ip <= ilimit) & (offset_2>0))
&& (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
/* store sequence */
matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
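For context, the "immediate repcode" test in isolation: right after storing a sequence, the loop above checks whether the next four bytes also match at the previous repeat offset, in which case another sequence can be emitted with a very cheap offset code. Self-contained sketch; repcode_hit() is illustrative.

    #include <stdint.h>
    #include <string.h>

    static int repcode_hit(const unsigned char* ip, uint32_t offset_2)
    {
        uint32_t cur, prev;
        if (offset_2 == 0) return 0;                  /* no valid repeat offset yet */
        memcpy(&cur,  ip,            sizeof cur);     /* like MEM_read32(ip) */
        memcpy(&prev, ip - offset_2, sizeof prev);    /* like MEM_read32(ip - offset_2) */
        return cur == prev;                           /* first 4 bytes match at that distance */
    }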

View File

@ -13,7 +13,7 @@
#include "zstd_lazy.h" /* ZSTD_updateTree, ZSTD_updateTree_extDict */
#define ZSTD_LITFREQ_ADD 2 /* sort of scaling factor for litSum and litFreq (why the need ?), but also used for matchSum ? */
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats. Also used for matchSum (?) */
#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
#define ZSTD_MAX_PRICE (1<<30)
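As background on ZSTD_FREQ_DIV: at the start of a block, ZSTD_rescaleFreqs() keeps the previous block's frequency tables but shrinks them by a power of two, so old statistics still steer the parser while fresh data quickly takes over. A self-contained sketch of that shrink; rescale_table() is illustrative, and the real code uses slightly different shift amounts per table.

    #include <stdint.h>

    static uint32_t rescale_table(uint32_t* freq, unsigned tableSize, unsigned shift)
    {
        uint32_t sum = 0;
        unsigned s;
        for (s = 0; s < tableSize; s++) {
            freq[s] = 1 + (freq[s] >> shift);   /* shrink, but never down to zero */
            sum += freq[s];
        }
        return sum;                             /* new litSum / litLengthSum / ... */
    }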
@ -96,7 +96,8 @@ static void ZSTD_rescaleFreqs(optState_t* optPtr, const BYTE* src, size_t srcSiz
}
static U32 ZSTD_getLiteralPrice(optState_t* optPtr, U32 const litLength, const BYTE* literals)
static U32 ZSTD_getLiteralPrice(optState_t* const optPtr,
U32 const litLength, const BYTE* const literals)
{
U32 price;
@ -138,17 +139,22 @@ static U32 ZSTD_getLiteralPrice(optState_t* optPtr, U32 const litLength, const B
}
FORCE_INLINE_TEMPLATE U32 ZSTD_getPrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra)
/* ZSTD_getPrice() :
* optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
FORCE_INLINE_TEMPLATE U32 ZSTD_getPrice(optState_t* optPtr,
U32 const litLength, const BYTE* const literals, U32 const offset, U32 const matchLength,
int const optLevel)
{
BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
U32 const mlBase = matchLength - MINMATCH;
U32 price;
U32 const offCode = ZSTD_highbit32(offset+1);
U32 const mlBase = matchLength - MINMATCH;
assert(matchLength >= MINMATCH);
if (optPtr->staticPrices) /* fixed scheme, do not use statistics */
return ZSTD_getLiteralPrice(optPtr, litLength, literals) + ZSTD_highbit32((U32)mlBase+1) + 16 + offCode;
price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1);
if (!ultra /*static*/ && offCode >= 20) price += (offCode-19)*2; /* handicap for long distance offsets, favor decompression speed */
if ((optLevel<2) /*static*/ && offCode >= 20) price += (offCode-19)*2; /* handicap for long distance offsets, favor decompression speed */
/* match Length */
{ U32 const mlCode = ZSTD_MLcode(mlBase);
@ -161,25 +167,26 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_getPrice(optState_t* optPtr, U32 litLength, const
}
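For context, the shape of the computation above: a candidate sequence is charged its raw offset bits (the offCode), the entropy cost of the offCode symbol estimated as log2(sum) - log2(freq), the matchLength-code and literal costs, plus the long-distance handicap when optLevel < 2. Self-contained sketch; sequence_price_sketch() and ilog2() are illustrative, and the matchLength/literal parts are collapsed into pre-computed arguments.

    #include <stdint.h>

    static unsigned ilog2(uint32_t v) { unsigned n = 0; while (v >>= 1) n++; return n; }   /* like ZSTD_highbit32 */

    static uint32_t sequence_price_sketch(uint32_t offset,
                                          const uint32_t* offCodeFreq, uint32_t log2offCodeSum,
                                          uint32_t matchLengthPrice, uint32_t literalsPrice,
                                          int optLevel)
    {
        uint32_t const offCode = ilog2(offset + 1);                                     /* nb of raw offset bits */
        uint32_t price = offCode + log2offCodeSum - ilog2(offCodeFreq[offCode] + 1);    /* entropy cost of the offset code */
        if ((optLevel < 2) && (offCode >= 20))
            price += (offCode - 19) * 2;                                                /* penalise very long distances */
        return price + matchLengthPrice + literalsPrice;
    }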
static void ZSTD_updatePrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
static void ZSTD_updateStats(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offsetCode, U32 matchLength)
{
U32 u;
/* literals */
optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
for (u=0; u < litLength; u++)
optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
{ U32 u;
for (u=0; u < litLength; u++)
optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
}
/* literal Length */
{ const U32 llCode = ZSTD_LLcode(litLength);
{ U32 const llCode = ZSTD_LLcode(litLength);
optPtr->litLengthFreq[llCode]++;
optPtr->litLengthSum++;
}
/* match offset */
{ BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
optPtr->offCodeSum++;
/* match offset code (0-2=>repCode; 3+=>offset+2) */
{ U32 const offCode = ZSTD_highbit32(offsetCode+1);
assert(offCode <= MaxOff);
optPtr->offCodeFreq[offCode]++;
optPtr->offCodeSum++;
}
/* match Length */
@ -188,8 +195,6 @@ static void ZSTD_updatePrice(optState_t* optPtr, U32 litLength, const BYTE* lite
optPtr->matchLengthFreq[mlCode]++;
optPtr->matchLengthSum++;
}
ZSTD_setLog2Prices(optPtr);
}
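For context on the rename and the removed call: ZSTD_updateStats() now only bumps counters per emitted sequence, and the derived log2 totals (ZSTD_setLog2Prices) are refreshed once per parsed chunk, further down in this diff, instead of after every sequence; that split is one of the speed-ups of this PR. A self-contained sketch of the two halves; stats_sketch and both helpers are illustrative.

    #include <stdint.h>

    typedef struct { uint32_t litSum; uint32_t log2litSum; } stats_sketch;

    static unsigned ilog2(uint32_t v) { unsigned n = 0; while (v >>= 1) n++; return n; }

    /* cheap, called once per emitted sequence */
    static void update_stats_sketch(stats_sketch* s, uint32_t nbLiterals)
    {
        s->litSum += nbLiterals;              /* counters only, no logarithms here */
    }

    /* called once per parsed chunk, refreshing the cached log2 totals used for pricing */
    static void set_log2_prices_sketch(stats_sketch* s)
    {
        s->log2litSum = ilog2(s->litSum + 1);
    }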
@ -235,9 +240,9 @@ FORCE_INLINE_TEMPLATE
U32 ZSTD_insertBtAndGetAllMatches (
ZSTD_CCtx* zc,
const BYTE* const ip, const BYTE* const iLimit, const int extDict,
U32 nbCompares, U32 const mls,
U32 nbCompares, U32 const mls, U32 const sufficient_len,
U32 rep[ZSTD_REP_NUM], U32 const ll0,
ZSTD_match_t* matches, const U32 minMatchLen)
ZSTD_match_t* matches, const U32 lengthToBeat)
{
const BYTE* const base = zc->base;
U32 const current = (U32)(ip-base);
@ -258,11 +263,11 @@ U32 ZSTD_insertBtAndGetAllMatches (
U32 const windowLow = zc->lowLimit;
U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = bt + 2*(current&btMask) + 1;
U32 matchEndIdx = current+8; /* farthest referenced position of any match => detects repetitive patterns */
U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */
U32 dummy32; /* to be nullified at the end */
U32 mnum = 0;
size_t bestLength = minMatchLen-1;
size_t bestLength = lengthToBeat-1;
DEBUGLOG(7, "ZSTD_insertBtAndGetAllMatches");
/* check repCode */
@ -294,7 +299,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
matches[mnum].off = repCode - ll0;
matches[mnum].len = (U32)repLen;
mnum++;
if ( (repLen > ZSTD_OPT_NUM)
if ( (repLen > sufficient_len)
| (ip+repLen == iLimit) ) { /* best possible */
return mnum;
} } } }
@ -323,8 +328,9 @@ U32 ZSTD_insertBtAndGetAllMatches (
matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE;
matches[0].len = (U32)mlen;
mnum = 1;
if ( (mlen > ZSTD_OPT_NUM)
| (ip+mlen == iLimit) ) { /* best possible */
if ( (mlen > sufficient_len) |
(ip+mlen == iLimit) ) { /* best possible length */
zc->nextToUpdate = current+1; /* skip insertion */
return 1;
} } } }
@ -339,9 +345,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
match = base + matchIndex;
if (match[matchLength] == ip[matchLength]) {
matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1;
}
matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
} else {
match = dictBase + matchIndex;
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
@ -382,17 +386,18 @@ U32 ZSTD_insertBtAndGetAllMatches (
*smallerPtr = *largerPtr = 0;
zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1; /* skip repetitive patterns */
assert(matchEndIdx > current+8);
zc->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
return mnum;
}
FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
ZSTD_CCtx* zc, /* Index table will be updated */
const BYTE* ip, const BYTE* const iHighLimit, const int extDict,
const U32 maxNbAttempts, const U32 matchLengthSearch,
const BYTE* ip, const BYTE* const iHighLimit, int const extDict,
U32 const maxNbAttempts, U32 const matchLengthSearch, U32 const sufficient_len,
U32 rep[ZSTD_REP_NUM], U32 const ll0,
ZSTD_match_t* matches, const U32 minMatchLen)
ZSTD_match_t* matches, U32 const lengthToBeat)
{
DEBUGLOG(7, "ZSTD_BtGetAllMatches");
if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
@ -400,12 +405,12 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
else ZSTD_updateTree(zc, ip, iHighLimit, maxNbAttempts, matchLengthSearch);
switch(matchLengthSearch)
{
case 3 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 3, rep, ll0, matches, minMatchLen);
case 3 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 3, sufficient_len, rep, ll0, matches, lengthToBeat);
default :
case 4 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 4, rep, ll0, matches, minMatchLen);
case 5 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 5, rep, ll0, matches, minMatchLen);
case 4 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 4, sufficient_len, rep, ll0, matches, lengthToBeat);
case 5 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 5, sufficient_len, rep, ll0, matches, lengthToBeat);
case 7 :
case 6 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 6, rep, ll0, matches, minMatchLen);
case 6 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 6, sufficient_len, rep, ll0, matches, lengthToBeat);
}
}
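For context, the contract of the matches[] table these functions fill: each entry pairs a length with an offset code (values 0-2 select a repcode, larger values encode a real offset shifted by ZSTD_REP_MOVE, as the comment above notes), entries come out in increasing length, and the search stops early once sufficient_len is reached. Self-contained sketch of a consumer relying on that ordering; match_sketch and longest_match_len() are illustrative.

    #include <stdint.h>

    typedef struct { uint32_t off; uint32_t len; } match_sketch;   /* mirrors ZSTD_match_t */

    static uint32_t longest_match_len(const match_sketch* matches, uint32_t nbMatches)
    {
        /* lengths are strictly increasing, so the last entry is the longest candidate */
        return nbMatches ? matches[nbMatches - 1].len : 0;
    }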
@ -452,7 +457,7 @@ repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
const void* src, size_t srcSize,
const int ultra, const int extDict)
const int optLevel, const int extDict)
{
seqStore_t* const seqStorePtr = &(ctx->seqStore);
optState_t* const optStatePtr = &(ctx->optState);
@ -488,7 +493,7 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
/* find first match */
{ U32 const litlen = (U32)(ip - anchor);
U32 const ll0 = !litlen;
U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, ip, iend, extDict, maxSearches, mls, rep, ll0, matches, minMatch);
U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, ip, iend, extDict, maxSearches, mls, sufficient_len, rep, ll0, matches, minMatch);
if (!nbMatches) { ip++; continue; }
/* initialize opt[0] */
@ -519,7 +524,7 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
U32 const end = matches[matchNb].len;
repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
while (pos <= end) {
U32 const matchPrice = ZSTD_getPrice(optStatePtr, litlen, anchor, offset, pos, ultra);
U32 const matchPrice = ZSTD_getPrice(optStatePtr, litlen, anchor, offset, pos, optLevel);
if (pos > last_pos || matchPrice < opt[pos].price) {
DEBUGLOG(7, "rPos:%u => set initial price : %u",
pos, matchPrice);
@ -548,16 +553,20 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
SET_PRICE(cur, 1/*mlen*/, 0/*offset*/, litlen, price, opt[cur-1].rep);
} }
if (cur == last_pos) break;
/* last match must start at a minimum distance of 8 from oend */
if (inr > ilimit) continue;
if (cur == last_pos) break;
if ( (optLevel==0) /*static*/
&& (opt[cur+1].price <= opt[cur].price) )
continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
{ U32 const ll0 = (opt[cur].mlen != 1);
U32 const litlen = (opt[cur].mlen == 1) ? opt[cur].litlen : 0;
U32 const basePrice = (cur > litlen) ? opt[cur-litlen].price : 0;
const BYTE* const baseLiterals = ip + cur - litlen;
U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, inr, iend, extDict, maxSearches, mls, opt[cur].rep, ll0, matches, minMatch);
U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, inr, iend, extDict, maxSearches, mls, sufficient_len, opt[cur].rep, ll0, matches, minMatch);
U32 matchNb;
if (!nbMatches) continue;
assert(baseLiterals >= prefixStart);
@ -577,26 +586,28 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
/* set prices using matches found at position == cur */
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
U32 mlen = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
U32 const lastML = matches[matchNb].len;
U32 const offset = matches[matchNb].off;
repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0);
U32 const lastML = matches[matchNb].len;
U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
U32 mlen;
DEBUGLOG(7, "testing match %u => offCode=%u, mlen=%u, llen=%u",
matchNb, matches[matchNb].off, lastML, litlen);
while (mlen <= lastML) {
for (mlen = lastML; mlen >= startML; mlen--) {
U32 const pos = cur + mlen;
U32 const price = basePrice + ZSTD_getPrice(optStatePtr, litlen, baseLiterals, offset, mlen, ultra);
U32 const price = basePrice + ZSTD_getPrice(optStatePtr, litlen, baseLiterals, offset, mlen, optLevel);
if ((pos > last_pos) || (price < opt[pos].price)) {
DEBUGLOG(7, "rPos:%u => new better price (%u<%u)",
pos, price, opt[pos].price);
SET_PRICE(pos, mlen, offset, litlen, price, repHistory); /* note : macro modifies last_pos */
} else {
if (optLevel==0) break; /* gets ~+10% speed for about -0.01 ratio loss */
}
mlen++;
} } } }
} } }
} /* for (cur = 1; cur <= last_pos; cur++) */
best_mlen = opt[last_pos].mlen;
best_off = opt[last_pos].off;
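As background, the dynamic program this loop implements: opt[pos] records the cheapest known way to reach relative position pos, a literal extends a path by one byte, and every match found at cur proposes prices for positions up to cur + its length; a cell is overwritten whenever a cheaper path appears, and the best path is then read back from opt[last_pos]. Self-contained sketch of the relaxation step; opt_cell and relax() are illustrative, and the real SET_PRICE macro additionally fills any gap cells with ZSTD_MAX_PRICE and tracks repcode history.

    #include <stdint.h>

    typedef struct { uint32_t price, off, mlen, litlen; } opt_cell;   /* mirrors ZSTD_optimal_t, minus the rep history */

    static void relax(opt_cell* opt, uint32_t* last_pos,
                      uint32_t pos, uint32_t price,
                      uint32_t off, uint32_t mlen, uint32_t litlen)
    {
        if ((pos > *last_pos) || (price < opt[pos].price)) {   /* new cell, or a cheaper path to it */
            opt_cell const c = { price, off, mlen, litlen };
            opt[pos] = c;
            if (pos > *last_pos) *last_pos = pos;
        }
    }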
@ -646,11 +657,12 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
}
}
ZSTD_updatePrice(optStatePtr, llen, anchor, offset, mlen);
ZSTD_updateStats(optStatePtr, llen, anchor, offset, mlen);
ZSTD_storeSeq(seqStorePtr, llen, anchor, offset, mlen-MINMATCH);
anchor = ip;
} }
} /* for (cur=0; cur < last_pos; ) */
ZSTD_setLog2Prices(optStatePtr);
} /* while (ip < ilimit) */
/* Save reps for next block */
{ int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->repToConfirm[i] = rep[i]; }
@ -663,20 +675,20 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
DEBUGLOG(5, "ZSTD_compressBlock_btopt");
return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*ultra*/, 0 /*extDict*/);
return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*optLevel*/, 0 /*extDict*/);
}
size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1 /*ultra*/, 0 /*extDict*/);
return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 2 /*optLevel*/, 0 /*extDict*/);
}
size_t ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*ultra*/, 1 /*extDict*/);
return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*optLevel*/, 1 /*extDict*/);
}
size_t ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1 /*ultra*/, 1 /*extDict*/);
return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 2 /*optLevel*/, 1 /*extDict*/);
}

View File

@ -310,14 +310,10 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
}
const char* g_stratName[] = { "ZSTD_fast ",
"ZSTD_dfast ",
"ZSTD_greedy ",
"ZSTD_lazy ",
"ZSTD_lazy2 ",
"ZSTD_btlazy2 ",
"ZSTD_btopt ",
"ZSTD_btultra "};
const char* g_stratName[ZSTD_btultra+1] = {
"(none) ", "ZSTD_fast ", "ZSTD_dfast ",
"ZSTD_greedy ", "ZSTD_lazy ", "ZSTD_lazy2 ",
"ZSTD_btlazy2 ", "ZSTD_btopt ", "ZSTD_btultra "};
static void BMK_printWinner(FILE* f, U32 cLevel, BMK_result_t result, ZSTD_compressionParameters params, size_t srcSize)
{