diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index 5e42039a..26a1a1a8 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -100,7 +100,7 @@ static U32 ZSTD_getLiteralPrice(optState_t* optPtr, U32 litLength, const BYTE* l U32 price; if (optPtr->staticPrices) - return ZSTD_highbit32((U32)litLength+1) + (litLength*6); /* 6 bit per literal - no real estimation */ + return ZSTD_highbit32((U32)litLength+1) + (litLength*6); /* 6 bit per literal - no statistic used */ if (litLength == 0) return optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[0]+1); @@ -414,8 +414,8 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, const void* src, size_t srcSize, const int ultra) { - seqStore_t* seqStorePtr = &(ctx->seqStore); - optState_t* optStatePtr = &(ctx->optState); + seqStore_t* const seqStorePtr = &(ctx->seqStore); + optState_t* const optStatePtr = &(ctx->optState); const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* anchor = istart; @@ -441,223 +441,223 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, /* Match Loop */ while (ip < ilimit) { - U32 cur, match_num, last_pos = 0, litlen, price; - U32 u, mlen, best_mlen, best_off, litLength, offset; + U32 cur, last_pos = 0; + U32 best_mlen, best_off; + U32 const initLL = (U32)(ip - anchor); memset(opt, 0, sizeof(ZSTD_optimal_t)); - litlen = (U32)(ip - anchor); /* check repCode */ - { U32 const ll0 = (ip==anchor); + { U32 const ll0 = !initLL; U32 const lastR = ZSTD_REP_CHECK + ll0; U32 repCode; for (repCode = ll0; repCode < lastR; repCode++) { S32 const repOffset = (repCode==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[repCode]; if ( (repOffset > 0) - && (repOffset < (S32)(ip-prefixStart)) /* only check within current mem segment */ + && (repOffset < (S32)(ip-prefixStart)) /* within current mem segment */ && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) { U32 repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iend) + minMatch; if (repLen > sufficient_len) { /* large repMatch => immediate encoding */ best_mlen = repLen; best_off = repCode; cur = 0; last_pos = 1; - goto _storeSequence; + goto _shortestPath; } do { - U32 const repPrice = ZSTD_getPrice(optStatePtr, litlen, anchor, repCode - ll0, repLen, ultra); + U32 const repPrice = ZSTD_getPrice(optStatePtr, initLL, anchor, repCode - ll0, repLen, ultra); if (repLen > last_pos || repPrice < opt[repLen].price) - SET_PRICE(repLen, repLen, repCode, litlen, repPrice); /* note : macro modifies last_pos */ + SET_PRICE(repLen, repLen, repCode, initLL, repPrice); /* note : macro modifies last_pos */ repLen--; } while (repLen >= minMatch); } } } - match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch); + { U32 const nb_matches = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch); - if (!last_pos /*no repCode*/ && !match_num /*no match*/) { ip++; continue; } + if (!last_pos /*no repCode*/ && !nb_matches /*no match*/) { ip++; continue; } - if (match_num && (matches[match_num-1].len > sufficient_len)) { - /* large match => immediate encoding */ - best_mlen = matches[match_num-1].len; - best_off = matches[match_num-1].off; - cur = 0; - last_pos = 1; - goto _storeSequence; - } + if (nb_matches && (matches[nb_matches-1].len > sufficient_len)) { + /* large match => immediate encoding */ + best_mlen = matches[nb_matches-1].len; + best_off = matches[nb_matches-1].off; + cur = 0; + last_pos = 1; + goto _shortestPath; + } - /* set prices for first matches from position == 0 */ - { U32 matchNb; - for (matchNb = 0; matchNb < match_num; matchNb++) { - U32 pos = (matchNb==0) ? (last_pos /*some repCode (assumed cheaper)*/ ? last_pos : minMatch) - : matches[matchNb-1].len+1; - U32 const end = matches[matchNb].len; - while (pos <= end) { - U32 const matchPrice = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[matchNb].off-1, pos, ultra); - if (pos > last_pos || matchPrice < opt[pos].price) - SET_PRICE(pos, pos, matches[matchNb].off, litlen, matchPrice); /* note : macro modifies last_pos */ - pos++; - } } } + /* set prices for first matches from position == 0 */ + { U32 matchNb; + U32 pos = last_pos /*some repCode (assumed cheaper)*/ ? last_pos : minMatch; + for (matchNb = 0; matchNb < nb_matches; matchNb++) { + U32 const end = matches[matchNb].len; + while (pos <= end) { + U32 const matchPrice = ZSTD_getPrice(optStatePtr, initLL, anchor, matches[matchNb].off-1, pos, ultra); + if (pos > last_pos || matchPrice < opt[pos].price) + SET_PRICE(pos, pos, matches[matchNb].off, initLL, matchPrice); /* note : macro modifies last_pos */ + pos++; + } } } } if (last_pos < minMatch) { ip++; continue; } /* initialize opt[0] */ { U32 i ; for (i=0; i litlen) { price = opt[cur - litlen].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-litlen); - } else + } else { price = ZSTD_getLiteralPrice(optStatePtr, litlen, anchor); - } else { - litlen = 1; - price = opt[cur - 1].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-1); - } - - if (cur > last_pos || price <= opt[cur].price) - SET_PRICE(cur, 1, 0, litlen, price); /* note : macro modifies last_pos */ - - if (cur == last_pos) break; - - if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */ - continue; - - mlen = opt[cur].mlen; - if (opt[cur].off > ZSTD_REP_MOVE_OPT) { - opt[cur].rep[2] = opt[cur-mlen].rep[1]; - opt[cur].rep[1] = opt[cur-mlen].rep[0]; - opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; - } else { - opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; - opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; - /* If opt[cur].off == ZSTD_REP_MOVE_OPT, then mlen != 1. - * offset ZSTD_REP_MOVE_OPT is used for the special case - * litLength == 0, where offset 0 means something special. - * mlen == 1 means the previous byte was stored as a literal, - * so they are mutually exclusive. - */ - assert(!(opt[cur].off == ZSTD_REP_MOVE_OPT && mlen == 1)); - opt[cur].rep[0] = (opt[cur].off == ZSTD_REP_MOVE_OPT) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]); - } - - best_mlen = minMatch; - { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); - for (i=(opt[cur].mlen != 1); i 0) && (repCur < (S32)(inr-prefixStart)) - && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(inr - repCur, minMatch))) { - mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch; - - if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; best_off = i; last_pos = cur + 1; - goto _storeSequence; - } - - best_off = i - (opt[cur].mlen != 1); - if (mlen > best_mlen) best_mlen = mlen; - - do { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, inr-litlen, best_off, mlen, ultra); - } else - price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, best_off, mlen, ultra); - } - - if (cur + mlen > last_pos || price <= opt[cur + mlen].price) - SET_PRICE(cur + mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ - mlen--; - } while (mlen >= minMatch); - } } } - - match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen); - - if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) { - best_mlen = matches[match_num-1].len; - best_off = matches[match_num-1].off; - last_pos = cur + 1; - goto _storeSequence; + } + if (cur > last_pos || price <= opt[cur].price) + SET_PRICE(cur, 1, 0, litlen, price); /* note : macro modifies last_pos */ } - /* set prices using matches at position = cur */ - for (u = 0; u < match_num; u++) { - mlen = (u>0) ? matches[u-1].len+1 : best_mlen; - best_mlen = matches[u].len; + if (cur == last_pos) break; - while (mlen <= best_mlen) { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; + /* last match must start at a minimum distance of 8 from oend */ + if (inr > ilimit) continue; + + /* update repcodes */ + { U32 const mlen = opt[cur].mlen; + if (opt[cur].off > ZSTD_REP_MOVE_OPT) { + opt[cur].rep[2] = opt[cur-mlen].rep[1]; + opt[cur].rep[1] = opt[cur-mlen].rep[0]; + opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; + } else { + opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; + opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; + /* If opt[cur].off == ZSTD_REP_MOVE_OPT, then mlen != 1. + * offset ZSTD_REP_MOVE_OPT is used for the special case + * litLength == 0, where offset 0 means something special. + * mlen == 1 means the previous byte was stored as a literal, + * so they are mutually exclusive. + */ + assert(!(opt[cur].off == ZSTD_REP_MOVE_OPT && mlen == 1)); + opt[cur].rep[0] = (opt[cur].off == ZSTD_REP_MOVE_OPT) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]); + } } + + best_mlen = minMatch; + { U32 const ll0 = (opt[cur].mlen != 1); + U32 const lastR = ZSTD_REP_CHECK + ll0; + U32 repCode4; /* universal referential */ + for (repCode4=ll0; repCode4 0) && (repCur < (S32)(inr-prefixStart)) /* within current mem segment */ + && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(inr - repCur, minMatch))) { + U32 matchLength = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch; + U32 const repCode3 = repCode4 - ll0; /* contextual referential, depends on ll0 */ + assert(repCode3 < 3); + + if (matchLength > sufficient_len || cur + matchLength >= ZSTD_OPT_NUM) { + best_mlen = matchLength; + best_off = repCode4; + last_pos = cur + 1; + goto _shortestPath; + } + + if (matchLength > best_mlen) best_mlen = matchLength; + + do { + U32 const litlen = ll0 ? 0 : opt[cur].litlen; + U32 price; + if (cur > litlen) { + price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, inr-litlen, repCode3, matchLength, ultra); + } else { + price = ZSTD_getPrice(optStatePtr, litlen, anchor, repCode3, matchLength, ultra); + } + + if (cur + matchLength > last_pos || price <= opt[cur + matchLength].price) + SET_PRICE(cur + matchLength, matchLength, repCode4, litlen, price); /* note : macro modifies last_pos */ + matchLength--; + } while (matchLength >= minMatch); + } } } + + { U32 const nb_matches = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen /*largest repLength*/); /* search for matches larger than repcodes */ + U32 matchNb; + + if (nb_matches > 0 && (matches[nb_matches-1].len > sufficient_len || cur + matches[nb_matches-1].len >= ZSTD_OPT_NUM)) { + best_mlen = matches[nb_matches-1].len; + best_off = matches[nb_matches-1].off; + last_pos = cur + 1; + goto _shortestPath; + } + + /* set prices using matches at position = cur */ + for (matchNb = 0; matchNb < nb_matches; matchNb++) { + U32 mlen = (matchNb>0) ? matches[matchNb-1].len+1 : best_mlen; + U32 const lastML = matches[matchNb].len; + + while (mlen <= lastML) { + U32 const litlen = (opt[cur].mlen == 1) ? opt[cur].litlen : 0; + U32 price; if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen, ultra); + price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, ip+cur-litlen, matches[matchNb].off-1, mlen, ultra); else - price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, matches[u].off-1, mlen, ultra); - } + price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[matchNb].off-1, mlen, ultra); - if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) - SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */ + if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) + SET_PRICE(cur + mlen, mlen, matches[matchNb].off, litlen, price); /* note : macro modifies last_pos */ - mlen++; - } } } + mlen++; + } } } } best_mlen = opt[last_pos].mlen; best_off = opt[last_pos].off; cur = last_pos - best_mlen; - /* store sequence */ -_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ +_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ opt[0].mlen = 1; - while (1) { - mlen = opt[cur].mlen; - offset = opt[cur].off; - opt[cur].mlen = best_mlen; - opt[cur].off = best_off; - best_mlen = mlen; - best_off = offset; - if (mlen > cur) break; - cur -= mlen; - } + /* reverse traversal */ + { U32 selected_matchLength = best_mlen; + U32 selectedOffset = best_off; + U32 pos = cur; + while (1) { + U32 const mlen = opt[pos].mlen; + U32 const off = opt[pos].off; + opt[pos].mlen = selected_matchLength; + opt[pos].off = selectedOffset; + selected_matchLength = mlen; + selectedOffset = off; + if (mlen > pos) break; + pos -= mlen; + } } - for (u = 0; u <= last_pos;) { - u += opt[u].mlen; - } + /* save sequences */ + { U32 pos; + for (pos=0; pos < last_pos; ) { + U32 const litLength = (U32)(ip - anchor); + U32 const mlen = opt[pos].mlen; + U32 offset = opt[pos].off; + if (mlen == 1) { ip++; pos++; continue; } + pos += mlen; - for (cur=0; cur < last_pos; ) { - mlen = opt[cur].mlen; - if (mlen == 1) { ip++; cur++; continue; } - offset = opt[cur].off; - cur += mlen; - litLength = (U32)(ip - anchor); - - if (offset > ZSTD_REP_MOVE_OPT) { - rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = offset - ZSTD_REP_MOVE_OPT; - offset--; - } else { - if (offset != 0) { - best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]); - if (offset != 1) rep[2] = rep[1]; + /* repcodes update */ + if (offset > ZSTD_REP_MOVE_OPT) { /* full offset */ + rep[2] = rep[1]; rep[1] = rep[0]; - rep[0] = best_off; + rep[0] = offset - ZSTD_REP_MOVE_OPT; + offset--; + } else { /* repcode */ + if (offset != 0) { + U32 const currentOffset = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[offset]; + if (offset != 1) rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = currentOffset; + } + if (litLength==0) offset--; } - if (litLength==0) offset--; - } - ZSTD_updatePrice(optStatePtr, litLength, anchor, offset, mlen); - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); - anchor = ip = ip + mlen; - } + ZSTD_updatePrice(optStatePtr, litLength, anchor, offset, mlen); + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); + anchor = ip = ip + mlen; + } } } /* for (cur=0; cur < last_pos; ) */ /* Save reps for next block */