diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index ee191f35..17ae1a77 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -69,6 +69,7 @@ #define ZSTD_REP_NUM 3 #define ZSTD_REP_INIT ZSTD_REP_NUM #define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) +static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; #define KB *(1 <<10) #define MB *(1 <<20) @@ -99,7 +100,6 @@ typedef enum { lbt_huffman, lbt_repeat, lbt_raw, lbt_rle } litBlockType_t; #define MINMATCH 3 #define EQUAL_READ32 4 -#define REPCODE_STARTVALUE 1 #define Litbits 8 #define MaxLit ((1<params = params; zc->blockSize = blockSize; zc->frameContentSize = frameContentSize; + { int i; for (i=0; irep[i] = repStartValue[i]; } if (params.cParams.strategy == ZSTD_btopt) { zc->seqStore.litFreq = (U32*)(zc->seqStore.buffer); @@ -918,6 +921,9 @@ _check_compressibility: size_t const maxCSize = srcSize - minGain; if ((size_t)(op-ostart) >= maxCSize) return 0; } + /* confirm repcodes */ + { int i; for (i=0; irep[i] = zc->savedRep[i]; } + return op - ostart; } @@ -929,11 +935,11 @@ _check_compressibility: */ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, size_t offsetCode, size_t matchCode) { -#if 0 /* for debug */ +#if 1 /* for debug */ static const BYTE* g_start = NULL; const U32 pos = (U32)(literals - g_start); if (g_start==NULL) g_start = literals; - if ((pos > 2587900) && (pos < 2588050)) + //if ((pos > 1) && (pos < 50000)) printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n", pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); #endif @@ -1104,29 +1110,33 @@ static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls) FORCE_INLINE -void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc, +void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx, const void* src, size_t srcSize, const U32 mls) { - U32* const hashTable = zc->hashTable; - const U32 hBits = zc->params.cParams.hashLog; - seqStore_t* seqStorePtr = &(zc->seqStore); - const BYTE* const base = zc->base; + U32* const hashTable = cctx->hashTable; + const U32 hBits = cctx->params.cParams.hashLog; + seqStore_t* seqStorePtr = &(cctx->seqStore); + const BYTE* const base = cctx->base; const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* anchor = istart; - const U32 lowIndex = zc->dictLimit; - const BYTE* const lowest = base + lowIndex; + const U32 lowestIndex = cctx->dictLimit; + const BYTE* const lowest = base + lowestIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; - size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; + size_t offset_1=cctx->rep[0], offset_2=cctx->rep[1]; /* init */ ZSTD_resetSeqStore(seqStorePtr); - if (ip < lowest+REPCODE_STARTVALUE) ip = lowest+REPCODE_STARTVALUE; + ip += (ip==lowest); + { U32 const maxRep = (U32)(ip-lowest); + if (offset_1 > maxRep) offset_1 = 0; + if (offset_2 > maxRep) offset_2 = 0; + } /* Main Search Loop */ - while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ size_t mlCode; size_t offset; size_t const h = ZSTD_hashPtr(ip, hBits, mls); @@ -1140,7 +1150,7 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc, ip++; ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mlCode-MINMATCH); } else { - if ( (matchIndex <= lowIndex) || + if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) { ip += ((ip-anchor) >> g_searchStrength) + 1; continue; @@ -1164,7 +1174,7 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc, hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); /* check immediate repcode */ while ( (ip <= ilimit) - && ( (offset>0) + && ( (offset_2>0) & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { /* store sequence */ size_t const rlCode = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32; @@ -1176,6 +1186,10 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc, continue; /* faster when present ... (?) */ } } } + /* save reps for next block */ + cctx->savedRep[0] = offset_1 ? offset_1 : (U32)(iend-base); + cctx->savedRep[1] = offset_2 ? offset_2 : (U32)(iend-base); + /* Last Literals */ { size_t const lastLLSize = iend - anchor; memcpy(seqStorePtr->lit, anchor, lastLLSize); @@ -1222,15 +1236,13 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; - - U32 offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE; - + U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1]; /* init */ ZSTD_resetSeqStore(seqStorePtr); /* skip first position to avoid read overflow during repcode match check */ - hashTable[ZSTD_hashPtr(ip+0, hBits, mls)] = (U32)(ip-base+0); - ip += REPCODE_STARTVALUE; + hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base); + ip++; /* Main Search Loop */ while (ip < ilimit) { /* < instead of <=, because (ip+1) */ @@ -1246,8 +1258,8 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, U32 offset; hashTable[h] = current; /* update hash table */ - if ( ((repIndex >= dictLimit) | ((repIndex <= dictLimit-4) & (repIndex>lowestIndex))) - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + if ( (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; mlCode = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32; ip++; @@ -1281,8 +1293,8 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, U32 const current2 = (U32)(ip-base); U32 const repIndex2 = current2 - offset_2; const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2; - if ( ( ((repIndex2>lowestIndex) & (repIndex2 <= dictLimit-4)) | (repIndex2 >= dictLimit) ) - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend; size_t repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32; U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ @@ -1295,6 +1307,9 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, break; } } } + /* save reps for next block */ + ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2; + /* Last Literals */ { size_t const lastLLSize = iend - anchor; memcpy(seqStorePtr->lit, anchor, lastLLSize); @@ -1723,14 +1738,18 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, size_t* offsetPtr, U32 maxNbAttempts, U32 matchLengthSearch); searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS; + U32 rep[ZSTD_REP_INIT]; /* init */ - U32 rep[ZSTD_REP_INIT]; - { U32 i ; for (i=0; inextToUpdate3 = ctx->nextToUpdate; ZSTD_resetSeqStore(seqStorePtr); - if ((ip-base) < REPCODE_STARTVALUE) ip = base + REPCODE_STARTVALUE; + { U32 i; + U32 const maxRep = (U32)(ip-base); + for (i=0; irep[i]; + if (rep[i]>maxRep) rep[i]=0; + } } /* Match Loop */ while (ip < ilimit) { @@ -1825,6 +1844,13 @@ _storeSequence: continue; /* faster when present ... (?) */ } } + /* Save reps for next block */ + { int i; + for (i=0; isavedRep[i] = rep[i]; + } } + /* Last Literals */ { size_t const lastLLSize = iend - anchor; memcpy(seqStorePtr->lit, anchor, lastLLSize); @@ -1884,11 +1910,11 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, /* init */ U32 rep[ZSTD_REP_INIT]; - { U32 i; for (i=0; irep[i]; } ctx->nextToUpdate3 = ctx->nextToUpdate; ZSTD_resetSeqStore(seqStorePtr); - if ((ip - prefixStart) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE; + ip += (ip == prefixStart); /* Match Loop */ while (ip < ilimit) { @@ -1898,8 +1924,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, U32 current = (U32)(ip-base); /* check repCode */ - { - const U32 repIndex = (U32)(current+1 - rep[0]); + { const U32 repIndex = (U32)(current+1 - rep[0]); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ @@ -2020,6 +2045,9 @@ _storeSequence: break; } } + /* Save reps for next block */ + ctx->savedRep[0] = rep[0]; ctx->savedRep[1] = rep[1]; ctx->savedRep[2] = rep[2]; + /* Last Literals */ { size_t const lastLLSize = iend - anchor; memcpy(seqStorePtr->lit, anchor, lastLLSize); diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h index 3995deeb..7c0b6418 100644 --- a/lib/compress/zstd_opt.h +++ b/lib/compress/zstd_opt.h @@ -461,15 +461,19 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, ZSTD_optimal_t* opt = seqStorePtr->priceTable; ZSTD_match_t* matches = seqStorePtr->matchTable; const BYTE* inr; + U32 offset, rep[ZSTD_REP_INIT]; /* init */ - U32 offset, rep[ZSTD_REP_INIT]; - { U32 i; for (i=0; inextToUpdate3 = ctx->nextToUpdate; ZSTD_resetSeqStore(seqStorePtr); ZSTD_rescaleFreqs(seqStorePtr); - if ((ip-prefixStart) < REPCODE_STARTVALUE) ip = prefixStart + REPCODE_STARTVALUE; + ip += (ip==prefixStart); + { U32 i; + U32 const maxRep = (ip-prefixStart); + for (i=0; irep[i]; + if (rep[i]>maxRep) rep[i]=0; + } } ZSTD_LOG_BLOCK("%d: COMPBLOCK_OPT_GENERIC srcSz=%d maxSrch=%d mls=%d sufLen=%d\n", (int)(ip-base), (int)srcSize, maxSearches, mls, sufficient_len); @@ -713,8 +717,15 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ anchor = ip = ip + mlen; } } /* for (cur=0; cur < last_pos; ) */ - { /* Last Literals */ - size_t lastLLSize = iend - anchor; + /* Save reps for next block */ + { int i; + for (i=0; isavedRep[i] = rep[i]; + } } + + /* Last Literals */ + { size_t lastLLSize = iend - anchor; ZSTD_LOG_ENCODE("%d: lastLLSize literals=%u\n", (int)(ip-base), (U32)lastLLSize); memcpy(seqStorePtr->lit, anchor, lastLLSize); seqStorePtr->lit += lastLLSize; @@ -750,12 +761,12 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, /* init */ U32 offset, rep[ZSTD_REP_INIT]; - { U32 i; for (i=0; irep[i]; } ctx->nextToUpdate3 = ctx->nextToUpdate; ZSTD_resetSeqStore(seqStorePtr); ZSTD_rescaleFreqs(seqStorePtr); - if ((ip - prefixStart) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE; + ip += (ip==prefixStart); ZSTD_LOG_BLOCK("%d: COMPBLOCK_OPT_EXTDICT srcSz=%d maxSrch=%d mls=%d sufLen=%d\n", (int)(ip-base), (int)srcSize, maxSearches, mls, sufficient_len); @@ -1027,8 +1038,11 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ anchor = ip = ip + mlen; } } /* for (cur=0; cur < last_pos; ) */ - { /* Last Literals */ - size_t lastLLSize = iend - anchor; + /* Save reps for next block */ + ctx->savedRep[0] = rep[0]; ctx->savedRep[1] = rep[1]; ctx->savedRep[2] = rep[2]; + + /* Last Literals */ + { size_t lastLLSize = iend - anchor; ZSTD_LOG_ENCODE("%d: lastLLSize literals=%u\n", (int)(ip-base), (U32)(lastLLSize)); memcpy(seqStorePtr->lit, anchor, lastLLSize); seqStorePtr->lit += lastLLSize; diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index e76368d4..4ac9dd53 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -118,6 +118,7 @@ struct ZSTD_DCtx_s const void* vBase; const void* dictEnd; size_t expected; + U32 rep[3]; ZSTD_frameParams fParams; blockType_t bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */ ZSTD_dStage stage; @@ -147,6 +148,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) dctx->hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); dctx->litEntropy = dctx->fseEntropy = 0; dctx->dictID = 0; + { int i; for (i=0; irep[i] = repStartValue[i]; } return 0; } @@ -855,13 +857,13 @@ static size_t ZSTD_decompressSequences( { size_t const seqHSize = ZSTD_decodeSeqHeaders(&nbSeq, DTableLL, DTableML, DTableOffb, dctx->fseEntropy, ip, seqSize); if (ZSTD_isError(seqHSize)) return seqHSize; ip += seqHSize; - dctx->fseEntropy = 1; } /* Regen sequences */ if (nbSeq) { seqState_t seqState; - { U32 i; for (i=0; ifseEntropy = 1; + { U32 i; for (i=0; irep[i]; } { size_t const errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip); if (ERR_isError(errorCode)) return ERROR(corruption_detected); } FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL); @@ -870,7 +872,7 @@ static size_t ZSTD_decompressSequences( for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { nbSeq--; - { seq_t const sequence = ZSTD_decodeSequence(&seqState); + { seq_t const sequence = ZSTD_decodeSequence(&seqState); size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd); if (ZSTD_isError(oneSeqSize)) return oneSeqSize; op += oneSeqSize; @@ -878,6 +880,8 @@ static size_t ZSTD_decompressSequences( /* check if reached exact end */ if (nbSeq) return ERROR(corruption_detected); + /* save reps for next block */ + { U32 i; for (i=0; irep[i] = seqState.prevOffset[i]; } } /* last literal segment */