diff --git a/build/VS2008/fullbench/fullbench.vcproj b/build/VS2008/fullbench/fullbench.vcproj index 94293456..05ec5ca0 100644 --- a/build/VS2008/fullbench/fullbench.vcproj +++ b/build/VS2008/fullbench/fullbench.vcproj @@ -388,6 +388,22 @@ RelativePath="..\..\..\tests\fullbench.c" > + + + + + + + + + + + + + + + + diff --git a/build/VS2008/fuzzer/fuzzer.vcproj b/build/VS2008/fuzzer/fuzzer.vcproj index f1719e8a..700dd7eb 100644 --- a/build/VS2008/fuzzer/fuzzer.vcproj +++ b/build/VS2008/fuzzer/fuzzer.vcproj @@ -400,6 +400,22 @@ RelativePath="..\..\..\lib\decompress\zstd_decompress.c" > + + + + + + + + + + + + + + + + diff --git a/build/VS2008/zstd/zstd.vcproj b/build/VS2008/zstd/zstd.vcproj index 2e2923d5..86dd3a25 100644 --- a/build/VS2008/zstd/zstd.vcproj +++ b/build/VS2008/zstd/zstd.vcproj @@ -444,6 +444,22 @@ RelativePath="..\..\..\lib\compress\zstdmt_compress.c" > + + + + + + + + - - @@ -558,6 +570,26 @@ RelativePath="..\..\..\lib\compress\zstdmt_compress.h" > + + + + + + + + + + diff --git a/build/VS2008/zstdlib/zstdlib.vcproj b/build/VS2008/zstdlib/zstdlib.vcproj index ac85f7ae..ac8f896c 100644 --- a/build/VS2008/zstdlib/zstdlib.vcproj +++ b/build/VS2008/zstdlib/zstdlib.vcproj @@ -388,6 +388,22 @@ RelativePath="..\..\..\lib\compress\zstd_compress.c" > + + + + + + + + @@ -495,11 +511,27 @@ > + + + + + + + + + + + + @@ -180,6 +184,10 @@ + + + + diff --git a/build/VS2010/fuzzer/fuzzer.vcxproj b/build/VS2010/fuzzer/fuzzer.vcxproj index 12a4b931..9f00899d 100644 --- a/build/VS2010/fuzzer/fuzzer.vcxproj +++ b/build/VS2010/fuzzer/fuzzer.vcxproj @@ -165,6 +165,10 @@ + + + + @@ -183,6 +187,10 @@ + + + + diff --git a/build/VS2010/libzstd-dll/libzstd-dll.vcxproj b/build/VS2010/libzstd-dll/libzstd-dll.vcxproj index 364b3bea..0a4be69d 100644 --- a/build/VS2010/libzstd-dll/libzstd-dll.vcxproj +++ b/build/VS2010/libzstd-dll/libzstd-dll.vcxproj @@ -29,6 +29,10 @@ + + + + @@ -67,6 +71,10 @@ + + + + diff --git a/build/VS2010/libzstd/libzstd.vcxproj b/build/VS2010/libzstd/libzstd.vcxproj index 6087d737..51b84067 100644 --- a/build/VS2010/libzstd/libzstd.vcxproj +++ b/build/VS2010/libzstd/libzstd.vcxproj @@ -29,6 +29,10 @@ + + + + @@ -67,6 +71,10 @@ + + + + diff --git a/build/VS2010/zstd/zstd.vcxproj b/build/VS2010/zstd/zstd.vcxproj index 438dc617..90470180 100644 --- a/build/VS2010/zstd/zstd.vcxproj +++ b/build/VS2010/zstd/zstd.vcxproj @@ -30,6 +30,10 @@ + + + + @@ -60,6 +64,10 @@ + + + + diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt index 8371192c..f4b7e375 100644 --- a/build/cmake/lib/CMakeLists.txt +++ b/build/cmake/lib/CMakeLists.txt @@ -38,6 +38,10 @@ SET(Sources ${LIBRARY_DIR}/compress/huf_compress.c ${LIBRARY_DIR}/compress/zstd_compress.c ${LIBRARY_DIR}/compress/zstdmt_compress.c + ${LIBRARY_DIR}/compress/zstd_fast.c + ${LIBRARY_DIR}/compress/zstd_double_fast.c + ${LIBRARY_DIR}/compress/zstd_lazy.c + ${LIBRARY_DIR}/compress/zstd_opt.c ${LIBRARY_DIR}/decompress/huf_decompress.c ${LIBRARY_DIR}/decompress/zstd_decompress.c ${LIBRARY_DIR}/dictBuilder/cover.c @@ -58,6 +62,11 @@ SET(Headers ${LIBRARY_DIR}/common/huf.h ${LIBRARY_DIR}/common/mem.h ${LIBRARY_DIR}/common/zstd_internal.h + ${LIBRARY_DIR}/compress/zstd_compress.h + ${LIBRARY_DIR}/compress/zstd_fast.h + ${LIBRARY_DIR}/compress/zstd_double_fast.h + ${LIBRARY_DIR}/compress/zstd_lazy.h + ${LIBRARY_DIR}/compress/zstd_opt.h ${LIBRARY_DIR}/compress/zstdmt_compress.h ${LIBRARY_DIR}/dictBuilder/zdict.h ${LIBRARY_DIR}/deprecated/zbuff.h) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 978be3c8..ebb73f39 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -25,17 +25,13 @@ #include "fse.h" #define HUF_STATIC_LINKING_ONLY #include "huf.h" -#include "zstd_internal.h" /* includes zstd.h */ -#include "zstdmt_compress.h" +#include "zstd_compress.h" +#include "zstd_fast.h" +#include "zstd_double_fast.h" +#include "zstd_lazy.h" +#include "zstd_opt.h" -/*-************************************* -* Constants -***************************************/ -static const U32 g_searchStrength = 8; /* control skip over incompressible data */ -#define HASH_READ_SIZE 8 -typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; - /*-************************************* * Helper functions @@ -61,8 +57,6 @@ static void ZSTD_resetSeqStore(seqStore_t* ssPtr) /*-************************************* * Context memory management ***************************************/ -typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage; - struct ZSTD_CDict_s { void* dictBuffer; const void* dictContent; @@ -70,64 +64,6 @@ struct ZSTD_CDict_s { ZSTD_CCtx* refContext; }; /* typedef'd to ZSTD_CDict within "zstd.h" */ -typedef struct ZSTD_prefixDict_s { - const void* dict; - size_t dictSize; - ZSTD_dictMode_e dictMode; -} ZSTD_prefixDict; - -struct ZSTD_CCtx_s { - const BYTE* nextSrc; /* next block here to continue on current prefix */ - const BYTE* base; /* All regular indexes relative to this position */ - const BYTE* dictBase; /* extDict indexes relative to this position */ - U32 dictLimit; /* below that point, need extDict */ - U32 lowLimit; /* below that point, no more data */ - U32 nextToUpdate; /* index from which to continue dictionary update */ - U32 nextToUpdate3; /* index from which to continue dictionary update */ - U32 hashLog3; /* dispatch table : larger == faster, more memory */ - U32 loadedDictEnd; /* index of end of dictionary */ - ZSTD_compressionStage_e stage; - U32 dictID; - ZSTD_CCtx_params requestedParams; - ZSTD_CCtx_params appliedParams; - void* workSpace; - size_t workSpaceSize; - size_t blockSize; - U64 pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */ - U64 consumedSrcSize; - XXH64_state_t xxhState; - ZSTD_customMem customMem; - size_t staticSize; - - seqStore_t seqStore; /* sequences storage ptrs */ - optState_t optState; - U32* hashTable; - U32* hashTable3; - U32* chainTable; - ZSTD_entropyCTables_t* entropy; - - /* streaming */ - char* inBuff; - size_t inBuffSize; - size_t inToCompress; - size_t inBuffPos; - size_t inBuffTarget; - char* outBuff; - size_t outBuffSize; - size_t outBuffContentSize; - size_t outBuffFlushedSize; - ZSTD_cStreamStage streamStage; - U32 frameEnded; - - /* Dictionary */ - ZSTD_CDict* cdictLocal; - const ZSTD_CDict* cdict; - ZSTD_prefixDict prefixDict; /* single-usage dictionary */ - - /* Multi-threading */ - ZSTDMT_CCtx* mtctx; -}; - ZSTD_CCtx* ZSTD_createCCtx(void) { @@ -1145,24 +1081,6 @@ static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t * entropy, return lhSize+cLitSize; } -static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 16, 17, 17, 18, 18, 19, 19, - 20, 20, 20, 20, 21, 21, 21, 21, - 22, 22, 22, 22, 22, 22, 22, 22, - 23, 23, 23, 23, 23, 23, 23, 23, - 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24 }; - -static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, - 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, - 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, - 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; - void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) { @@ -1449,1523 +1367,6 @@ MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr, } -/*! ZSTD_storeSeq() : - Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. - `offsetCode` : distance to match, or 0 == repCode. - `matchCode` : matchLength - MINMATCH -*/ -MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode) -{ -#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 6) - static const BYTE* g_start = NULL; - U32 const pos = (U32)((const BYTE*)literals - g_start); - if (g_start==NULL) g_start = (const BYTE*)literals; - if ((pos > 0) && (pos < 1000000000)) - DEBUGLOG(6, "Cpos %6u :%5u literals & match %3u bytes at distance %6u", - pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); -#endif - /* copy Literals */ - assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + 128 KB); - ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); - seqStorePtr->lit += litLength; - - /* literal Length */ - if (litLength>0xFFFF) { - seqStorePtr->longLengthID = 1; - seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - } - seqStorePtr->sequences[0].litLength = (U16)litLength; - - /* match offset */ - seqStorePtr->sequences[0].offset = offsetCode + 1; - - /* match Length */ - if (matchCode>0xFFFF) { - seqStorePtr->longLengthID = 2; - seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - } - seqStorePtr->sequences[0].matchLength = (U16)matchCode; - - seqStorePtr->sequences++; -} - - -/*-************************************* -* Match length counter -***************************************/ -static unsigned ZSTD_NbCommonBytes (register size_t val) -{ - if (MEM_isLittleEndian()) { - if (MEM_64bits()) { -# if defined(_MSC_VER) && defined(_WIN64) - unsigned long r = 0; - _BitScanForward64( &r, (U64)val ); - return (unsigned)(r>>3); -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_ctzll((U64)val) >> 3); -# else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, - 0, 3, 1, 3, 1, 4, 2, 7, - 0, 2, 3, 6, 1, 5, 3, 5, - 1, 3, 4, 4, 2, 5, 6, 7, - 7, 0, 1, 2, 3, 3, 4, 6, - 2, 6, 5, 5, 3, 4, 5, 6, - 7, 1, 2, 4, 6, 4, 4, 5, - 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; -# endif - } else { /* 32 bits */ -# if defined(_MSC_VER) - unsigned long r=0; - _BitScanForward( &r, (U32)val ); - return (unsigned)(r>>3); -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_ctz((U32)val) >> 3); -# else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, - 3, 2, 2, 1, 3, 2, 0, 1, - 3, 3, 1, 2, 2, 2, 2, 0, - 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif - } - } else { /* Big Endian CPU */ - if (MEM_64bits()) { -# if defined(_MSC_VER) && defined(_WIN64) - unsigned long r = 0; - _BitScanReverse64( &r, val ); - return (unsigned)(r>>3); -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_clzll(val) >> 3); -# else - unsigned r; - const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ - if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; -# endif - } else { /* 32 bits */ -# if defined(_MSC_VER) - unsigned long r = 0; - _BitScanReverse( &r, (unsigned long)val ); - return (unsigned)(r>>3); -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_clz((U32)val) >> 3); -# else - unsigned r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; -# endif - } } -} - - -static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) -{ - const BYTE* const pStart = pIn; - const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1); - - while (pIn < pInLoopLimit) { - size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); - if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } - pIn += ZSTD_NbCommonBytes(diff); - return (size_t)(pIn - pStart); - } - if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } - if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; } - if ((pIn> (32-h) ; } -MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ - -static const U32 prime4bytes = 2654435761U; -static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } -static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } - -static const U64 prime5bytes = 889523592379ULL; -static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } -static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } - -static const U64 prime6bytes = 227718039650203ULL; -static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } -static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } - -static const U64 prime7bytes = 58295818150454627ULL; -static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } -static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } - -static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; -static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } -static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } - -static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) -{ - switch(mls) - { - default: - case 4: return ZSTD_hash4Ptr(p, hBits); - case 5: return ZSTD_hash5Ptr(p, hBits); - case 6: return ZSTD_hash6Ptr(p, hBits); - case 7: return ZSTD_hash7Ptr(p, hBits); - case 8: return ZSTD_hash8Ptr(p, hBits); - } -} - - -/*-************************************* -* Fast Scan -***************************************/ -static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls) -{ - U32* const hashTable = zc->hashTable; - U32 const hBits = zc->appliedParams.cParams.hashLog; - const BYTE* const base = zc->base; - const BYTE* ip = base + zc->nextToUpdate; - const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; - const size_t fastHashFillStep = 3; - - while(ip <= iend) { - hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base); - ip += fastHashFillStep; - } -} - - -FORCE_INLINE_TEMPLATE -void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx, - const void* src, size_t srcSize, - const U32 mls) -{ - U32* const hashTable = cctx->hashTable; - U32 const hBits = cctx->appliedParams.cParams.hashLog; - seqStore_t* seqStorePtr = &(cctx->seqStore); - const BYTE* const base = cctx->base; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const U32 lowestIndex = cctx->dictLimit; - const BYTE* const lowest = base + lowestIndex; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - HASH_READ_SIZE; - U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1]; - U32 offsetSaved = 0; - - /* init */ - ip += (ip==lowest); - { U32 const maxRep = (U32)(ip-lowest); - if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; - if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; - } - - /* Main Search Loop */ - while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ - size_t mLength; - size_t const h = ZSTD_hashPtr(ip, hBits, mls); - U32 const current = (U32)(ip-base); - U32 const matchIndex = hashTable[h]; - const BYTE* match = base + matchIndex; - hashTable[h] = current; /* update hash table */ - - if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { - mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; - ip++; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); - } else { - U32 offset; - if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) { - ip += ((ip-anchor) >> g_searchStrength) + 1; - continue; - } - mLength = ZSTD_count(ip+4, match+4, iend) + 4; - offset = (U32)(ip-match); - while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; - - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - } - - /* match found */ - ip += mLength; - anchor = ip; - - if (ip <= ilimit) { - /* Fill Table */ - hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; /* here because current+2 could be > iend-8 */ - hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); - /* check immediate repcode */ - while ( (ip <= ilimit) - && ( (offset_2>0) - & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; - { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ - hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base); - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH); - ip += rLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } } } - - /* save reps for next block */ - seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved; - seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved; - - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - - -static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, - const void* src, size_t srcSize) -{ - const U32 mls = ctx->appliedParams.cParams.searchLength; - switch(mls) - { - default: /* includes case 3 */ - case 4 : - ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return; - case 5 : - ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return; - case 6 : - ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return; - case 7 : - ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return; - } -} - - -static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, - const U32 mls) -{ - U32* hashTable = ctx->hashTable; - const U32 hBits = ctx->appliedParams.cParams.hashLog; - seqStore_t* seqStorePtr = &(ctx->seqStore); - const BYTE* const base = ctx->base; - const BYTE* const dictBase = ctx->dictBase; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const U32 lowestIndex = ctx->lowLimit; - const BYTE* const dictStart = dictBase + lowestIndex; - const U32 dictLimit = ctx->dictLimit; - const BYTE* const lowPrefixPtr = base + dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1]; - - /* Search Loop */ - while (ip < ilimit) { /* < instead of <=, because (ip+1) */ - const size_t h = ZSTD_hashPtr(ip, hBits, mls); - const U32 matchIndex = hashTable[h]; - const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base; - const BYTE* match = matchBase + matchIndex; - const U32 current = (U32)(ip-base); - const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ - const BYTE* repBase = repIndex < dictLimit ? dictBase : base; - const BYTE* repMatch = repBase + repIndex; - size_t mLength; - hashTable[h] = current; /* update hash table */ - - if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4; - ip++; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); - } else { - if ( (matchIndex < lowestIndex) || - (MEM_read32(match) != MEM_read32(ip)) ) { - ip += ((ip-anchor) >> g_searchStrength) + 1; - continue; - } - { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; - const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr; - U32 offset; - mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4; - while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - offset = current - matchIndex; - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - } } - - /* found a match : store it */ - ip += mLength; - anchor = ip; - - if (ip <= ilimit) { - /* Fill Table */ - hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; - hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); - /* check immediate repcode */ - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2; - if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */ - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4; - U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH); - hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } } } - - /* save reps for next block */ - seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2; - - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - - -static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, - const void* src, size_t srcSize) -{ - U32 const mls = ctx->appliedParams.cParams.searchLength; - switch(mls) - { - default: /* includes case 3 */ - case 4 : - ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return; - case 5 : - ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return; - case 6 : - ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return; - case 7 : - ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return; - } -} - - -/*-************************************* -* Double Fast -***************************************/ -static void ZSTD_fillDoubleHashTable (ZSTD_CCtx* cctx, const void* end, const U32 mls) -{ - U32* const hashLarge = cctx->hashTable; - U32 const hBitsL = cctx->appliedParams.cParams.hashLog; - U32* const hashSmall = cctx->chainTable; - U32 const hBitsS = cctx->appliedParams.cParams.chainLog; - const BYTE* const base = cctx->base; - const BYTE* ip = base + cctx->nextToUpdate; - const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; - const size_t fastHashFillStep = 3; - - while(ip <= iend) { - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base); - hashLarge[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base); - ip += fastHashFillStep; - } -} - - -FORCE_INLINE_TEMPLATE -void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx, - const void* src, size_t srcSize, - const U32 mls) -{ - U32* const hashLong = cctx->hashTable; - const U32 hBitsL = cctx->appliedParams.cParams.hashLog; - U32* const hashSmall = cctx->chainTable; - const U32 hBitsS = cctx->appliedParams.cParams.chainLog; - seqStore_t* seqStorePtr = &(cctx->seqStore); - const BYTE* const base = cctx->base; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const U32 lowestIndex = cctx->dictLimit; - const BYTE* const lowest = base + lowestIndex; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - HASH_READ_SIZE; - U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1]; - U32 offsetSaved = 0; - - /* init */ - ip += (ip==lowest); - { U32 const maxRep = (U32)(ip-lowest); - if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; - if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; - } - - /* Main Search Loop */ - while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ - size_t mLength; - size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); - size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); - U32 const current = (U32)(ip-base); - U32 const matchIndexL = hashLong[h2]; - U32 const matchIndexS = hashSmall[h]; - const BYTE* matchLong = base + matchIndexL; - const BYTE* match = base + matchIndexS; - hashLong[h2] = hashSmall[h] = current; /* update hash tables */ - - assert(offset_1 <= current); /* supposed guaranteed by construction */ - if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { - /* favor repcode */ - mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; - ip++; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); - } else { - U32 offset; - if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) { - mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; - offset = (U32)(ip-matchLong); - while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ - } else if ( (matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) { - size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); - U32 const matchIndexL3 = hashLong[hl3]; - const BYTE* matchL3 = base + matchIndexL3; - hashLong[hl3] = current + 1; - if ( (matchIndexL3 > lowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1)) ) { - mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; - ip++; - offset = (U32)(ip-matchL3); - while (((ip>anchor) & (matchL3>lowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ - } else { - mLength = ZSTD_count(ip+4, match+4, iend) + 4; - offset = (U32)(ip-match); - while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - } - } else { - ip += ((ip-anchor) >> g_searchStrength) + 1; - continue; - } - - offset_2 = offset_1; - offset_1 = offset; - - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - } - - /* match found */ - ip += mLength; - anchor = ip; - - if (ip <= ilimit) { - /* Fill Table */ - hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = - hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */ - hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = - hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); - - /* check immediate repcode */ - while ( (ip <= ilimit) - && ( (offset_2>0) - & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; - { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH); - ip += rLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } } } - - /* save reps for next block */ - seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved; - seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved; - - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - - -static void ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ - const U32 mls = ctx->appliedParams.cParams.searchLength; - switch(mls) - { - default: /* includes case 3 */ - case 4 : - ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return; - case 5 : - ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); return; - case 6 : - ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); return; - case 7 : - ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); return; - } -} - - -static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, - const U32 mls) -{ - U32* const hashLong = ctx->hashTable; - U32 const hBitsL = ctx->appliedParams.cParams.hashLog; - U32* const hashSmall = ctx->chainTable; - U32 const hBitsS = ctx->appliedParams.cParams.chainLog; - seqStore_t* seqStorePtr = &(ctx->seqStore); - const BYTE* const base = ctx->base; - const BYTE* const dictBase = ctx->dictBase; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const U32 lowestIndex = ctx->lowLimit; - const BYTE* const dictStart = dictBase + lowestIndex; - const U32 dictLimit = ctx->dictLimit; - const BYTE* const lowPrefixPtr = base + dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1]; - - /* Search Loop */ - while (ip < ilimit) { /* < instead of <=, because (ip+1) */ - const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); - const U32 matchIndex = hashSmall[hSmall]; - const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base; - const BYTE* match = matchBase + matchIndex; - - const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8); - const U32 matchLongIndex = hashLong[hLong]; - const BYTE* matchLongBase = matchLongIndex < dictLimit ? dictBase : base; - const BYTE* matchLong = matchLongBase + matchLongIndex; - - const U32 current = (U32)(ip-base); - const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ - const BYTE* repBase = repIndex < dictLimit ? dictBase : base; - const BYTE* repMatch = repBase + repIndex; - size_t mLength; - hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */ - - if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4; - ip++; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); - } else { - if ((matchLongIndex > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { - const BYTE* matchEnd = matchLongIndex < dictLimit ? dictEnd : iend; - const BYTE* lowMatchPtr = matchLongIndex < dictLimit ? dictStart : lowPrefixPtr; - U32 offset; - mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, lowPrefixPtr) + 8; - offset = current - matchLongIndex; - while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - - } else if ((matchIndex > lowestIndex) && (MEM_read32(match) == MEM_read32(ip))) { - size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); - U32 const matchIndex3 = hashLong[h3]; - const BYTE* const match3Base = matchIndex3 < dictLimit ? dictBase : base; - const BYTE* match3 = match3Base + matchIndex3; - U32 offset; - hashLong[h3] = current + 1; - if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { - const BYTE* matchEnd = matchIndex3 < dictLimit ? dictEnd : iend; - const BYTE* lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr; - mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, lowPrefixPtr) + 8; - ip++; - offset = current+1 - matchIndex3; - while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ - } else { - const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; - const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr; - mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4; - offset = current - matchIndex; - while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - } - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - - } else { - ip += ((ip-anchor) >> g_searchStrength) + 1; - continue; - } } - - /* found a match : store it */ - ip += mLength; - anchor = ip; - - if (ip <= ilimit) { - /* Fill Table */ - hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; - hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2; - hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); - hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); - /* check immediate repcode */ - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2; - if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */ - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4; - U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH); - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } } } - - /* save reps for next block */ - seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2; - - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - - -static void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx, - const void* src, size_t srcSize) -{ - U32 const mls = ctx->appliedParams.cParams.searchLength; - switch(mls) - { - default: /* includes case 3 */ - case 4 : - ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return; - case 5 : - ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); return; - case 6 : - ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); return; - case 7 : - ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); return; - } -} - - -/*-************************************* -* Binary Tree search -***************************************/ -/** ZSTD_insertBt1() : add one or multiple positions to tree. -* ip : assumed <= iend-8 . -* @return : nb of positions added */ -static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares, - U32 extDict) -{ - U32* const hashTable = zc->hashTable; - U32 const hashLog = zc->appliedParams.cParams.hashLog; - size_t const h = ZSTD_hashPtr(ip, hashLog, mls); - U32* const bt = zc->chainTable; - U32 const btLog = zc->appliedParams.cParams.chainLog - 1; - U32 const btMask = (1 << btLog) - 1; - U32 matchIndex = hashTable[h]; - size_t commonLengthSmaller=0, commonLengthLarger=0; - const BYTE* const base = zc->base; - const BYTE* const dictBase = zc->dictBase; - const U32 dictLimit = zc->dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* match; - const U32 current = (U32)(ip-base); - const U32 btLow = btMask >= current ? 0 : current - btMask; - U32* smallerPtr = bt + 2*(current&btMask); - U32* largerPtr = smallerPtr + 1; - U32 dummy32; /* to be nullified at the end */ - U32 const windowLow = zc->lowLimit; - U32 matchEndIdx = current+8; - size_t bestLength = 8; -#ifdef ZSTD_C_PREDICT - U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); - U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1); - predictedSmall += (predictedSmall>0); - predictedLarge += (predictedLarge>0); -#endif /* ZSTD_C_PREDICT */ - - hashTable[h] = current; /* Update Hash Table */ - - while (nbCompares-- && (matchIndex > windowLow)) { - U32* const nextPtr = bt + 2*(matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - -#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ - const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ - if (matchIndex == predictedSmall) { - /* no need to check length, result known */ - *smallerPtr = matchIndex; - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - predictedSmall = predictPtr[1] + (predictPtr[1]>0); - continue; - } - if (matchIndex == predictedLarge) { - *largerPtr = matchIndex; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - predictedLarge = predictPtr[0] + (predictPtr[0]>0); - continue; - } -#endif - if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { - match = base + matchIndex; - if (match[matchLength] == ip[matchLength]) - matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; - } else { - match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); - if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ - } - - if (matchLength > bestLength) { - bestLength = matchLength; - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - } - - if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ - break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */ - - if (match[matchLength] < ip[matchLength]) { /* necessarily within correct buffer */ - /* match is smaller than current */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - } else { - /* match is larger than current */ - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } } - - *smallerPtr = *largerPtr = 0; - if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */ - if (matchEndIdx > current + 8) return matchEndIdx - current - 8; - return 1; -} - - -static size_t ZSTD_insertBtAndFindBestMatch ( - ZSTD_CCtx* zc, - const BYTE* const ip, const BYTE* const iend, - size_t* offsetPtr, - U32 nbCompares, const U32 mls, - U32 extDict) -{ - U32* const hashTable = zc->hashTable; - U32 const hashLog = zc->appliedParams.cParams.hashLog; - size_t const h = ZSTD_hashPtr(ip, hashLog, mls); - U32* const bt = zc->chainTable; - U32 const btLog = zc->appliedParams.cParams.chainLog - 1; - U32 const btMask = (1 << btLog) - 1; - U32 matchIndex = hashTable[h]; - size_t commonLengthSmaller=0, commonLengthLarger=0; - const BYTE* const base = zc->base; - const BYTE* const dictBase = zc->dictBase; - const U32 dictLimit = zc->dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const U32 current = (U32)(ip-base); - const U32 btLow = btMask >= current ? 0 : current - btMask; - const U32 windowLow = zc->lowLimit; - U32* smallerPtr = bt + 2*(current&btMask); - U32* largerPtr = bt + 2*(current&btMask) + 1; - U32 matchEndIdx = current+8; - U32 dummy32; /* to be nullified at the end */ - size_t bestLength = 0; - - hashTable[h] = current; /* Update Hash Table */ - - while (nbCompares-- && (matchIndex > windowLow)) { - U32* const nextPtr = bt + 2*(matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - const BYTE* match; - - if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { - match = base + matchIndex; - if (match[matchLength] == ip[matchLength]) - matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; - } else { - match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); - if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ - } - - if (matchLength > bestLength) { - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) - bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; - if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ - break; /* drop, to guarantee consistency (miss a little bit of compression) */ - } - - if (match[matchLength] < ip[matchLength]) { - /* match is smaller than current */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - } else { - /* match is larger than current */ - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } } - - *smallerPtr = *largerPtr = 0; - - zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1; - return bestLength; -} - - -static void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) -{ - const BYTE* const base = zc->base; - const U32 target = (U32)(ip - base); - U32 idx = zc->nextToUpdate; - - while(idx < target) - idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0); -} - -/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ -static size_t ZSTD_BtFindBestMatch ( - ZSTD_CCtx* zc, - const BYTE* const ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 mls) -{ - if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0); -} - - -static size_t ZSTD_BtFindBestMatch_selectMLS ( - ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 matchLengthSearch) -{ - switch(matchLengthSearch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); - case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); - case 7 : - case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); - } -} - - -static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) -{ - const BYTE* const base = zc->base; - const U32 target = (U32)(ip - base); - U32 idx = zc->nextToUpdate; - - while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1); -} - - -/** Tree updater, providing best match */ -static size_t ZSTD_BtFindBestMatch_extDict ( - ZSTD_CCtx* zc, - const BYTE* const ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 mls) -{ - if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1); -} - - -static size_t ZSTD_BtFindBestMatch_selectMLS_extDict ( - ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 matchLengthSearch) -{ - switch(matchLengthSearch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); - case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); - case 7 : - case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); - } -} - - - -/* ********************************* -* Hash Chain -***********************************/ -#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask] - -/* Update chains up to ip (excluded) - Assumption : always within prefix (i.e. not within extDict) */ -FORCE_INLINE_TEMPLATE -U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) -{ - U32* const hashTable = zc->hashTable; - const U32 hashLog = zc->appliedParams.cParams.hashLog; - U32* const chainTable = zc->chainTable; - const U32 chainMask = (1 << zc->appliedParams.cParams.chainLog) - 1; - const BYTE* const base = zc->base; - const U32 target = (U32)(ip - base); - U32 idx = zc->nextToUpdate; - - while(idx < target) { /* catch up */ - size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); - NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; - hashTable[h] = idx; - idx++; - } - - zc->nextToUpdate = target; - return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; -} - - -/* inlining is important to hardwire a hot branch (template emulation) */ -FORCE_INLINE_TEMPLATE -size_t ZSTD_HcFindBestMatch_generic ( - ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* const ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 mls, const U32 extDict) -{ - U32* const chainTable = zc->chainTable; - const U32 chainSize = (1 << zc->appliedParams.cParams.chainLog); - const U32 chainMask = chainSize-1; - const BYTE* const base = zc->base; - const BYTE* const dictBase = zc->dictBase; - const U32 dictLimit = zc->dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const U32 lowLimit = zc->lowLimit; - const U32 current = (U32)(ip-base); - const U32 minChain = current > chainSize ? current - chainSize : 0; - int nbAttempts=maxNbAttempts; - size_t ml=4-1; - - /* HC4 match finder */ - U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls); - - for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) { - const BYTE* match; - size_t currentMl=0; - if ((!extDict) || matchIndex >= dictLimit) { - match = base + matchIndex; - if (match[ml] == ip[ml]) /* potentially better */ - currentMl = ZSTD_count(ip, match, iLimit); - } else { - match = dictBase + matchIndex; - if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; - } - - /* save best solution */ - if (currentMl > ml) { - ml = currentMl; - *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; - if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ - } - - if (matchIndex <= minChain) break; - matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); - } - - return ml; -} - - -FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS ( - ZSTD_CCtx* zc, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 matchLengthSearch) -{ - switch(matchLengthSearch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0); - case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0); - case 7 : - case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0); - } -} - - -FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( - ZSTD_CCtx* zc, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 maxNbAttempts, const U32 matchLengthSearch) -{ - switch(matchLengthSearch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1); - case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1); - case 7 : - case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1); - } -} - - -/* ******************************* -* Common parser - lazy strategy -*********************************/ -FORCE_INLINE_TEMPLATE -void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, - const U32 searchMethod, const U32 depth) -{ - seqStore_t* seqStorePtr = &(ctx->seqStore); - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ctx->base + ctx->dictLimit; - - U32 const maxSearches = 1 << ctx->appliedParams.cParams.searchLog; - U32 const mls = ctx->appliedParams.cParams.searchLength; - - typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, - size_t* offsetPtr, - U32 maxNbAttempts, U32 matchLengthSearch); - searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS; - U32 offset_1 = seqStorePtr->rep[0], offset_2 = seqStorePtr->rep[1], savedOffset=0; - - /* init */ - ip += (ip==base); - ctx->nextToUpdate3 = ctx->nextToUpdate; - { U32 const maxRep = (U32)(ip-base); - if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; - if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; - } - - /* Match Loop */ - while (ip < ilimit) { - size_t matchLength=0; - size_t offset=0; - const BYTE* start=ip+1; - - /* check repCode */ - if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) { - /* repcode : we take it */ - matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; - if (depth==0) goto _storeSequence; - } - - /* first search (depth 0) */ - { size_t offsetFound = 99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); - if (ml2 > matchLength) - matchLength = ml2, start = ip, offset=offsetFound; - } - - if (matchLength < 4) { - ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ - continue; - } - - /* let's try to find a better solution */ - if (depth>=1) - while (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { - size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; - int const gain2 = (int)(mlRep * 3); - int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); - if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; - } - { size_t offset2=99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); - if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; /* search a better one */ - } } - - /* let's find an even better one */ - if ((depth==2) && (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { - size_t const ml2 = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; - int const gain2 = (int)(ml2 * 4); - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); - if ((ml2 >= 4) && (gain2 > gain1)) - matchLength = ml2, offset = 0, start = ip; - } - { size_t offset2=99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); - if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; - } } } - break; /* nothing found : store previous solution */ - } - - /* NOTE: - * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. - * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which - * overflows the pointer, which is undefined behavior. - */ - /* catch up */ - if (offset) { - while ( (start > anchor) - && (start > base+offset-ZSTD_REP_MOVE) - && (start[-1] == (start-offset+ZSTD_REP_MOVE)[-1]) ) /* only search for offset within prefix */ - { start--; matchLength++; } - offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); - } - /* store sequence */ -_storeSequence: - { size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH); - anchor = ip = start + matchLength; - } - - /* check immediate repcode */ - while ( (ip <= ilimit) - && ((offset_2>0) - & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { - /* store sequence */ - matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; - offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); - ip += matchLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } } - - /* Save reps for next block */ - seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : savedOffset; - seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : savedOffset; - - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - - -static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2); -} - -static void ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2); -} - -static void ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1); -} - -static void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0); -} - - -FORCE_INLINE_TEMPLATE -void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, - const U32 searchMethod, const U32 depth) -{ - seqStore_t* seqStorePtr = &(ctx->seqStore); - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ctx->base; - const U32 dictLimit = ctx->dictLimit; - const U32 lowestIndex = ctx->lowLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* const dictBase = ctx->dictBase; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const dictStart = dictBase + ctx->lowLimit; - - const U32 maxSearches = 1 << ctx->appliedParams.cParams.searchLog; - const U32 mls = ctx->appliedParams.cParams.searchLength; - - typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, - size_t* offsetPtr, - U32 maxNbAttempts, U32 matchLengthSearch); - searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS; - - U32 offset_1 = seqStorePtr->rep[0], offset_2 = seqStorePtr->rep[1]; - - /* init */ - ctx->nextToUpdate3 = ctx->nextToUpdate; - ip += (ip == prefixStart); - - /* Match Loop */ - while (ip < ilimit) { - size_t matchLength=0; - size_t offset=0; - const BYTE* start=ip+1; - U32 current = (U32)(ip-base); - - /* check repCode */ - { const U32 repIndex = (U32)(current+1 - offset_1); - const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ - if (MEM_read32(ip+1) == MEM_read32(repMatch)) { - /* repcode detected we should take it */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4; - if (depth==0) goto _storeSequence; - } } - - /* first search (depth 0) */ - { size_t offsetFound = 99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); - if (ml2 > matchLength) - matchLength = ml2, start = ip, offset=offsetFound; - } - - if (matchLength < 4) { - ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ - continue; - } - - /* let's try to find a better solution */ - if (depth>=1) - while (ip= 3) & (repIndex > lowestIndex)) /* intentional overflow */ - if (MEM_read32(ip) == MEM_read32(repMatch)) { - /* repcode detected */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; - int const gain2 = (int)(repLength * 3); - int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); - if ((repLength >= 4) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; - } } - - /* search match, depth 1 */ - { size_t offset2=99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); - if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; /* search a better one */ - } } - - /* let's find an even better one */ - if ((depth==2) && (ip= 3) & (repIndex > lowestIndex)) /* intentional overflow */ - if (MEM_read32(ip) == MEM_read32(repMatch)) { - /* repcode detected */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; - int const gain2 = (int)(repLength * 4); - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); - if ((repLength >= 4) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; - } } - - /* search match, depth 2 */ - { size_t offset2=99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); - if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; - } } } - break; /* nothing found : store previous solution */ - } - - /* catch up */ - if (offset) { - U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); - const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; - const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; - while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ - offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); - } - - /* store sequence */ -_storeSequence: - { size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH); - anchor = ip = start + matchLength; - } - - /* check immediate repcode */ - while (ip <= ilimit) { - const U32 repIndex = (U32)((ip-base) - offset_2); - const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ - if (MEM_read32(ip) == MEM_read32(repMatch)) { - /* repcode detected we should take it */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; - offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); - ip += matchLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } - break; - } } - - /* Save reps for next block */ - seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2; - - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - - -void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0); -} - -static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1); -} - -static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2); -} - -static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2); -} - - -/* The optimal parser */ -#include "zstd_opt.h" - -static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ -#ifdef ZSTD_OPT_H_91842398743 - ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0); -#else - (void)ctx; (void)src; (void)srcSize; - return; -#endif -} - -static void ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ -#ifdef ZSTD_OPT_H_91842398743 - ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1); -#else - (void)ctx; (void)src; (void)srcSize; - return; -#endif -} - -static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ -#ifdef ZSTD_OPT_H_91842398743 - ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0); -#else - (void)ctx; (void)src; (void)srcSize; - return; -#endif -} - -static void ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) -{ -#ifdef ZSTD_OPT_H_91842398743 - ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1); -#else - (void)ctx; (void)src; (void)srcSize; - return; -#endif -} - - /* ZSTD_selectBlockCompressor() : * assumption : strat is a valid strategy */ typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize); diff --git a/lib/compress/zstd_compress.h b/lib/compress/zstd_compress.h new file mode 100644 index 00000000..a136a949 --- /dev/null +++ b/lib/compress/zstd_compress.h @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + + +#ifndef ZSTD_COMPRESS_H +#define ZSTD_COMPRESS_H + +/*-************************************* +* Dependencies +***************************************/ +#include "zstd_internal.h" +#include "zstdmt_compress.h" + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Constants +***************************************/ +static const U32 g_searchStrength = 8; +#define HASH_READ_SIZE 8 + + +/*-************************************* +* Context memory management +***************************************/ +typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; +typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage; + +typedef struct ZSTD_prefixDict_s { + const void* dict; + size_t dictSize; + ZSTD_dictMode_e dictMode; +} ZSTD_prefixDict; + +struct ZSTD_CCtx_s { + const BYTE* nextSrc; /* next block here to continue on current prefix */ + const BYTE* base; /* All regular indexes relative to this position */ + const BYTE* dictBase; /* extDict indexes relative to this position */ + U32 dictLimit; /* below that point, need extDict */ + U32 lowLimit; /* below that point, no more data */ + U32 nextToUpdate; /* index from which to continue dictionary update */ + U32 nextToUpdate3; /* index from which to continue dictionary update */ + U32 hashLog3; /* dispatch table : larger == faster, more memory */ + U32 loadedDictEnd; /* index of end of dictionary */ + ZSTD_compressionStage_e stage; + U32 dictID; + ZSTD_CCtx_params requestedParams; + ZSTD_CCtx_params appliedParams; + void* workSpace; + size_t workSpaceSize; + size_t blockSize; + U64 pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */ + U64 consumedSrcSize; + XXH64_state_t xxhState; + ZSTD_customMem customMem; + size_t staticSize; + + seqStore_t seqStore; /* sequences storage ptrs */ + optState_t optState; + U32* hashTable; + U32* hashTable3; + U32* chainTable; + ZSTD_entropyCTables_t* entropy; + + /* streaming */ + char* inBuff; + size_t inBuffSize; + size_t inToCompress; + size_t inBuffPos; + size_t inBuffTarget; + char* outBuff; + size_t outBuffSize; + size_t outBuffContentSize; + size_t outBuffFlushedSize; + ZSTD_cStreamStage streamStage; + U32 frameEnded; + + /* Dictionary */ + ZSTD_CDict* cdictLocal; + const ZSTD_CDict* cdict; + ZSTD_prefixDict prefixDict; /* single-usage dictionary */ + + /* Multi-threading */ + ZSTDMT_CCtx* mtctx; +}; + + +static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 16, 17, 17, 18, 18, 19, 19, + 20, 20, 20, 20, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24 }; + +static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, + 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; + +/*! ZSTD_storeSeq() : + Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. + `offsetCode` : distance to match, or 0 == repCode. + `matchCode` : matchLength - MINMATCH +*/ +MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode) +{ +#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 6) + static const BYTE* g_start = NULL; + U32 const pos = (U32)((const BYTE*)literals - g_start); + if (g_start==NULL) g_start = (const BYTE*)literals; + if ((pos > 0) && (pos < 1000000000)) + DEBUGLOG(6, "Cpos %6u :%5u literals & match %3u bytes at distance %6u", + pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); +#endif + /* copy Literals */ + assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + 128 KB); + ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); + seqStorePtr->lit += litLength; + + /* literal Length */ + if (litLength>0xFFFF) { + seqStorePtr->longLengthID = 1; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].litLength = (U16)litLength; + + /* match offset */ + seqStorePtr->sequences[0].offset = offsetCode + 1; + + /* match Length */ + if (matchCode>0xFFFF) { + seqStorePtr->longLengthID = 2; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].matchLength = (U16)matchCode; + + seqStorePtr->sequences++; +} + + +/*-************************************* +* Match length counter +***************************************/ +static unsigned ZSTD_NbCommonBytes (register size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanForward64( &r, (U64)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, + 0, 3, 1, 3, 1, 4, 2, 7, + 0, 2, 3, 6, 1, 5, 3, 5, + 1, 3, 4, 4, 2, 5, 6, 7, + 7, 0, 1, 2, 3, 3, 4, 6, + 2, 6, 5, 5, 3, 4, 5, 6, + 7, 1, 2, 4, 6, 4, 4, 5, + 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r=0; + _BitScanForward( &r, (U32)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, + 3, 2, 2, 1, 3, 2, 0, 1, + 3, 3, 1, 2, 2, 2, 2, 0, + 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanReverse64( &r, val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r = 0; + _BitScanReverse( &r, (unsigned long)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} + + +MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) +{ + const BYTE* const pStart = pIn; + const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1); + + while (pIn < pInLoopLimit) { + size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } + pIn += ZSTD_NbCommonBytes(diff); + return (size_t)(pIn - pStart); + } + if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } + if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; } + if ((pIn> (32-h) ; } +MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ + +static const U32 prime4bytes = 2654435761U; +static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } +static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } + +static const U64 prime5bytes = 889523592379ULL; +static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } +static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } + +static const U64 prime6bytes = 227718039650203ULL; +static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } +static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } + +static const U64 prime7bytes = 58295818150454627ULL; +static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } +static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } + +static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; +static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } +static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } + +MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) +{ + switch(mls) + { + default: + case 4: return ZSTD_hash4Ptr(p, hBits); + case 5: return ZSTD_hash5Ptr(p, hBits); + case 6: return ZSTD_hash6Ptr(p, hBits); + case 7: return ZSTD_hash7Ptr(p, hBits); + case 8: return ZSTD_hash8Ptr(p, hBits); + } +} + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_COMPRESS_H */ diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c new file mode 100644 index 00000000..437e9a2c --- /dev/null +++ b/lib/compress/zstd_double_fast.c @@ -0,0 +1,313 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +#include "zstd_double_fast.h" + + +void ZSTD_fillDoubleHashTable(ZSTD_CCtx* cctx, const void* end, const U32 mls) +{ + U32* const hashLarge = cctx->hashTable; + U32 const hBitsL = cctx->appliedParams.cParams.hashLog; + U32* const hashSmall = cctx->chainTable; + U32 const hBitsS = cctx->appliedParams.cParams.chainLog; + const BYTE* const base = cctx->base; + const BYTE* ip = base + cctx->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const size_t fastHashFillStep = 3; + + while(ip <= iend) { + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base); + hashLarge[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base); + ip += fastHashFillStep; + } +} + + +FORCE_INLINE_TEMPLATE +void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx, + const void* src, size_t srcSize, + const U32 mls) +{ + U32* const hashLong = cctx->hashTable; + const U32 hBitsL = cctx->appliedParams.cParams.hashLog; + U32* const hashSmall = cctx->chainTable; + const U32 hBitsS = cctx->appliedParams.cParams.chainLog; + seqStore_t* seqStorePtr = &(cctx->seqStore); + const BYTE* const base = cctx->base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowestIndex = cctx->dictLimit; + const BYTE* const lowest = base + lowestIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1]; + U32 offsetSaved = 0; + + /* init */ + ip += (ip==lowest); + { U32 const maxRep = (U32)(ip-lowest); + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); + size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); + U32 const current = (U32)(ip-base); + U32 const matchIndexL = hashLong[h2]; + U32 const matchIndexS = hashSmall[h]; + const BYTE* matchLong = base + matchIndexL; + const BYTE* match = base + matchIndexS; + hashLong[h2] = hashSmall[h] = current; /* update hash tables */ + + assert(offset_1 <= current); /* supposed guaranteed by construction */ + if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { + /* favor repcode */ + mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + ip++; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); + } else { + U32 offset; + if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) { + mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; + offset = (U32)(ip-matchLong); + while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + } else if ( (matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) { + size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + U32 const matchIndexL3 = hashLong[hl3]; + const BYTE* matchL3 = base + matchIndexL3; + hashLong[hl3] = current + 1; + if ( (matchIndexL3 > lowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1)) ) { + mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; + ip++; + offset = (U32)(ip-matchL3); + while (((ip>anchor) & (matchL3>lowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ + } else { + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + offset = (U32)(ip-match); + while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + } else { + ip += ((ip-anchor) >> g_searchStrength) + 1; + continue; + } + + offset_2 = offset_1; + offset_1 = offset; + + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */ + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); + + /* check immediate repcode */ + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } + + /* save reps for next block */ + seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved; + seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved; + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +void ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + const U32 mls = ctx->appliedParams.cParams.searchLength; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return; + case 5 : + ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); return; + case 6 : + ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); return; + case 7 : + ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); return; + } +} + + +static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, + const U32 mls) +{ + U32* const hashLong = ctx->hashTable; + U32 const hBitsL = ctx->appliedParams.cParams.hashLog; + U32* const hashSmall = ctx->chainTable; + U32 const hBitsS = ctx->appliedParams.cParams.chainLog; + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const base = ctx->base; + const BYTE* const dictBase = ctx->dictBase; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowestIndex = ctx->lowLimit; + const BYTE* const dictStart = dictBase + lowestIndex; + const U32 dictLimit = ctx->dictLimit; + const BYTE* const lowPrefixPtr = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1]; + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); + const U32 matchIndex = hashSmall[hSmall]; + const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + + const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8); + const U32 matchLongIndex = hashLong[hLong]; + const BYTE* matchLongBase = matchLongIndex < dictLimit ? dictBase : base; + const BYTE* matchLong = matchLongBase + matchLongIndex; + + const U32 current = (U32)(ip-base); + const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ + const BYTE* repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* repMatch = repBase + repIndex; + size_t mLength; + hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */ + + if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4; + ip++; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); + } else { + if ((matchLongIndex > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { + const BYTE* matchEnd = matchLongIndex < dictLimit ? dictEnd : iend; + const BYTE* lowMatchPtr = matchLongIndex < dictLimit ? dictStart : lowPrefixPtr; + U32 offset; + mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, lowPrefixPtr) + 8; + offset = current - matchLongIndex; + while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else if ((matchIndex > lowestIndex) && (MEM_read32(match) == MEM_read32(ip))) { + size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + U32 const matchIndex3 = hashLong[h3]; + const BYTE* const match3Base = matchIndex3 < dictLimit ? dictBase : base; + const BYTE* match3 = match3Base + matchIndex3; + U32 offset; + hashLong[h3] = current + 1; + if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { + const BYTE* matchEnd = matchIndex3 < dictLimit ? dictEnd : iend; + const BYTE* lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr; + mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, lowPrefixPtr) + 8; + ip++; + offset = current+1 - matchIndex3; + while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ + } else { + const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; + const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr; + mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4; + offset = current - matchIndex; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else { + ip += ((ip-anchor) >> g_searchStrength) + 1; + continue; + } } + + /* found a match : store it */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; + hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2; + hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2; + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx, + const void* src, size_t srcSize) +{ + U32 const mls = ctx->appliedParams.cParams.searchLength; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return; + case 5 : + ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); return; + case 6 : + ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); return; + case 7 : + ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); return; + } +} diff --git a/lib/compress/zstd_double_fast.h b/lib/compress/zstd_double_fast.h new file mode 100644 index 00000000..64f8bfef --- /dev/null +++ b/lib/compress/zstd_double_fast.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +#ifndef ZSTD_DOUBLE_FAST_H +#define ZSTD_DOUBLE_FAST_H + +#include "zstd_compress.h" + +#if defined (__cplusplus) +extern "C" { +#endif + +void ZSTD_fillDoubleHashTable(ZSTD_CCtx* cctx, const void* end, const U32 mls); +void ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_DOUBLE_FAST_H */ diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c new file mode 100644 index 00000000..82ab15a4 --- /dev/null +++ b/lib/compress/zstd_fast.c @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +#include "zstd_fast.h" + + +void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls) +{ + U32* const hashTable = zc->hashTable; + U32 const hBits = zc->appliedParams.cParams.hashLog; + const BYTE* const base = zc->base; + const BYTE* ip = base + zc->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const size_t fastHashFillStep = 3; + + while(ip <= iend) { + hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base); + ip += fastHashFillStep; + } +} + + +FORCE_INLINE_TEMPLATE +void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx, + const void* src, size_t srcSize, + const U32 mls) +{ + U32* const hashTable = cctx->hashTable; + U32 const hBits = cctx->appliedParams.cParams.hashLog; + seqStore_t* seqStorePtr = &(cctx->seqStore); + const BYTE* const base = cctx->base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowestIndex = cctx->dictLimit; + const BYTE* const lowest = base + lowestIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1]; + U32 offsetSaved = 0; + + /* init */ + ip += (ip==lowest); + { U32 const maxRep = (U32)(ip-lowest); + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + size_t const h = ZSTD_hashPtr(ip, hBits, mls); + U32 const current = (U32)(ip-base); + U32 const matchIndex = hashTable[h]; + const BYTE* match = base + matchIndex; + hashTable[h] = current; /* update hash table */ + + if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { + mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + ip++; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); + } else { + U32 offset; + if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) { + ip += ((ip-anchor) >> g_searchStrength) + 1; + continue; + } + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + offset = (U32)(ip-match); + while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ + hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base); + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } + + /* save reps for next block */ + seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved; + seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved; + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, + const void* src, size_t srcSize) +{ + const U32 mls = ctx->appliedParams.cParams.searchLength; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return; + case 5 : + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return; + case 6 : + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return; + case 7 : + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return; + } +} + + +static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, + const U32 mls) +{ + U32* hashTable = ctx->hashTable; + const U32 hBits = ctx->appliedParams.cParams.hashLog; + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const base = ctx->base; + const BYTE* const dictBase = ctx->dictBase; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowestIndex = ctx->lowLimit; + const BYTE* const dictStart = dictBase + lowestIndex; + const U32 dictLimit = ctx->dictLimit; + const BYTE* const lowPrefixPtr = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1]; + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t h = ZSTD_hashPtr(ip, hBits, mls); + const U32 matchIndex = hashTable[h]; + const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + const U32 current = (U32)(ip-base); + const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ + const BYTE* repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* repMatch = repBase + repIndex; + size_t mLength; + hashTable[h] = current; /* update hash table */ + + if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4; + ip++; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); + } else { + if ( (matchIndex < lowestIndex) || + (MEM_read32(match) != MEM_read32(ip)) ) { + ip += ((ip-anchor) >> g_searchStrength) + 1; + continue; + } + { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; + const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr; + U32 offset; + mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset = current - matchIndex; + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } } + + /* found a match : store it */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; + hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2; + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, + const void* src, size_t srcSize) +{ + U32 const mls = ctx->appliedParams.cParams.searchLength; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return; + case 5 : + ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return; + case 6 : + ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return; + case 7 : + ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return; + } +} diff --git a/lib/compress/zstd_fast.h b/lib/compress/zstd_fast.h new file mode 100644 index 00000000..f18b4617 --- /dev/null +++ b/lib/compress/zstd_fast.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +#ifndef ZSTD_FAST_H +#define ZSTD_FAST_H + +#include "zstd_compress.h" + +#if defined (__cplusplus) +extern "C" { +#endif + +void ZSTD_fillHashTable(ZSTD_CCtx* zc, const void* end, const U32 mls); +void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, + const void* src, size_t srcSize); +void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, + const void* src, size_t srcSize); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_FAST_H */ diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c new file mode 100644 index 00000000..00ec1e28 --- /dev/null +++ b/lib/compress/zstd_lazy.c @@ -0,0 +1,752 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +#include "zstd_lazy.h" + + +/*-************************************* +* Binary Tree search +***************************************/ +/** ZSTD_insertBt1() : add one or multiple positions to tree. +* ip : assumed <= iend-8 . +* @return : nb of positions added */ +static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares, + U32 extDict) +{ + U32* const hashTable = zc->hashTable; + U32 const hashLog = zc->appliedParams.cParams.hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const bt = zc->chainTable; + U32 const btLog = zc->appliedParams.cParams.chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 matchIndex = hashTable[h]; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = zc->base; + const BYTE* const dictBase = zc->dictBase; + const U32 dictLimit = zc->dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match; + const U32 current = (U32)(ip-base); + const U32 btLow = btMask >= current ? 0 : current - btMask; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = smallerPtr + 1; + U32 dummy32; /* to be nullified at the end */ + U32 const windowLow = zc->lowLimit; + U32 matchEndIdx = current+8; + size_t bestLength = 8; +#ifdef ZSTD_C_PREDICT + U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); + U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1); + predictedSmall += (predictedSmall>0); + predictedLarge += (predictedLarge>0); +#endif /* ZSTD_C_PREDICT */ + + hashTable[h] = current; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + +#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ + const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ + if (matchIndex == predictedSmall) { + /* no need to check length, result known */ + *smallerPtr = matchIndex; + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + predictedSmall = predictPtr[1] + (predictPtr[1]>0); + continue; + } + if (matchIndex == predictedLarge) { + *largerPtr = matchIndex; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + predictedLarge = predictPtr[0] + (predictPtr[0]>0); + continue; + } +#endif + if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + if (match[matchLength] == ip[matchLength]) + matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + bestLength = matchLength; + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + } + + if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */ + + if (match[matchLength] < ip[matchLength]) { /* necessarily within correct buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */ + if (matchEndIdx > current + 8) return matchEndIdx - current - 8; + return 1; +} + + +static size_t ZSTD_insertBtAndFindBestMatch ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + U32 nbCompares, const U32 mls, + U32 extDict) +{ + U32* const hashTable = zc->hashTable; + U32 const hashLog = zc->appliedParams.cParams.hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const bt = zc->chainTable; + U32 const btLog = zc->appliedParams.cParams.chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 matchIndex = hashTable[h]; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = zc->base; + const BYTE* const dictBase = zc->dictBase; + const U32 dictLimit = zc->dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const U32 current = (U32)(ip-base); + const U32 btLow = btMask >= current ? 0 : current - btMask; + const U32 windowLow = zc->lowLimit; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = bt + 2*(current&btMask) + 1; + U32 matchEndIdx = current+8; + U32 dummy32; /* to be nullified at the end */ + size_t bestLength = 0; + + hashTable[h] = current; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; + + if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + if (match[matchLength] == ip[matchLength]) + matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; + if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + + if (match[matchLength] < ip[matchLength]) { + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1; + return bestLength; +} + + +void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) +{ + const BYTE* const base = zc->base; + const U32 target = (U32)(ip - base); + U32 idx = zc->nextToUpdate; + + while(idx < target) + idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0); +} + +/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ +static size_t ZSTD_BtFindBestMatch ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 mls) +{ + if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); + return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0); +} + + +static size_t ZSTD_BtFindBestMatch_selectMLS ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); + case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); + } +} + + +void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) +{ + const BYTE* const base = zc->base; + const U32 target = (U32)(ip - base); + U32 idx = zc->nextToUpdate; + + while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1); +} + + +/** Tree updater, providing best match */ +static size_t ZSTD_BtFindBestMatch_extDict ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 mls) +{ + if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); + return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1); +} + + +static size_t ZSTD_BtFindBestMatch_selectMLS_extDict ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); + case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); + case 7 : + case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); + } +} + + + +/* ********************************* +* Hash Chain +***********************************/ +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask] + +/* Update chains up to ip (excluded) + Assumption : always within prefix (i.e. not within extDict) */ +U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) +{ + U32* const hashTable = zc->hashTable; + const U32 hashLog = zc->appliedParams.cParams.hashLog; + U32* const chainTable = zc->chainTable; + const U32 chainMask = (1 << zc->appliedParams.cParams.chainLog) - 1; + const BYTE* const base = zc->base; + const U32 target = (U32)(ip - base); + U32 idx = zc->nextToUpdate; + + while(idx < target) { /* catch up */ + size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); + NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; + hashTable[h] = idx; + idx++; + } + + zc->nextToUpdate = target; + return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; +} + + +/* inlining is important to hardwire a hot branch (template emulation) */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_HcFindBestMatch_generic ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 mls, const U32 extDict) +{ + U32* const chainTable = zc->chainTable; + const U32 chainSize = (1 << zc->appliedParams.cParams.chainLog); + const U32 chainMask = chainSize-1; + const BYTE* const base = zc->base; + const BYTE* const dictBase = zc->dictBase; + const U32 dictLimit = zc->dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const U32 lowLimit = zc->lowLimit; + const U32 current = (U32)(ip-base); + const U32 minChain = current > chainSize ? current - chainSize : 0; + int nbAttempts=maxNbAttempts; + size_t ml=4-1; + + /* HC4 match finder */ + U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls); + + for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) { + const BYTE* match; + size_t currentMl=0; + if ((!extDict) || matchIndex >= dictLimit) { + match = base + matchIndex; + if (match[ml] == ip[ml]) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + match = dictBase + matchIndex; + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + + if (matchIndex <= minChain) break; + matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); + } + + return ml; +} + + +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS ( + ZSTD_CCtx* zc, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0); + case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0); + } +} + + +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( + ZSTD_CCtx* zc, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1); + case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1); + } +} + + +/* ******************************* +* Common parser - lazy strategy +*********************************/ +FORCE_INLINE_TEMPLATE +void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, + const U32 searchMethod, const U32 depth) +{ + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ctx->base + ctx->dictLimit; + + U32 const maxSearches = 1 << ctx->appliedParams.cParams.searchLog; + U32 const mls = ctx->appliedParams.cParams.searchLength; + + typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, + size_t* offsetPtr, + U32 maxNbAttempts, U32 matchLengthSearch); + searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS; + U32 offset_1 = seqStorePtr->rep[0], offset_2 = seqStorePtr->rep[1], savedOffset=0; + + /* init */ + ip += (ip==base); + ctx->nextToUpdate3 = ctx->nextToUpdate; + { U32 const maxRep = (U32)(ip-base); + if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; + if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; + } + + /* Match Loop */ + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + + /* check repCode */ + if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) { + /* repcode : we take it */ + matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + if (depth==0) goto _storeSequence; + } + + /* first search (depth 0) */ + { size_t offsetFound = 99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < 4) { + ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(mlRep * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const ml2 = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(ml2 * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((ml2 >= 4) && (gain2 > gain1)) + matchLength = ml2, offset = 0, start = ip; + } + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* NOTE: + * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. + * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which + * overflows the pointer, which is undefined behavior. + */ + /* catch up */ + if (offset) { + while ( (start > anchor) + && (start > base+offset-ZSTD_REP_MOVE) + && (start[-1] == (start-offset+ZSTD_REP_MOVE)[-1]) ) /* only search for offset within prefix */ + { start--; matchLength++; } + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + while ( (ip <= ilimit) + && ((offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } + + /* Save reps for next block */ + seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : savedOffset; + seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : savedOffset; + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2); +} + +void ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2); +} + +void ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1); +} + +void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0); +} + + +FORCE_INLINE_TEMPLATE +void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, + const U32 searchMethod, const U32 depth) +{ + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ctx->base; + const U32 dictLimit = ctx->dictLimit; + const U32 lowestIndex = ctx->lowLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictBase = ctx->dictBase; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const dictStart = dictBase + ctx->lowLimit; + + const U32 maxSearches = 1 << ctx->appliedParams.cParams.searchLog; + const U32 mls = ctx->appliedParams.cParams.searchLength; + + typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, + size_t* offsetPtr, + U32 maxNbAttempts, U32 matchLengthSearch); + searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS; + + U32 offset_1 = seqStorePtr->rep[0], offset_2 = seqStorePtr->rep[1]; + + /* init */ + ctx->nextToUpdate3 = ctx->nextToUpdate; + ip += (ip == prefixStart); + + /* Match Loop */ + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + U32 current = (U32)(ip-base); + + /* check repCode */ + { const U32 repIndex = (U32)(current+1 - offset_1); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip+1) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4; + if (depth==0) goto _storeSequence; + } } + + /* first search (depth 0) */ + { size_t offsetFound = 99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < 4) { + ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 1 */ + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 2 */ + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* catch up */ + if (offset) { + U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; + const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + while (ip <= ilimit) { + const U32 repIndex = (U32)((ip-base) - offset_2); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } + break; + } } + + /* Save reps for next block */ + seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2; + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0); +} + +void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1); +} + +void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2); +} + +void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2); +} diff --git a/lib/compress/zstd_lazy.h b/lib/compress/zstd_lazy.h new file mode 100644 index 00000000..451cddf9 --- /dev/null +++ b/lib/compress/zstd_lazy.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +#ifndef ZSTD_LAZY_H +#define ZSTD_LAZY_H + +#include "zstd_compress.h" + +#if defined (__cplusplus) +extern "C" { +#endif + +U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls); +void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls); +void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls); + +void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +void ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +void ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize); + +void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_LAZY_H */ diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c new file mode 100644 index 00000000..03e94551 --- /dev/null +++ b/lib/compress/zstd_opt.c @@ -0,0 +1,954 @@ +/* + * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +#include "zstd_opt.h" +#include "zstd_lazy.h" + + +#define ZSTD_LITFREQ_ADD 2 +#define ZSTD_FREQ_DIV 4 +#define ZSTD_MAX_PRICE (1<<30) + +/*-************************************* +* Price functions for optimal parser +***************************************/ +static void ZSTD_setLog2Prices(optState_t* optPtr) +{ + optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1); + optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1); + optPtr->log2litSum = ZSTD_highbit32(optPtr->litSum+1); + optPtr->log2offCodeSum = ZSTD_highbit32(optPtr->offCodeSum+1); + optPtr->factor = 1 + ((optPtr->litSum>>5) / optPtr->litLengthSum) + ((optPtr->litSum<<1) / (optPtr->litSum + optPtr->matchSum)); +} + + +static void ZSTD_rescaleFreqs(optState_t* optPtr, const BYTE* src, size_t srcSize) +{ + unsigned u; + + optPtr->cachedLiterals = NULL; + optPtr->cachedPrice = optPtr->cachedLitLength = 0; + optPtr->staticPrices = 0; + + if (optPtr->litLengthSum == 0) { + if (srcSize <= 1024) optPtr->staticPrices = 1; + + assert(optPtr->litFreq!=NULL); + for (u=0; u<=MaxLit; u++) + optPtr->litFreq[u] = 0; + for (u=0; ulitFreq[src[u]]++; + + optPtr->litSum = 0; + optPtr->litLengthSum = MaxLL+1; + optPtr->matchLengthSum = MaxML+1; + optPtr->offCodeSum = (MaxOff+1); + optPtr->matchSum = (ZSTD_LITFREQ_ADD<litFreq[u] = 1 + (optPtr->litFreq[u]>>ZSTD_FREQ_DIV); + optPtr->litSum += optPtr->litFreq[u]; + } + for (u=0; u<=MaxLL; u++) + optPtr->litLengthFreq[u] = 1; + for (u=0; u<=MaxML; u++) + optPtr->matchLengthFreq[u] = 1; + for (u=0; u<=MaxOff; u++) + optPtr->offCodeFreq[u] = 1; + } else { + optPtr->matchLengthSum = 0; + optPtr->litLengthSum = 0; + optPtr->offCodeSum = 0; + optPtr->matchSum = 0; + optPtr->litSum = 0; + + for (u=0; u<=MaxLit; u++) { + optPtr->litFreq[u] = 1 + (optPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1)); + optPtr->litSum += optPtr->litFreq[u]; + } + for (u=0; u<=MaxLL; u++) { + optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1)); + optPtr->litLengthSum += optPtr->litLengthFreq[u]; + } + for (u=0; u<=MaxML; u++) { + optPtr->matchLengthFreq[u] = 1 + (optPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV); + optPtr->matchLengthSum += optPtr->matchLengthFreq[u]; + optPtr->matchSum += optPtr->matchLengthFreq[u] * (u + 3); + } + optPtr->matchSum *= ZSTD_LITFREQ_ADD; + for (u=0; u<=MaxOff; u++) { + optPtr->offCodeFreq[u] = 1 + (optPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV); + optPtr->offCodeSum += optPtr->offCodeFreq[u]; + } + } + + ZSTD_setLog2Prices(optPtr); +} + + +static U32 ZSTD_getLiteralPrice(optState_t* optPtr, U32 litLength, const BYTE* literals) +{ + U32 price, u; + + if (optPtr->staticPrices) + return ZSTD_highbit32((U32)litLength+1) + (litLength*6); + + if (litLength == 0) + return optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[0]+1); + + /* literals */ + if (optPtr->cachedLiterals == literals) { + U32 const additional = litLength - optPtr->cachedLitLength; + const BYTE* literals2 = optPtr->cachedLiterals + optPtr->cachedLitLength; + price = optPtr->cachedPrice + additional * optPtr->log2litSum; + for (u=0; u < additional; u++) + price -= ZSTD_highbit32(optPtr->litFreq[literals2[u]]+1); + optPtr->cachedPrice = price; + optPtr->cachedLitLength = litLength; + } else { + price = litLength * optPtr->log2litSum; + for (u=0; u < litLength; u++) + price -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1); + + if (litLength >= 12) { + optPtr->cachedLiterals = literals; + optPtr->cachedPrice = price; + optPtr->cachedLitLength = litLength; + } + } + + /* literal Length */ + { const BYTE LL_deltaCode = 19; + const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; + price += LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1); + } + + return price; +} + + +FORCE_INLINE_TEMPLATE U32 ZSTD_getPrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra) +{ + /* offset */ + U32 price; + BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1); + + if (optPtr->staticPrices) + return ZSTD_getLiteralPrice(optPtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode; + + price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1); + if (!ultra && offCode >= 20) price += (offCode-19)*2; + + /* match Length */ + { const BYTE ML_deltaCode = 36; + const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; + price += ML_bits[mlCode] + optPtr->log2matchLengthSum - ZSTD_highbit32(optPtr->matchLengthFreq[mlCode]+1); + } + + return price + ZSTD_getLiteralPrice(optPtr, litLength, literals) + optPtr->factor; +} + + +static void ZSTD_updatePrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) +{ + U32 u; + + /* literals */ + optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; + for (u=0; u < litLength; u++) + optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; + + /* literal Length */ + { const BYTE LL_deltaCode = 19; + const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; + optPtr->litLengthFreq[llCode]++; + optPtr->litLengthSum++; + } + + /* match offset */ + { BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1); + optPtr->offCodeSum++; + optPtr->offCodeFreq[offCode]++; + } + + /* match Length */ + { const BYTE ML_deltaCode = 36; + const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; + optPtr->matchLengthFreq[mlCode]++; + optPtr->matchLengthSum++; + } + + ZSTD_setLog2Prices(optPtr); +} + + +#define SET_PRICE(pos, mlen_, offset_, litlen_, price_) \ + { \ + while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } \ + opt[pos].mlen = mlen_; \ + opt[pos].off = offset_; \ + opt[pos].litlen = litlen_; \ + opt[pos].price = price_; \ + } + + +/* function safe only for comparisons */ +static U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) +{ + switch (length) + { + default : + case 4 : return MEM_read32(memPtr); + case 3 : if (MEM_isLittleEndian()) + return MEM_read32(memPtr)<<8; + else + return MEM_read32(memPtr)>>8; + } +} + + +/* Update hashTable3 up to ip (excluded) + Assumption : always within prefix (i.e. not within extDict) */ +static +U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip) +{ + U32* const hashTable3 = zc->hashTable3; + U32 const hashLog3 = zc->hashLog3; + const BYTE* const base = zc->base; + U32 idx = zc->nextToUpdate3; + const U32 target = zc->nextToUpdate3 = (U32)(ip - base); + const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3); + + while(idx < target) { + hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; + idx++; + } + + return hashTable3[hash3]; +} + + +/*-************************************* +* Binary Tree search +***************************************/ +static U32 ZSTD_insertBtAndGetAllMatches ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, + U32 nbCompares, const U32 mls, + U32 extDict, ZSTD_match_t* matches, const U32 minMatchLen) +{ + const BYTE* const base = zc->base; + const U32 current = (U32)(ip-base); + const U32 hashLog = zc->appliedParams.cParams.hashLog; + const size_t h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const hashTable = zc->hashTable; + U32 matchIndex = hashTable[h]; + U32* const bt = zc->chainTable; + const U32 btLog = zc->appliedParams.cParams.chainLog - 1; + const U32 btMask= (1U << btLog) - 1; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const dictBase = zc->dictBase; + const U32 dictLimit = zc->dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const U32 btLow = btMask >= current ? 0 : current - btMask; + const U32 windowLow = zc->lowLimit; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = bt + 2*(current&btMask) + 1; + U32 matchEndIdx = current+8; + U32 dummy32; /* to be nullified at the end */ + U32 mnum = 0; + + const U32 minMatch = (mls == 3) ? 3 : 4; + size_t bestLength = minMatchLen-1; + + if (minMatch == 3) { /* HC3 match finder */ + U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip); + if (matchIndex3>windowLow && (current - matchIndex3 < (1<<18))) { + const BYTE* match; + size_t currentMl=0; + if ((!extDict) || matchIndex3 >= dictLimit) { + match = base + matchIndex3; + if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit); + } else { + match = dictBase + matchIndex3; + if (ZSTD_readMINMATCH(match, MINMATCH) == ZSTD_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH; + } + + /* save best solution */ + if (currentMl > bestLength) { + bestLength = currentMl; + matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex3; + matches[mnum].len = (U32)currentMl; + mnum++; + if (currentMl > ZSTD_OPT_NUM) goto update; + if (ip+currentMl == iLimit) goto update; /* best possible, and avoid read overflow*/ + } + } + } + + hashTable[h] = current; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; + + if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + if (match[matchLength] == ip[matchLength]) { + matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1; + } + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; + bestLength = matchLength; + matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex; + matches[mnum].len = (U32)matchLength; + mnum++; + if (matchLength > ZSTD_OPT_NUM) break; + if (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */ + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + + if (match[matchLength] < ip[matchLength]) { + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + +update: + zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1; + return mnum; +} + + +/** Tree updater, providing best match */ +static U32 ZSTD_BtGetAllMatches ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, + const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen) +{ + if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); + return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen); +} + + +static U32 ZSTD_BtGetAllMatches_selectMLS ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iHighLimit, + const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen) +{ + switch(matchLengthSearch) + { + case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); + default : + case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); + case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); + case 7 : + case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); + } +} + +/** Tree updater, providing best match */ +static U32 ZSTD_BtGetAllMatches_extDict ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, + const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen) +{ + if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); + return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen); +} + + +static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iHighLimit, + const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen) +{ + switch(matchLengthSearch) + { + case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); + default : + case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); + case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); + case 7 : + case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); + } +} + + +/*-******************************* +* Optimal parser +*********************************/ +FORCE_INLINE_TEMPLATE +void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, const int ultra) +{ + seqStore_t* seqStorePtr = &(ctx->seqStore); + optState_t* optStatePtr = &(ctx->optState); + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ctx->base; + const BYTE* const prefixStart = base + ctx->dictLimit; + + const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog; + const U32 sufficient_len = ctx->appliedParams.cParams.targetLength; + const U32 mls = ctx->appliedParams.cParams.searchLength; + const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4; + + ZSTD_optimal_t* opt = optStatePtr->priceTable; + ZSTD_match_t* matches = optStatePtr->matchTable; + const BYTE* inr; + U32 offset, rep[ZSTD_REP_NUM]; + + /* init */ + ctx->nextToUpdate3 = ctx->nextToUpdate; + ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize); + ip += (ip==prefixStart); + { U32 i; for (i=0; irep[i]; } + + /* Match Loop */ + while (ip < ilimit) { + U32 cur, match_num, last_pos, litlen, price; + U32 u, mlen, best_mlen, best_off, litLength; + memset(opt, 0, sizeof(ZSTD_optimal_t)); + last_pos = 0; + litlen = (U32)(ip - anchor); + + /* check repCode */ + { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); + for (i=(ip == anchor); i 0) && (repCur < (S32)(ip-prefixStart)) + && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repCur, minMatch))) { + mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch; + if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { + best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; + goto _storeSequence; + } + best_off = i - (ip == anchor); + do { + price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); + if (mlen > last_pos || price < opt[mlen].price) + SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ + mlen--; + } while (mlen >= minMatch); + } } } + + match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch); + + if (!last_pos && !match_num) { ip++; continue; } + + if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) { + best_mlen = matches[match_num-1].len; + best_off = matches[match_num-1].off; + cur = 0; + last_pos = 1; + goto _storeSequence; + } + + /* set prices using matches at position = 0 */ + best_mlen = (last_pos) ? last_pos : minMatch; + for (u = 0; u < match_num; u++) { + mlen = (u>0) ? matches[u-1].len+1 : best_mlen; + best_mlen = matches[u].len; + while (mlen <= best_mlen) { + price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); + if (mlen > last_pos || price < opt[mlen].price) + SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */ + mlen++; + } } + + if (last_pos < minMatch) { ip++; continue; } + + /* initialize opt[0] */ + { U32 i ; for (i=0; i litlen) { + price = opt[cur - litlen].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-litlen); + } else + price = ZSTD_getLiteralPrice(optStatePtr, litlen, anchor); + } else { + litlen = 1; + price = opt[cur - 1].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-1); + } + + if (cur > last_pos || price <= opt[cur].price) + SET_PRICE(cur, 1, 0, litlen, price); + + if (cur == last_pos) break; + + if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */ + continue; + + mlen = opt[cur].mlen; + if (opt[cur].off > ZSTD_REP_MOVE_OPT) { + opt[cur].rep[2] = opt[cur-mlen].rep[1]; + opt[cur].rep[1] = opt[cur-mlen].rep[0]; + opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; + } else { + opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; + opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; + opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]); + } + + best_mlen = minMatch; + { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); + for (i=(opt[cur].mlen != 1); i 0) && (repCur < (S32)(inr-prefixStart)) + && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(inr - repCur, minMatch))) { + mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch; + + if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { + best_mlen = mlen; best_off = i; last_pos = cur + 1; + goto _storeSequence; + } + + best_off = i - (opt[cur].mlen != 1); + if (mlen > best_mlen) best_mlen = mlen; + + do { + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) { + price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra); + } else + price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); + } else { + litlen = 0; + price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, best_off, mlen - MINMATCH, ultra); + } + + if (cur + mlen > last_pos || price <= opt[cur + mlen].price) + SET_PRICE(cur + mlen, mlen, i, litlen, price); + mlen--; + } while (mlen >= minMatch); + } } } + + match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen); + + if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) { + best_mlen = matches[match_num-1].len; + best_off = matches[match_num-1].off; + last_pos = cur + 1; + goto _storeSequence; + } + + /* set prices using matches at position = cur */ + for (u = 0; u < match_num; u++) { + mlen = (u>0) ? matches[u-1].len+1 : best_mlen; + best_mlen = matches[u].len; + + while (mlen <= best_mlen) { + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) + price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra); + else + price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); + } else { + litlen = 0; + price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra); + } + + if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) + SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); + + mlen++; + } } } + + best_mlen = opt[last_pos].mlen; + best_off = opt[last_pos].off; + cur = last_pos - best_mlen; + + /* store sequence */ +_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ + opt[0].mlen = 1; + + while (1) { + mlen = opt[cur].mlen; + offset = opt[cur].off; + opt[cur].mlen = best_mlen; + opt[cur].off = best_off; + best_mlen = mlen; + best_off = offset; + if (mlen > cur) break; + cur -= mlen; + } + + for (u = 0; u <= last_pos;) { + u += opt[u].mlen; + } + + for (cur=0; cur < last_pos; ) { + mlen = opt[cur].mlen; + if (mlen == 1) { ip++; cur++; continue; } + offset = opt[cur].off; + cur += mlen; + litLength = (U32)(ip - anchor); + + if (offset > ZSTD_REP_MOVE_OPT) { + rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = offset - ZSTD_REP_MOVE_OPT; + offset--; + } else { + if (offset != 0) { + best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]); + if (offset != 1) rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = best_off; + } + if (litLength==0) offset--; + } + + ZSTD_updatePrice(optStatePtr, litLength, anchor, offset, mlen-MINMATCH); + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); + anchor = ip = ip + mlen; + } } /* for (cur=0; cur < last_pos; ) */ + + /* Save reps for next block */ + { int i; for (i=0; irepToConfirm[i] = rep[i]; } + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0); +} + +void ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1); +} + + +FORCE_INLINE_TEMPLATE +void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, const int ultra) +{ + seqStore_t* seqStorePtr = &(ctx->seqStore); + optState_t* optStatePtr = &(ctx->optState); + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ctx->base; + const U32 lowestIndex = ctx->lowLimit; + const U32 dictLimit = ctx->dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictBase = ctx->dictBase; + const BYTE* const dictEnd = dictBase + dictLimit; + + const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog; + const U32 sufficient_len = ctx->appliedParams.cParams.targetLength; + const U32 mls = ctx->appliedParams.cParams.searchLength; + const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4; + + ZSTD_optimal_t* opt = optStatePtr->priceTable; + ZSTD_match_t* matches = optStatePtr->matchTable; + const BYTE* inr; + + /* init */ + U32 offset, rep[ZSTD_REP_NUM]; + { U32 i; for (i=0; irep[i]; } + + ctx->nextToUpdate3 = ctx->nextToUpdate; + ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize); + ip += (ip==prefixStart); + + /* Match Loop */ + while (ip < ilimit) { + U32 cur, match_num, last_pos, litlen, price; + U32 u, mlen, best_mlen, best_off, litLength; + U32 current = (U32)(ip-base); + memset(opt, 0, sizeof(ZSTD_optimal_t)); + last_pos = 0; + opt[0].litlen = (U32)(ip - anchor); + + /* check repCode */ + { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); + for (i = (ip==anchor); i 0 && repCur <= (S32)current) + && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ + && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; + + if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { + best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; + goto _storeSequence; + } + + best_off = i - (ip==anchor); + litlen = opt[0].litlen; + do { + price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); + if (mlen > last_pos || price < opt[mlen].price) + SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ + mlen--; + } while (mlen >= minMatch); + } } } + + match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */ + + if (!last_pos && !match_num) { ip++; continue; } + + { U32 i; for (i=0; i sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) { + best_mlen = matches[match_num-1].len; + best_off = matches[match_num-1].off; + cur = 0; + last_pos = 1; + goto _storeSequence; + } + + best_mlen = (last_pos) ? last_pos : minMatch; + + /* set prices using matches at position = 0 */ + for (u = 0; u < match_num; u++) { + mlen = (u>0) ? matches[u-1].len+1 : best_mlen; + best_mlen = matches[u].len; + litlen = opt[0].litlen; + while (mlen <= best_mlen) { + price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); + if (mlen > last_pos || price < opt[mlen].price) + SET_PRICE(mlen, mlen, matches[u].off, litlen, price); + mlen++; + } } + + if (last_pos < minMatch) { + ip++; continue; + } + + /* check further positions */ + for (cur = 1; cur <= last_pos; cur++) { + inr = ip + cur; + + if (opt[cur-1].mlen == 1) { + litlen = opt[cur-1].litlen + 1; + if (cur > litlen) { + price = opt[cur - litlen].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-litlen); + } else + price = ZSTD_getLiteralPrice(optStatePtr, litlen, anchor); + } else { + litlen = 1; + price = opt[cur - 1].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-1); + } + + if (cur > last_pos || price <= opt[cur].price) + SET_PRICE(cur, 1, 0, litlen, price); + + if (cur == last_pos) break; + + if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */ + continue; + + mlen = opt[cur].mlen; + if (opt[cur].off > ZSTD_REP_MOVE_OPT) { + opt[cur].rep[2] = opt[cur-mlen].rep[1]; + opt[cur].rep[1] = opt[cur-mlen].rep[0]; + opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; + } else { + opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; + opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; + opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]); + } + + best_mlen = minMatch; + { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); + for (i = (mlen != 1); i 0 && repCur <= (S32)(current+cur)) + && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ + && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; + + if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { + best_mlen = mlen; best_off = i; last_pos = cur + 1; + goto _storeSequence; + } + + best_off = i - (opt[cur].mlen != 1); + if (mlen > best_mlen) best_mlen = mlen; + + do { + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) { + price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra); + } else + price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); + } else { + litlen = 0; + price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, best_off, mlen - MINMATCH, ultra); + } + + if (cur + mlen > last_pos || price <= opt[cur + mlen].price) + SET_PRICE(cur + mlen, mlen, i, litlen, price); + mlen--; + } while (mlen >= minMatch); + } } } + + match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch); + + if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) { + best_mlen = matches[match_num-1].len; + best_off = matches[match_num-1].off; + last_pos = cur + 1; + goto _storeSequence; + } + + /* set prices using matches at position = cur */ + for (u = 0; u < match_num; u++) { + mlen = (u>0) ? matches[u-1].len+1 : best_mlen; + best_mlen = matches[u].len; + + while (mlen <= best_mlen) { + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) + price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra); + else + price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); + } else { + litlen = 0; + price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra); + } + + if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) + SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); + + mlen++; + } } } /* for (cur = 1; cur <= last_pos; cur++) */ + + best_mlen = opt[last_pos].mlen; + best_off = opt[last_pos].off; + cur = last_pos - best_mlen; + + /* store sequence */ +_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ + opt[0].mlen = 1; + + while (1) { + mlen = opt[cur].mlen; + offset = opt[cur].off; + opt[cur].mlen = best_mlen; + opt[cur].off = best_off; + best_mlen = mlen; + best_off = offset; + if (mlen > cur) break; + cur -= mlen; + } + + for (u = 0; u <= last_pos; ) { + u += opt[u].mlen; + } + + for (cur=0; cur < last_pos; ) { + mlen = opt[cur].mlen; + if (mlen == 1) { ip++; cur++; continue; } + offset = opt[cur].off; + cur += mlen; + litLength = (U32)(ip - anchor); + + if (offset > ZSTD_REP_MOVE_OPT) { + rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = offset - ZSTD_REP_MOVE_OPT; + offset--; + } else { + if (offset != 0) { + best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]); + if (offset != 1) rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = best_off; + } + + if (litLength==0) offset--; + } + + ZSTD_updatePrice(optStatePtr, litLength, anchor, offset, mlen-MINMATCH); + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); + anchor = ip = ip + mlen; + } } /* for (cur=0; cur < last_pos; ) */ + + /* Save reps for next block */ + { int i; for (i=0; irepToConfirm[i] = rep[i]; } + + /* Last Literals */ + { size_t lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0); +} + +void ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1); +} diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h index 4d938c80..0991a1f1 100644 --- a/lib/compress/zstd_opt.h +++ b/lib/compress/zstd_opt.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -7,932 +7,23 @@ * in the COPYING file in the root directory of this source tree). */ +#ifndef ZSTD_OPT_H +#define ZSTD_OPT_H -/* Note : this file is intended to be included within zstd_compress.c */ +#include "zstd_compress.h" +#if defined (__cplusplus) +extern "C" { +#endif -#ifndef ZSTD_OPT_H_91842398743 -#define ZSTD_OPT_H_91842398743 +void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +void ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); +void ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize); -#define ZSTD_LITFREQ_ADD 2 -#define ZSTD_FREQ_DIV 4 -#define ZSTD_MAX_PRICE (1<<30) - -/*-************************************* -* Price functions for optimal parser -***************************************/ -static void ZSTD_setLog2Prices(optState_t* optPtr) -{ - optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1); - optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1); - optPtr->log2litSum = ZSTD_highbit32(optPtr->litSum+1); - optPtr->log2offCodeSum = ZSTD_highbit32(optPtr->offCodeSum+1); - optPtr->factor = 1 + ((optPtr->litSum>>5) / optPtr->litLengthSum) + ((optPtr->litSum<<1) / (optPtr->litSum + optPtr->matchSum)); +#if defined (__cplusplus) } +#endif - -static void ZSTD_rescaleFreqs(optState_t* optPtr, const BYTE* src, size_t srcSize) -{ - unsigned u; - - optPtr->cachedLiterals = NULL; - optPtr->cachedPrice = optPtr->cachedLitLength = 0; - optPtr->staticPrices = 0; - - if (optPtr->litLengthSum == 0) { - if (srcSize <= 1024) optPtr->staticPrices = 1; - - assert(optPtr->litFreq!=NULL); - for (u=0; u<=MaxLit; u++) - optPtr->litFreq[u] = 0; - for (u=0; ulitFreq[src[u]]++; - - optPtr->litSum = 0; - optPtr->litLengthSum = MaxLL+1; - optPtr->matchLengthSum = MaxML+1; - optPtr->offCodeSum = (MaxOff+1); - optPtr->matchSum = (ZSTD_LITFREQ_ADD<litFreq[u] = 1 + (optPtr->litFreq[u]>>ZSTD_FREQ_DIV); - optPtr->litSum += optPtr->litFreq[u]; - } - for (u=0; u<=MaxLL; u++) - optPtr->litLengthFreq[u] = 1; - for (u=0; u<=MaxML; u++) - optPtr->matchLengthFreq[u] = 1; - for (u=0; u<=MaxOff; u++) - optPtr->offCodeFreq[u] = 1; - } else { - optPtr->matchLengthSum = 0; - optPtr->litLengthSum = 0; - optPtr->offCodeSum = 0; - optPtr->matchSum = 0; - optPtr->litSum = 0; - - for (u=0; u<=MaxLit; u++) { - optPtr->litFreq[u] = 1 + (optPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1)); - optPtr->litSum += optPtr->litFreq[u]; - } - for (u=0; u<=MaxLL; u++) { - optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1)); - optPtr->litLengthSum += optPtr->litLengthFreq[u]; - } - for (u=0; u<=MaxML; u++) { - optPtr->matchLengthFreq[u] = 1 + (optPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV); - optPtr->matchLengthSum += optPtr->matchLengthFreq[u]; - optPtr->matchSum += optPtr->matchLengthFreq[u] * (u + 3); - } - optPtr->matchSum *= ZSTD_LITFREQ_ADD; - for (u=0; u<=MaxOff; u++) { - optPtr->offCodeFreq[u] = 1 + (optPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV); - optPtr->offCodeSum += optPtr->offCodeFreq[u]; - } - } - - ZSTD_setLog2Prices(optPtr); -} - - -static U32 ZSTD_getLiteralPrice(optState_t* optPtr, U32 litLength, const BYTE* literals) -{ - U32 price, u; - - if (optPtr->staticPrices) - return ZSTD_highbit32((U32)litLength+1) + (litLength*6); - - if (litLength == 0) - return optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[0]+1); - - /* literals */ - if (optPtr->cachedLiterals == literals) { - U32 const additional = litLength - optPtr->cachedLitLength; - const BYTE* literals2 = optPtr->cachedLiterals + optPtr->cachedLitLength; - price = optPtr->cachedPrice + additional * optPtr->log2litSum; - for (u=0; u < additional; u++) - price -= ZSTD_highbit32(optPtr->litFreq[literals2[u]]+1); - optPtr->cachedPrice = price; - optPtr->cachedLitLength = litLength; - } else { - price = litLength * optPtr->log2litSum; - for (u=0; u < litLength; u++) - price -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1); - - if (litLength >= 12) { - optPtr->cachedLiterals = literals; - optPtr->cachedPrice = price; - optPtr->cachedLitLength = litLength; - } - } - - /* literal Length */ - { const BYTE LL_deltaCode = 19; - const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; - price += LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1); - } - - return price; -} - - -FORCE_INLINE_TEMPLATE U32 ZSTD_getPrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra) -{ - /* offset */ - U32 price; - BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1); - - if (optPtr->staticPrices) - return ZSTD_getLiteralPrice(optPtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode; - - price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1); - if (!ultra && offCode >= 20) price += (offCode-19)*2; - - /* match Length */ - { const BYTE ML_deltaCode = 36; - const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; - price += ML_bits[mlCode] + optPtr->log2matchLengthSum - ZSTD_highbit32(optPtr->matchLengthFreq[mlCode]+1); - } - - return price + ZSTD_getLiteralPrice(optPtr, litLength, literals) + optPtr->factor; -} - - -static void ZSTD_updatePrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) -{ - U32 u; - - /* literals */ - optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; - for (u=0; u < litLength; u++) - optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; - - /* literal Length */ - { const BYTE LL_deltaCode = 19; - const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; - optPtr->litLengthFreq[llCode]++; - optPtr->litLengthSum++; - } - - /* match offset */ - { BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1); - optPtr->offCodeSum++; - optPtr->offCodeFreq[offCode]++; - } - - /* match Length */ - { const BYTE ML_deltaCode = 36; - const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; - optPtr->matchLengthFreq[mlCode]++; - optPtr->matchLengthSum++; - } - - ZSTD_setLog2Prices(optPtr); -} - - -#define SET_PRICE(pos, mlen_, offset_, litlen_, price_) \ - { \ - while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } \ - opt[pos].mlen = mlen_; \ - opt[pos].off = offset_; \ - opt[pos].litlen = litlen_; \ - opt[pos].price = price_; \ - } - - -/* function safe only for comparisons */ -static U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) -{ - switch (length) - { - default : - case 4 : return MEM_read32(memPtr); - case 3 : if (MEM_isLittleEndian()) - return MEM_read32(memPtr)<<8; - else - return MEM_read32(memPtr)>>8; - } -} - - -/* Update hashTable3 up to ip (excluded) - Assumption : always within prefix (i.e. not within extDict) */ -static -U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip) -{ - U32* const hashTable3 = zc->hashTable3; - U32 const hashLog3 = zc->hashLog3; - const BYTE* const base = zc->base; - U32 idx = zc->nextToUpdate3; - const U32 target = zc->nextToUpdate3 = (U32)(ip - base); - const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3); - - while(idx < target) { - hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; - idx++; - } - - return hashTable3[hash3]; -} - - -/*-************************************* -* Binary Tree search -***************************************/ -static U32 ZSTD_insertBtAndGetAllMatches ( - ZSTD_CCtx* zc, - const BYTE* const ip, const BYTE* const iLimit, - U32 nbCompares, const U32 mls, - U32 extDict, ZSTD_match_t* matches, const U32 minMatchLen) -{ - const BYTE* const base = zc->base; - const U32 current = (U32)(ip-base); - const U32 hashLog = zc->appliedParams.cParams.hashLog; - const size_t h = ZSTD_hashPtr(ip, hashLog, mls); - U32* const hashTable = zc->hashTable; - U32 matchIndex = hashTable[h]; - U32* const bt = zc->chainTable; - const U32 btLog = zc->appliedParams.cParams.chainLog - 1; - const U32 btMask= (1U << btLog) - 1; - size_t commonLengthSmaller=0, commonLengthLarger=0; - const BYTE* const dictBase = zc->dictBase; - const U32 dictLimit = zc->dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const U32 btLow = btMask >= current ? 0 : current - btMask; - const U32 windowLow = zc->lowLimit; - U32* smallerPtr = bt + 2*(current&btMask); - U32* largerPtr = bt + 2*(current&btMask) + 1; - U32 matchEndIdx = current+8; - U32 dummy32; /* to be nullified at the end */ - U32 mnum = 0; - - const U32 minMatch = (mls == 3) ? 3 : 4; - size_t bestLength = minMatchLen-1; - - if (minMatch == 3) { /* HC3 match finder */ - U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip); - if (matchIndex3>windowLow && (current - matchIndex3 < (1<<18))) { - const BYTE* match; - size_t currentMl=0; - if ((!extDict) || matchIndex3 >= dictLimit) { - match = base + matchIndex3; - if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit); - } else { - match = dictBase + matchIndex3; - if (ZSTD_readMINMATCH(match, MINMATCH) == ZSTD_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH; - } - - /* save best solution */ - if (currentMl > bestLength) { - bestLength = currentMl; - matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex3; - matches[mnum].len = (U32)currentMl; - mnum++; - if (currentMl > ZSTD_OPT_NUM) goto update; - if (ip+currentMl == iLimit) goto update; /* best possible, and avoid read overflow*/ - } - } - } - - hashTable[h] = current; /* Update Hash Table */ - - while (nbCompares-- && (matchIndex > windowLow)) { - U32* nextPtr = bt + 2*(matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - const BYTE* match; - - if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { - match = base + matchIndex; - if (match[matchLength] == ip[matchLength]) { - matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1; - } - } else { - match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); - if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ - } - - if (matchLength > bestLength) { - if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; - bestLength = matchLength; - matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex; - matches[mnum].len = (U32)matchLength; - mnum++; - if (matchLength > ZSTD_OPT_NUM) break; - if (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */ - break; /* drop, to guarantee consistency (miss a little bit of compression) */ - } - - if (match[matchLength] < ip[matchLength]) { - /* match is smaller than current */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - } else { - /* match is larger than current */ - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } } - - *smallerPtr = *largerPtr = 0; - -update: - zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1; - return mnum; -} - - -/** Tree updater, providing best match */ -static U32 ZSTD_BtGetAllMatches ( - ZSTD_CCtx* zc, - const BYTE* const ip, const BYTE* const iLimit, - const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen) -{ - if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen); -} - - -static U32 ZSTD_BtGetAllMatches_selectMLS ( - ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* ip, const BYTE* const iHighLimit, - const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen) -{ - switch(matchLengthSearch) - { - case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); - default : - case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); - case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); - case 7 : - case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); - } -} - -/** Tree updater, providing best match */ -static U32 ZSTD_BtGetAllMatches_extDict ( - ZSTD_CCtx* zc, - const BYTE* const ip, const BYTE* const iLimit, - const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen) -{ - if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen); -} - - -static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( - ZSTD_CCtx* zc, /* Index table will be updated */ - const BYTE* ip, const BYTE* const iHighLimit, - const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen) -{ - switch(matchLengthSearch) - { - case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); - default : - case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); - case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); - case 7 : - case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); - } -} - - -/*-******************************* -* Optimal parser -*********************************/ -FORCE_INLINE_TEMPLATE -void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, const int ultra) -{ - seqStore_t* seqStorePtr = &(ctx->seqStore); - optState_t* optStatePtr = &(ctx->optState); - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ctx->base; - const BYTE* const prefixStart = base + ctx->dictLimit; - - const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog; - const U32 sufficient_len = ctx->appliedParams.cParams.targetLength; - const U32 mls = ctx->appliedParams.cParams.searchLength; - const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4; - - ZSTD_optimal_t* opt = optStatePtr->priceTable; - ZSTD_match_t* matches = optStatePtr->matchTable; - const BYTE* inr; - U32 offset, rep[ZSTD_REP_NUM]; - - /* init */ - ctx->nextToUpdate3 = ctx->nextToUpdate; - ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize); - ip += (ip==prefixStart); - { U32 i; for (i=0; irep[i]; } - - /* Match Loop */ - while (ip < ilimit) { - U32 cur, match_num, last_pos, litlen, price; - U32 u, mlen, best_mlen, best_off, litLength; - memset(opt, 0, sizeof(ZSTD_optimal_t)); - last_pos = 0; - litlen = (U32)(ip - anchor); - - /* check repCode */ - { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); - for (i=(ip == anchor); i 0) && (repCur < (S32)(ip-prefixStart)) - && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repCur, minMatch))) { - mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch; - if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; - goto _storeSequence; - } - best_off = i - (ip == anchor); - do { - price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ - mlen--; - } while (mlen >= minMatch); - } } } - - match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch); - - if (!last_pos && !match_num) { ip++; continue; } - - if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) { - best_mlen = matches[match_num-1].len; - best_off = matches[match_num-1].off; - cur = 0; - last_pos = 1; - goto _storeSequence; - } - - /* set prices using matches at position = 0 */ - best_mlen = (last_pos) ? last_pos : minMatch; - for (u = 0; u < match_num; u++) { - mlen = (u>0) ? matches[u-1].len+1 : best_mlen; - best_mlen = matches[u].len; - while (mlen <= best_mlen) { - price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */ - mlen++; - } } - - if (last_pos < minMatch) { ip++; continue; } - - /* initialize opt[0] */ - { U32 i ; for (i=0; i litlen) { - price = opt[cur - litlen].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-litlen); - } else - price = ZSTD_getLiteralPrice(optStatePtr, litlen, anchor); - } else { - litlen = 1; - price = opt[cur - 1].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-1); - } - - if (cur > last_pos || price <= opt[cur].price) - SET_PRICE(cur, 1, 0, litlen, price); - - if (cur == last_pos) break; - - if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */ - continue; - - mlen = opt[cur].mlen; - if (opt[cur].off > ZSTD_REP_MOVE_OPT) { - opt[cur].rep[2] = opt[cur-mlen].rep[1]; - opt[cur].rep[1] = opt[cur-mlen].rep[0]; - opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; - } else { - opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; - opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; - opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]); - } - - best_mlen = minMatch; - { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); - for (i=(opt[cur].mlen != 1); i 0) && (repCur < (S32)(inr-prefixStart)) - && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(inr - repCur, minMatch))) { - mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch; - - if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; best_off = i; last_pos = cur + 1; - goto _storeSequence; - } - - best_off = i - (opt[cur].mlen != 1); - if (mlen > best_mlen) best_mlen = mlen; - - do { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra); - } else - price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, best_off, mlen - MINMATCH, ultra); - } - - if (cur + mlen > last_pos || price <= opt[cur + mlen].price) - SET_PRICE(cur + mlen, mlen, i, litlen, price); - mlen--; - } while (mlen >= minMatch); - } } } - - match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen); - - if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) { - best_mlen = matches[match_num-1].len; - best_off = matches[match_num-1].off; - last_pos = cur + 1; - goto _storeSequence; - } - - /* set prices using matches at position = cur */ - for (u = 0; u < match_num; u++) { - mlen = (u>0) ? matches[u-1].len+1 : best_mlen; - best_mlen = matches[u].len; - - while (mlen <= best_mlen) { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra); - else - price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra); - } - - if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) - SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); - - mlen++; - } } } - - best_mlen = opt[last_pos].mlen; - best_off = opt[last_pos].off; - cur = last_pos - best_mlen; - - /* store sequence */ -_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ - opt[0].mlen = 1; - - while (1) { - mlen = opt[cur].mlen; - offset = opt[cur].off; - opt[cur].mlen = best_mlen; - opt[cur].off = best_off; - best_mlen = mlen; - best_off = offset; - if (mlen > cur) break; - cur -= mlen; - } - - for (u = 0; u <= last_pos;) { - u += opt[u].mlen; - } - - for (cur=0; cur < last_pos; ) { - mlen = opt[cur].mlen; - if (mlen == 1) { ip++; cur++; continue; } - offset = opt[cur].off; - cur += mlen; - litLength = (U32)(ip - anchor); - - if (offset > ZSTD_REP_MOVE_OPT) { - rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = offset - ZSTD_REP_MOVE_OPT; - offset--; - } else { - if (offset != 0) { - best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]); - if (offset != 1) rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = best_off; - } - if (litLength==0) offset--; - } - - ZSTD_updatePrice(optStatePtr, litLength, anchor, offset, mlen-MINMATCH); - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); - anchor = ip = ip + mlen; - } } /* for (cur=0; cur < last_pos; ) */ - - /* Save reps for next block */ - { int i; for (i=0; irepToConfirm[i] = rep[i]; } - - /* Last Literals */ - { size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - - -FORCE_INLINE_TEMPLATE -void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize, const int ultra) -{ - seqStore_t* seqStorePtr = &(ctx->seqStore); - optState_t* optStatePtr = &(ctx->optState); - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ctx->base; - const U32 lowestIndex = ctx->lowLimit; - const U32 dictLimit = ctx->dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* const dictBase = ctx->dictBase; - const BYTE* const dictEnd = dictBase + dictLimit; - - const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog; - const U32 sufficient_len = ctx->appliedParams.cParams.targetLength; - const U32 mls = ctx->appliedParams.cParams.searchLength; - const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4; - - ZSTD_optimal_t* opt = optStatePtr->priceTable; - ZSTD_match_t* matches = optStatePtr->matchTable; - const BYTE* inr; - - /* init */ - U32 offset, rep[ZSTD_REP_NUM]; - { U32 i; for (i=0; irep[i]; } - - ctx->nextToUpdate3 = ctx->nextToUpdate; - ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize); - ip += (ip==prefixStart); - - /* Match Loop */ - while (ip < ilimit) { - U32 cur, match_num, last_pos, litlen, price; - U32 u, mlen, best_mlen, best_off, litLength; - U32 current = (U32)(ip-base); - memset(opt, 0, sizeof(ZSTD_optimal_t)); - last_pos = 0; - opt[0].litlen = (U32)(ip - anchor); - - /* check repCode */ - { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); - for (i = (ip==anchor); i 0 && repCur <= (S32)current) - && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ - && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { - /* repcode detected we should take it */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; - - if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; - goto _storeSequence; - } - - best_off = i - (ip==anchor); - litlen = opt[0].litlen; - do { - price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ - mlen--; - } while (mlen >= minMatch); - } } } - - match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */ - - if (!last_pos && !match_num) { ip++; continue; } - - { U32 i; for (i=0; i sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) { - best_mlen = matches[match_num-1].len; - best_off = matches[match_num-1].off; - cur = 0; - last_pos = 1; - goto _storeSequence; - } - - best_mlen = (last_pos) ? last_pos : minMatch; - - /* set prices using matches at position = 0 */ - for (u = 0; u < match_num; u++) { - mlen = (u>0) ? matches[u-1].len+1 : best_mlen; - best_mlen = matches[u].len; - litlen = opt[0].litlen; - while (mlen <= best_mlen) { - price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, matches[u].off, litlen, price); - mlen++; - } } - - if (last_pos < minMatch) { - ip++; continue; - } - - /* check further positions */ - for (cur = 1; cur <= last_pos; cur++) { - inr = ip + cur; - - if (opt[cur-1].mlen == 1) { - litlen = opt[cur-1].litlen + 1; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-litlen); - } else - price = ZSTD_getLiteralPrice(optStatePtr, litlen, anchor); - } else { - litlen = 1; - price = opt[cur - 1].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-1); - } - - if (cur > last_pos || price <= opt[cur].price) - SET_PRICE(cur, 1, 0, litlen, price); - - if (cur == last_pos) break; - - if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */ - continue; - - mlen = opt[cur].mlen; - if (opt[cur].off > ZSTD_REP_MOVE_OPT) { - opt[cur].rep[2] = opt[cur-mlen].rep[1]; - opt[cur].rep[1] = opt[cur-mlen].rep[0]; - opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; - } else { - opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; - opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; - opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]); - } - - best_mlen = minMatch; - { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); - for (i = (mlen != 1); i 0 && repCur <= (S32)(current+cur)) - && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ - && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { - /* repcode detected */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; - - if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; best_off = i; last_pos = cur + 1; - goto _storeSequence; - } - - best_off = i - (opt[cur].mlen != 1); - if (mlen > best_mlen) best_mlen = mlen; - - do { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra); - } else - price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, best_off, mlen - MINMATCH, ultra); - } - - if (cur + mlen > last_pos || price <= opt[cur + mlen].price) - SET_PRICE(cur + mlen, mlen, i, litlen, price); - mlen--; - } while (mlen >= minMatch); - } } } - - match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch); - - if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) { - best_mlen = matches[match_num-1].len; - best_off = matches[match_num-1].off; - last_pos = cur + 1; - goto _storeSequence; - } - - /* set prices using matches at position = cur */ - for (u = 0; u < match_num; u++) { - mlen = (u>0) ? matches[u-1].len+1 : best_mlen; - best_mlen = matches[u].len; - - while (mlen <= best_mlen) { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra); - else - price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra); - } - - if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) - SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); - - mlen++; - } } } /* for (cur = 1; cur <= last_pos; cur++) */ - - best_mlen = opt[last_pos].mlen; - best_off = opt[last_pos].off; - cur = last_pos - best_mlen; - - /* store sequence */ -_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ - opt[0].mlen = 1; - - while (1) { - mlen = opt[cur].mlen; - offset = opt[cur].off; - opt[cur].mlen = best_mlen; - opt[cur].off = best_off; - best_mlen = mlen; - best_off = offset; - if (mlen > cur) break; - cur -= mlen; - } - - for (u = 0; u <= last_pos; ) { - u += opt[u].mlen; - } - - for (cur=0; cur < last_pos; ) { - mlen = opt[cur].mlen; - if (mlen == 1) { ip++; cur++; continue; } - offset = opt[cur].off; - cur += mlen; - litLength = (U32)(ip - anchor); - - if (offset > ZSTD_REP_MOVE_OPT) { - rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = offset - ZSTD_REP_MOVE_OPT; - offset--; - } else { - if (offset != 0) { - best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]); - if (offset != 1) rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = best_off; - } - - if (litLength==0) offset--; - } - - ZSTD_updatePrice(optStatePtr, litLength, anchor, offset, mlen-MINMATCH); - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); - anchor = ip = ip + mlen; - } } /* for (cur=0; cur < last_pos; ) */ - - /* Save reps for next block */ - { int i; for (i=0; irepToConfirm[i] = rep[i]; } - - /* Last Literals */ - { size_t lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - -#endif /* ZSTD_OPT_H_91842398743 */ +#endif /* ZSTD_OPT_H */