Merge pull request #1117 from felixhandte/zstd-fast-in-place-dict

ZSTD_fast: Support Searching the Dictionary Context In-Place
Branch: dev
Yann Collet 2018-05-23 19:32:25 -07:00 committed by GitHub
commit 08c5be5db3
6 changed files with 269 additions and 74 deletions

lib/compress/zstd_compress.c

@@ -963,6 +963,7 @@ static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
     ms->nextToUpdate = ms->window.dictLimit + 1;
     ms->loadedDictEnd = 0;
     ms->opt.litLengthSum = 0;  /* force reset of btopt stats */
+    ms->dictMatchState = NULL;
 }
 
 /*! ZSTD_continueCCtx() :
@@ -1203,42 +1204,80 @@ static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
                             U64 pledgedSrcSize,
                             ZSTD_buffered_policy_e zbuff)
 {
+    /* We have a choice between copying the dictionary context into the working
+     * context, or referencing the dictionary context from the working context
+     * in-place. We decide here which strategy to use. */
+    const int attachDict = ( pledgedSrcSize <= 8 KB
+                          || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN )
+                        && !params.forceWindow /* dictMatchState isn't correctly
+                                                * handled in _enforceMaxDist */
+                        && cdict->cParams.strategy == ZSTD_fast
+                        && ZSTD_equivalentCParams(cctx->appliedParams.cParams,
+                                                  cdict->cParams);
+
     {   unsigned const windowLog = params.cParams.windowLog;
         assert(windowLog != 0);
         /* Copy only compression parameters related to tables. */
         params.cParams = cdict->cParams;
         params.cParams.windowLog = windowLog;
-        ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, ZSTDcrp_noMemset, zbuff);
+        ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+                                attachDict ? ZSTDcrp_continue : ZSTDcrp_noMemset,
+                                zbuff);
         assert(cctx->appliedParams.cParams.strategy == cdict->cParams.strategy);
         assert(cctx->appliedParams.cParams.hashLog == cdict->cParams.hashLog);
         assert(cctx->appliedParams.cParams.chainLog == cdict->cParams.chainLog);
     }
 
-    /* copy tables */
-    {   size_t const chainSize = (cdict->cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict->cParams.chainLog);
-        size_t const hSize =  (size_t)1 << cdict->cParams.hashLog;
-        size_t const tableSpace = (chainSize + hSize) * sizeof(U32);
-        assert((U32*)cctx->blockState.matchState.chainTable == (U32*)cctx->blockState.matchState.hashTable + hSize);  /* chainTable must follow hashTable */
-        assert((U32*)cctx->blockState.matchState.hashTable3 == (U32*)cctx->blockState.matchState.chainTable + chainSize);
-        assert((U32*)cdict->matchState.chainTable == (U32*)cdict->matchState.hashTable + hSize);  /* chainTable must follow hashTable */
-        assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize);
-        memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace);   /* presumes all tables follow each other */
-    }
+    if (attachDict) {
+        const U32 cdictLen = (U32)( cdict->matchState.window.nextSrc
+                                  - cdict->matchState.window.base);
+        if (cdictLen == 0) {
+            /* don't even attach dictionaries with no contents */
+            DEBUGLOG(4, "skipping attaching empty dictionary");
+        } else {
+            DEBUGLOG(4, "attaching dictionary into context");
+            cctx->blockState.matchState.dictMatchState = &cdict->matchState;
 
-    /* Zero the hashTable3, since the cdict never fills it */
-    {   size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3;
-        assert(cdict->matchState.hashLog3 == 0);
-        memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
-    }
+            /* prep working match state so dict matches never have negative indices
+             * when they are translated to the working context's index space. */
+            if (cctx->blockState.matchState.window.dictLimit < cdictLen) {
+                cctx->blockState.matchState.window.nextSrc =
+                    cctx->blockState.matchState.window.base + cdictLen;
+                ZSTD_window_clear(&cctx->blockState.matchState.window);
+            }
+            cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
+        }
+    } else {
+        DEBUGLOG(4, "copying dictionary into context");
+        /* copy tables */
+        {   size_t const chainSize = (cdict->cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict->cParams.chainLog);
+            size_t const hSize =  (size_t)1 << cdict->cParams.hashLog;
+            size_t const tableSpace = (chainSize + hSize) * sizeof(U32);
+            assert((U32*)cctx->blockState.matchState.chainTable == (U32*)cctx->blockState.matchState.hashTable + hSize);  /* chainTable must follow hashTable */
+            assert((U32*)cctx->blockState.matchState.hashTable3 == (U32*)cctx->blockState.matchState.chainTable + chainSize);
+            assert((U32*)cdict->matchState.chainTable == (U32*)cdict->matchState.hashTable + hSize);  /* chainTable must follow hashTable */
+            assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize);
+            memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace);   /* presumes all tables follow each other */
+        }
 
-    /* copy dictionary offsets */
-    {
-        ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
-        ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
-        dstMatchState->window       = srcMatchState->window;
-        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
-        dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3;
-        dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
-    }
+        /* Zero the hashTable3, since the cdict never fills it */
+        {   size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3;
+            assert(cdict->matchState.hashLog3 == 0);
+            memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
+        }
+
+        /* copy dictionary offsets */
+        {
+            ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
+            ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
+            dstMatchState->window       = srcMatchState->window;
+            dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
+            dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3;
+            dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
+        }
+    }
 
     cctx->dictID = cdict->dictID;
 
     /* copy block state */
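
The attachDict decision above reads as a standalone predicate. A minimal sketch follows, with zstd's internal state flattened into plain parameters; `shouldAttachDict` and `Strategy` are illustrative stand-ins, not library API:

#include <stdint.h>

typedef enum { STRAT_FAST = 1 /* other strategies elided */ } Strategy;
#define CONTENTSIZE_UNKNOWN UINT64_MAX

/* Attach (search the CDict's tables in place) only when the input is small
 * or of unknown size, forceWindow is off (enforceMaxDist can't yet handle an
 * attached dict), the strategy is ZSTD_fast (the only one implemented so
 * far), and the working context's cParams match the dictionary's. */
static int shouldAttachDict(uint64_t pledgedSrcSize, int forceWindow,
                            Strategy cdictStrategy, int cParamsEquivalent)
{
    return ( pledgedSrcSize <= 8 * 1024
          || pledgedSrcSize == CONTENTSIZE_UNKNOWN )
        && !forceWindow
        && cdictStrategy == STRAT_FAST
        && cParamsEquivalent;
}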
@@ -2140,9 +2179,9 @@ MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr,
 /* ZSTD_selectBlockCompressor() :
  * Not static, but internal use only (used by long distance matcher)
  * assumption : strat is a valid strategy */
-ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
 {
-    static const ZSTD_blockCompressor blockCompressor[2][(unsigned)ZSTD_btultra+1] = {
+    static const ZSTD_blockCompressor blockCompressor[3][(unsigned)ZSTD_btultra+1] = {
         { ZSTD_compressBlock_fast  /* default for 0 */,
           ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy,
           ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2,
@@ -2150,13 +2189,19 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict
         { ZSTD_compressBlock_fast_extDict  /* default for 0 */,
           ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict,
           ZSTD_compressBlock_lazy_extDict, ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict,
-          ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btultra_extDict }
+          ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btultra_extDict },
+        { ZSTD_compressBlock_fast_dictMatchState  /* default for 0 */,
+          ZSTD_compressBlock_fast_dictMatchState,
+          NULL, NULL, NULL, NULL, NULL, NULL, NULL /* unimplemented as of yet */ }
     };
+    ZSTD_blockCompressor selectedCompressor;
     ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
 
     assert((U32)strat >= (U32)ZSTD_fast);
     assert((U32)strat <= (U32)ZSTD_btultra);
-    return blockCompressor[extDict!=0][(U32)strat];
+    selectedCompressor = blockCompressor[(int)dictMode][(U32)strat];
+    assert(selectedCompressor != NULL);
+    return selectedCompressor;
 }
 
 static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
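
The dispatch table now has one row per ZSTD_dictMode_e value, so the enum's numeric values double as row indices. A reduced sketch of the pattern with stub compressors (only the shape mirrors the diff; the stub names are invented):

#include <assert.h>
#include <stddef.h>

typedef enum { noDict = 0, extDict = 1, dictMatchState = 2 } DictMode;
typedef size_t (*BlockFn)(void);

static size_t fastStub(void)    { return 0; }
static size_t fastExtStub(void) { return 0; }
static size_t fastDmsStub(void) { return 0; }

static BlockFn selectStub(DictMode mode)
{
    /* one row per dict mode; unimplemented slots stay NULL and are
     * caught by the assert, as in the real selector */
    static const BlockFn table[3][1] = {
        { fastStub }, { fastExtStub }, { fastDmsStub }
    };
    BlockFn const fn = table[(int)mode][0];
    assert(fn != NULL);
    return fn;
}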
@@ -2188,6 +2233,11 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
     ZSTD_resetSeqStore(&(zc->seqStore));
     ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;   /* required for optimal parser to read stats from dictionary */
 
+    /* a gap between an attached dict and the current window is not safe,
+     * they must remain adjacent, and when that stops being the case, the dict
+     * must be unset */
+    assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);
+
     /* limited update after a very long match */
     {   const BYTE* const base = ms->window.base;
         const BYTE* const istart = (const BYTE*)src;
@@ -2198,7 +2248,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
     }
 
     /* select and store sequences */
-    {   U32 const extDict = ZSTD_window_hasExtDict(ms->window);
+    {   ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms);
         size_t lastLLSize;
         {   int i;
             for (i = 0; i < ZSTD_REP_NUM; ++i)
@@ -2212,7 +2262,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
                 ms, &zc->seqStore,
                 zc->blockState.nextCBlock->rep,
                 &zc->appliedParams.cParams,
-                src, srcSize, extDict);
+                src, srcSize);
             assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
         } else if (zc->appliedParams.ldmParams.enableLdm) {
             rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0};
@@ -2229,10 +2279,10 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
                 ms, &zc->seqStore,
                 zc->blockState.nextCBlock->rep,
                 &zc->appliedParams.cParams,
-                src, srcSize, extDict);
+                src, srcSize);
             assert(ldmSeqStore.pos == ldmSeqStore.size);
         } else {   /* not long range mode */
-            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, extDict);
+            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode);
             lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, src, srcSize);
         }
         {   const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
@@ -2299,8 +2349,9 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
                 if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
                 else ms->nextToUpdate -= correction;
                 ms->loadedDictEnd = 0;
+                ms->dictMatchState = NULL;
             }
-            ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd);
+            ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
             if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
 
             {   size_t cSize = ZSTD_compressBlock_internal(cctx,

lib/compress/zstd_compress_internal.h

@@ -122,7 +122,8 @@ typedef struct {
     U32 lowLimit;           /* below that point, no more data */
 } ZSTD_window_t;
 
-typedef struct {
+typedef struct ZSTD_matchState_t ZSTD_matchState_t;
+struct ZSTD_matchState_t {
     ZSTD_window_t window;   /* State for window round buffer management */
     U32 loadedDictEnd;      /* index of end of dictionary */
     U32 nextToUpdate;       /* index from which to continue table update */
@@ -132,7 +133,8 @@ typedef struct {
     U32* hashTable3;
     U32* chainTable;
     optState_t opt;         /* optimal parser state */
-} ZSTD_matchState_t;
+    const ZSTD_matchState_t *dictMatchState;
+};
 
 typedef struct {
     ZSTD_compressedBlockState_t* prevCBlock;
@@ -248,10 +250,13 @@ struct ZSTD_CCtx_s {
 
 typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
 
+typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e;
+
 typedef size_t (*ZSTD_blockCompressor) (
     ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
     ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
-ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict);
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);
 
 MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
@@ -508,6 +513,20 @@ MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
     return window.lowLimit < window.dictLimit;
 }
 
+/**
+ * ZSTD_matchState_dictMode():
+ * Inspects the provided matchState and figures out what dictMode should be
+ * passed to the compressor.
+ */
+MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
+{
+    return ZSTD_window_hasExtDict(ms->window) ?
+        ZSTD_extDict :
+        ms->dictMatchState != NULL ?
+            ZSTD_dictMatchState :
+            ZSTD_noDict;
+}
+
 /**
  * ZSTD_window_needOverflowCorrection():
  * Returns non-zero if the indices are getting too large and need overflow
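
ZSTD_matchState_dictMode() gives extDict priority over an attached dictionary. The same decision, modeled with the match state reduced to two inputs (a sketch, not library code):

#include <stddef.h>

typedef enum { noDict = 0, extDict = 1, dictMatchState = 2 } DictMode;

/* hasExtDict models ZSTD_window_hasExtDict(ms->window);
 * attached models ms->dictMatchState != NULL */
static DictMode dictModeOf(int hasExtDict, const void* attached)
{
    if (hasExtDict)       return extDict;
    if (attached != NULL) return dictMatchState;
    return noDict;
}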
@@ -575,18 +594,25 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
  * ZSTD_window_enforceMaxDist():
  * Updates lowLimit so that:
  *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
+ *
  * This allows a simple check that index >= lowLimit to see if index is valid.
  * This must be called before a block compression call, with srcEnd as the block
  * source end.
+ *
  * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
  * This is because dictionaries are allowed to be referenced as long as the last
  * byte of the dictionary is in the window, but once they are out of range,
  * they cannot be referenced. If loadedDictEndPtr is NULL, we use
  * loadedDictEnd == 0.
+ *
+ * In normal dict mode, the dict is between lowLimit and dictLimit. In
+ * dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
+ * is below them. forceWindow and dictMatchState are therefore incompatible.
  */
 MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
                                            void const* srcEnd, U32 maxDist,
-                                           U32* loadedDictEndPtr)
+                                           U32* loadedDictEndPtr,
+                                           const ZSTD_matchState_t** dictMatchStatePtr)
 {
     U32 const current = (U32)((BYTE const*)srcEnd - window->base);
     U32 loadedDictEnd = loadedDictEndPtr != NULL ? *loadedDictEndPtr : 0;
@@ -600,6 +626,8 @@ MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
         }
         if (loadedDictEndPtr)
             *loadedDictEndPtr = 0;
+        if (dictMatchStatePtr)
+            *dictMatchStatePtr = NULL;
     }
 }
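
A worked example of the update, with invented numbers: say current = 100000, maxDist = 1 << 16 = 65536, loadedDictEnd = 1000. Since 100000 > 65536 + 1000, lowLimit rises to 100000 - 65536 = 34464; the dictionary has slid out of the window, so loadedDictEnd is zeroed and, new in this patch, the attached dictMatchState is detached. A compilable sketch, with the window state flattened into parameters and the function name invented:

#include <stddef.h>
#include <stdint.h>

static void enforceMaxDistSketch(uint32_t current, uint32_t maxDist,
                                 uint32_t* lowLimit,
                                 uint32_t* loadedDictEnd,      /* may be NULL */
                                 const void** dictMatchState)  /* may be NULL */
{
    uint32_t const dictEnd = loadedDictEnd ? *loadedDictEnd : 0;
    if (current > maxDist + dictEnd) {
        uint32_t const newLowLimit = current - maxDist;
        if (*lowLimit < newLowLimit) *lowLimit = newLowLimit;
        /* dict fell out of range: forget it entirely */
        if (loadedDictEnd)  *loadedDictEnd = 0;
        if (dictMatchState) *dictMatchState = NULL;
    }
}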

lib/compress/zstd_fast.c

@@ -45,26 +45,57 @@ FORCE_INLINE_TEMPLATE
 size_t ZSTD_compressBlock_fast_generic(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize,
-        U32 const hlog, U32 const stepSize, U32 const mls)
+        U32 const hlog, U32 const stepSize, U32 const mls,
+        ZSTD_dictMode_e const dictMode)
 {
     U32* const hashTable = ms->hashTable;
     const BYTE* const base = ms->window.base;
     const BYTE* const istart = (const BYTE*)src;
     const BYTE* ip = istart;
     const BYTE* anchor = istart;
-    const U32   lowestIndex = ms->window.dictLimit;
-    const BYTE* const lowest = base + lowestIndex;
+    const U32   prefixLowestIndex = ms->window.dictLimit;
+    const BYTE* const prefixLowest = base + prefixLowestIndex;
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - HASH_READ_SIZE;
     U32 offset_1=rep[0], offset_2=rep[1];
     U32 offsetSaved = 0;
 
+    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    const U32* const dictHashTable = dictMode == ZSTD_dictMatchState ?
+                                     dms->hashTable : NULL;
+    const U32 dictLowestIndex      = dictMode == ZSTD_dictMatchState ?
+                                     dms->window.dictLimit : 0;
+    const BYTE* const dictBase     = dictMode == ZSTD_dictMatchState ?
+                                     dms->window.base : NULL;
+    const BYTE* const dictLowest   = dictMode == ZSTD_dictMatchState ?
+                                     dictBase + dictLowestIndex : NULL;
+    const BYTE* const dictEnd      = dictMode == ZSTD_dictMatchState ?
+                                     dms->window.nextSrc : NULL;
+    const U32 dictIndexDelta       = dictMode == ZSTD_dictMatchState ?
+                                     prefixLowestIndex - (U32)(dictEnd - dictBase) :
+                                     0;
+    const U32 dictAndPrefixLength  = (U32)(ip - prefixLowest + dictEnd - dictLowest);
+
+    assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
+
+    /* otherwise, we would get index underflow when translating a dict index
+     * into a local index */
+    assert(dictMode != ZSTD_dictMatchState
+        || prefixLowestIndex >= (U32)(dictEnd - dictBase));
+
     /* init */
-    ip += (ip==lowest);
-    {   U32 const maxRep = (U32)(ip-lowest);
+    ip += (dictAndPrefixLength == 0);
+    if (dictMode == ZSTD_noDict) {
+        U32 const maxRep = (U32)(ip - prefixLowest);
         if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
         if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
     }
+    if (dictMode == ZSTD_dictMatchState) {
+        /* dictMatchState repCode checks don't currently handle repCode == 0
+         * disabling. */
+        assert(offset_1 <= dictAndPrefixLength);
+        assert(offset_2 <= dictAndPrefixLength);
+    }
 
     /* Main Search Loop */
     while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
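
The dictIndexDelta arithmetic is easiest to check with concrete (made-up) numbers. An attached dictionary of 1000 bytes sitting below a prefix whose lowest index is 1500 gives dictIndexDelta = 1500 - 1000 = 500; a hit at dict index 300 then translates to working index 800, so the stored offset is current - dictMatchIndex - dictIndexDelta. A self-contained check of that arithmetic:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint32_t const prefixLowestIndex = 1500;   /* hypothetical */
    uint32_t const dictContentLen    = 1000;   /* models dictEnd - dictBase */
    uint32_t const dictIndexDelta    = prefixLowestIndex - dictContentLen;
    uint32_t const current           = 2000;   /* position being compressed */
    uint32_t const dictMatchIndex    = 300;    /* hit in the dict hash table */

    /* translate into the working context's index space, then to an offset */
    uint32_t const translated = dictMatchIndex + dictIndexDelta;      /* 800 */
    uint32_t const offset = current - dictMatchIndex - dictIndexDelta;
    assert(offset == current - translated);   /* same quantity as the diff */
    assert(offset == 1200);
    /* the diff's assert (prefixLowestIndex >= dictContentLen) is what keeps
     * dictIndexDelta, and hence offset, from underflowing */
    return 0;
}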
@@ -73,26 +104,62 @@ size_t ZSTD_compressBlock_fast_generic(
         U32 const current = (U32)(ip-base);
         U32 const matchIndex = hashTable[h];
         const BYTE* match = base + matchIndex;
+        const U32 repIndex = current + 1 - offset_1;
+        const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
+                            && repIndex < prefixLowestIndex) ?
+                               dictBase + (repIndex - dictIndexDelta) :
+                               base + repIndex;
         hashTable[h] = current;   /* update hash table */
 
-        if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
+        if (dictMode == ZSTD_dictMatchState
+            && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
+            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+            const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
+            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, istart) + 4;
+            ip++;
+            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
+        } else if ( dictMode == ZSTD_noDict
+                 && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
             mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
             ip++;
             ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
-        } else {
-            if ( (matchIndex <= lowestIndex)
-              || (MEM_read32(match) != MEM_read32(ip)) ) {
+        } else if ( (matchIndex <= prefixLowestIndex)
+                 || (MEM_read32(match) != MEM_read32(ip)) ) {
+            if (dictMode == ZSTD_dictMatchState) {
+                U32 const dictMatchIndex = dictHashTable[h];
+                const BYTE* dictMatch = dictBase + dictMatchIndex;
+                if (dictMatchIndex <= dictLowestIndex ||
+                    MEM_read32(dictMatch) != MEM_read32(ip)) {
+                    assert(stepSize >= 1);
+                    ip += ((ip-anchor) >> kSearchStrength) + stepSize;
+                    continue;
+                } else {
+                    /* found a dict match */
+                    U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
+                    mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, istart) + 4;
+                    while (((ip>anchor) & (dictMatch>dictLowest))
+                         && (ip[-1] == dictMatch[-1])) {
+                        ip--; dictMatch--; mLength++;
+                    } /* catch up */
+                    offset_2 = offset_1;
+                    offset_1 = offset;
+                    ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                }
+            } else {
                 assert(stepSize >= 1);
                 ip += ((ip-anchor) >> kSearchStrength) + stepSize;
                 continue;
             }
+        } else {
+            /* found a regular match */
+            U32 const offset = (U32)(ip-match);
             mLength = ZSTD_count(ip+4, match+4, iend) + 4;
-            {   U32 const offset = (U32)(ip-match);
-                while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
-                offset_2 = offset_1;
-                offset_1 = offset;
-                ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
-        }   }
+            while (((ip>anchor) & (match>prefixLowest))
+                 && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
+            offset_2 = offset_1;
+            offset_1 = offset;
+            ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+        }
 
         /* match found */
         ip += mLength;
@@ -102,19 +169,43 @@ size_t ZSTD_compressBlock_fast_generic(
         /* Fill Table */
         hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;  /* here because current+2 could be > iend-8 */
         hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
+
         /* check immediate repcode */
-        while ( (ip <= ilimit)
-             && ( (offset_2>0)
-                & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
-            /* store sequence */
-            size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
-            { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
-            hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base);
-            ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
-            ip += rLength;
-            anchor = ip;
-            continue;   /* faster when present ... (?) */
-    }   }
+        if (dictMode == ZSTD_dictMatchState) {
+            while (ip <= ilimit) {
+                U32 const current2 = (U32)(ip-base);
+                U32 const repIndex2 = current2 - offset_2;
+                const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
+                        dictBase - dictIndexDelta + repIndex2 :
+                        base + repIndex2;
+                if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
+                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                    const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
+                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, istart) + 4;
+                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
+                    ip += repLength2;
+                    anchor = ip;
+                    continue;
+                }
+                break;
+            }
+        }
+
+        if (dictMode == ZSTD_noDict) {
+            while ( (ip <= ilimit)
+                 && ( (offset_2>0)
+                    & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
+                /* store sequence */
+                size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
+                U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
+                hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base);
+                ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
+                ip += rLength;
+                anchor = ip;
+                continue;   /* faster when present ... (?) */
+    }   }   }
 
     /* save reps for next block */
     rep[0] = offset_1 ? offset_1 : offsetSaved;
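
The `(U32)((prefixLowestIndex-1) - repIndex2) >= 3` guard appears to fold two conditions into one unsigned comparison: it passes when the rep match lies entirely in the current prefix (the subtraction wraps to a huge value) or sits at least 4 bytes below the dict/prefix seam, and rejects indices hugging the boundary. A small self-check of the comparison, with an invented seam value:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint32_t const prefixLowestIndex = 1000;   /* hypothetical seam */

    /* rep match in the prefix: subtraction wraps, passes */
    assert((uint32_t)((prefixLowestIndex-1) - 1200u) >= 3);
    /* rep match deep in the dict: 999 - 900 = 99, passes */
    assert((uint32_t)((prefixLowestIndex-1) - 900u) >= 3);
    /* rep match 3 bytes below the seam: 999 - 997 = 2, rejected */
    assert(!((uint32_t)((prefixLowestIndex-1) - 997u) >= 3));
    return 0;
}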
@@ -132,17 +223,40 @@ size_t ZSTD_compressBlock_fast(
     U32 const hlog = cParams->hashLog;
     U32 const mls = cParams->searchLength;
     U32 const stepSize = cParams->targetLength;
+    assert(ms->dictMatchState == NULL);
     switch(mls)
     {
     default: /* includes case 3 */
     case 4 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4);
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, ZSTD_noDict);
     case 5 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5);
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, ZSTD_noDict);
     case 6 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6);
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, ZSTD_noDict);
     case 7 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7);
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, ZSTD_noDict);
+    }
+}
+
+size_t ZSTD_compressBlock_fast_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
+{
+    U32 const hlog = cParams->hashLog;
+    U32 const mls = cParams->searchLength;
+    U32 const stepSize = cParams->targetLength;
+    assert(ms->dictMatchState != NULL);
+    switch(mls)
+    {
+    default: /* includes case 3 */
+    case 4 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, ZSTD_dictMatchState);
+    case 5 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, ZSTD_dictMatchState);
+    case 6 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, ZSTD_dictMatchState);
+    case 7 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, ZSTD_dictMatchState);
     }
 }

lib/compress/zstd_fast.h

@@ -24,6 +24,9 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
 size_t ZSTD_compressBlock_fast(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_fast_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_fast_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);

lib/compress/zstd_ldm.c

@@ -508,7 +508,7 @@ size_t ZSTD_ldm_generateSequences(
          *   * Try invalidation after the sequence generation and test the
          *     the offset against maxDist directly.
          */
-        ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL);
+        ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
         /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
         newLeftoverSize = ZSTD_ldm_generateSequences_internal(
             ldmState, sequences, params, chunkStart, chunkSize);
@@ -591,12 +591,12 @@ static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
 
 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
     ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-    ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
-    int const extDict)
+    ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
 {
     unsigned const minMatch = cParams->searchLength;
     ZSTD_blockCompressor const blockCompressor =
-        ZSTD_selectBlockCompressor(cParams->strategy, extDict);
+        ZSTD_selectBlockCompressor(cParams->strategy,
+                                   ZSTD_matchState_dictMode(ms));
     BYTE const* const base = ms->window.base;
     /* Input bounds */
     BYTE const* const istart = (BYTE const*)src;

lib/compress/zstd_ldm.h

@@ -62,8 +62,7 @@ size_t ZSTD_ldm_generateSequences(
 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
     ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
     ZSTD_compressionParameters const* cParams,
-    void const* src, size_t srcSize,
-    int const extDict);
+    void const* src, size_t srcSize);
 
 /**
  * ZSTD_ldm_skipSequences():