Refactor separate ldm variables all into one struct

dev
senhuang42 2020-10-05 18:10:10 -04:00
parent 0731b94e7c
commit a5500cf2af
2 changed files with 67 additions and 74 deletions

View File

@ -574,7 +574,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
size_t lastLLSize; size_t lastLLSize;
ms->ldmSeqStore = *rawSeqStore; ms->ldmSeqStore = *rawSeqStore;
lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize); lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize);
*rawSeqStore = ms->ldmSeqStore; /* Persist changes to ldmSeqStore during blockCompressor() */ *rawSeqStore = ms->ldmSeqStore;
return lastLLSize; return lastLLSize;
} }

View File

@ -768,10 +768,17 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
* LDM helper functions * * LDM helper functions *
*************************/ *************************/
/* ZSTD_opt_skipBytesInLdmSeqStore(): typedef struct {
rawSeqStore_t* seqStore; /* Reference to struct containing external candidates */
U32 startPosInBlock; /* Start position of the current match candidate */
U32 endPosInBlock; /* End position of the current match candidate */
U32 offset; /* Offset of the match candidate */
} ZSTD_optLdm_t;
/* ZSTD_opt_skipRawSeqStoreBytes():
* Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'. * Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'.
*/ */
static void ZSTD_opt_skipBytesInLdmSeqStore(rawSeqStore_t* ldmSeqStore, size_t nbBytes) { static void ZSTD_opt_skipRawSeqStoreBytes(rawSeqStore_t* ldmSeqStore, size_t nbBytes) {
while (nbBytes && ldmSeqStore->pos < ldmSeqStore->size) { while (nbBytes && ldmSeqStore->pos < ldmSeqStore->size) {
rawSeq currSeq = ldmSeqStore->seq[ldmSeqStore->pos]; rawSeq currSeq = ldmSeqStore->seq[ldmSeqStore->pos];
/* posInSequence necessarily must never represent a value beyond the sequence */ /* posInSequence necessarily must never represent a value beyond the sequence */
@ -798,84 +805,81 @@ static void ZSTD_opt_skipBytesInLdmSeqStore(rawSeqStore_t* ldmSeqStore, size_t n
} }
} }
/* ZSTD_opt_getNextLdmAndUpdateSeqStore(): /* ZSTD_opt_getNextMatchAndUpdateSeqStore():
* Calculates the beginning and end of the next match in the current block. * Calculates the beginning and end of the next match in the current block.
* Updates 'pos' and 'posInSequence' of the ldmSeqStore. * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
*/ */
static void ZSTD_opt_getNextLdmAndUpdateSeqStore(rawSeqStore_t* ldmSeqStore, static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
U32* matchStartPosInBlock, U32* matchEndPosInBlock, U32 blockBytesRemaining) {
U32* matchOffset, U32 currPosInBlock,
U32 blockBytesRemaining) {
rawSeq currSeq; rawSeq currSeq;
U32 currBlockEndPos; U32 currBlockEndPos;
U32 literalsBytesRemaining; U32 literalsBytesRemaining;
U32 matchBytesRemaining; U32 matchBytesRemaining;
/* Setting match end position to MAX to ensure we never use an LDM during this block */ /* Setting match end position to MAX to ensure we never use an LDM during this block */
if (ldmSeqStore->pos >= ldmSeqStore->size) { if (optLdm->seqStore->pos >= optLdm->seqStore->size) {
*matchStartPosInBlock = UINT_MAX; optLdm->startPosInBlock = UINT_MAX;
*matchEndPosInBlock = UINT_MAX; optLdm->endPosInBlock = UINT_MAX;
return; return;
} }
/* Calculate appropriate bytes left in matchLength and litLength after adjusting /* Calculate appropriate bytes left in matchLength and litLength after adjusting
based on ldmSeqStore->posInSequence */ based on ldmSeqStore->posInSequence */
currSeq = ldmSeqStore->seq[ldmSeqStore->pos]; currSeq = optLdm->seqStore->seq[optLdm->seqStore->pos];
assert(ldmSeqStore->posInSequence <= currSeq.litLength + currSeq.matchLength); assert(optLdm->seqStore->posInSequence <= currSeq.litLength + currSeq.matchLength);
currBlockEndPos = currPosInBlock + blockBytesRemaining; currBlockEndPos = currPosInBlock + blockBytesRemaining;
literalsBytesRemaining = (ldmSeqStore->posInSequence < currSeq.litLength) ? literalsBytesRemaining = (optLdm->seqStore->posInSequence < currSeq.litLength) ?
currSeq.litLength - (U32)ldmSeqStore->posInSequence : currSeq.litLength - (U32)optLdm->seqStore->posInSequence :
0; 0;
matchBytesRemaining = (literalsBytesRemaining == 0) ? matchBytesRemaining = (literalsBytesRemaining == 0) ?
currSeq.matchLength - ((U32)ldmSeqStore->posInSequence - currSeq.litLength) : currSeq.matchLength - ((U32)optLdm->seqStore->posInSequence - currSeq.litLength) :
currSeq.matchLength; currSeq.matchLength;
/* If there are more literal bytes than bytes remaining in block, no ldm is possible */ /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
if (literalsBytesRemaining >= blockBytesRemaining) { if (literalsBytesRemaining >= blockBytesRemaining) {
*matchStartPosInBlock = UINT_MAX; optLdm->startPosInBlock = UINT_MAX;
*matchEndPosInBlock = UINT_MAX; optLdm->endPosInBlock = UINT_MAX;
ZSTD_opt_skipBytesInLdmSeqStore(ldmSeqStore, blockBytesRemaining); ZSTD_opt_skipRawSeqStoreBytes(optLdm->seqStore, blockBytesRemaining);
return; return;
} }
/* Matches may be < MINMATCH by this process. In that case, we will reject them /* Matches may be < MINMATCH by this process. In that case, we will reject them
when we are deciding whether or not to add the ldm */ when we are deciding whether or not to add the ldm */
*matchStartPosInBlock = currPosInBlock + literalsBytesRemaining; optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
*matchEndPosInBlock = *matchStartPosInBlock + matchBytesRemaining; optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
*matchOffset = currSeq.offset; optLdm->offset = currSeq.offset;
if (*matchEndPosInBlock > currBlockEndPos) { if (optLdm->endPosInBlock > currBlockEndPos) {
/* Match ends after the block ends, we can't use the whole match */ /* Match ends after the block ends, we can't use the whole match */
*matchEndPosInBlock = currBlockEndPos; optLdm->endPosInBlock = currBlockEndPos;
ZSTD_opt_skipBytesInLdmSeqStore(ldmSeqStore, currBlockEndPos - currPosInBlock); ZSTD_opt_skipRawSeqStoreBytes(optLdm->seqStore, currBlockEndPos - currPosInBlock);
} else { } else {
/* If we can use the whole match point the ldmSeqStore at the next match */ /* If we can use the whole match point the ldmSeqStore at the next match */
ldmSeqStore->posInSequence = 0; optLdm->seqStore->posInSequence = 0;
ldmSeqStore->pos++; optLdm->seqStore->pos++;
} }
DEBUGLOG(6, "ZSTD_opt_getNextLdmAndUpdateSeqStore(): got an ldm that beginning at pos: %u, end at pos: %u, with offset: %u", DEBUGLOG(6, "ZSTD_opt_getNextMatchAndUpdateSeqStore(): got an ldm that beginning at pos: %u, end at pos: %u, with offset: %u",
*matchStartPosInBlock, *matchEndPosInBlock, *matchOffset); optLdm->startPosInBlock, optLdm->endPosInBlock, optLdm->offset);
} }
/* ZSTD_opt_maybeAddLdm(): /* ZSTD_opt_maybeAddMatch():
* Adds a match if it's long enough, based on it's 'matchStartPosInBlock' * Adds a match if it's long enough, based on it's 'matchStartPosInBlock'
* and 'matchEndPosInBlock', into 'matches'. Maintains the correct ordering of 'matches' * and 'matchEndPosInBlock', into 'matches'. Maintains the correct ordering of 'matches'
*/ */
static void ZSTD_opt_maybeAddLdm(ZSTD_match_t* matches, U32* nbMatches, static void ZSTD_opt_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
U32 matchStartPosInBlock, U32 matchEndPosInBlock, ZSTD_optLdm_t* optLdm, U32 currPosInBlock) {
U32 matchOffset, U32 currPosInBlock) { U32 posDiff = currPosInBlock - optLdm->startPosInBlock;
U32 posDiff = currPosInBlock - matchStartPosInBlock;
/* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */ /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
U32 candidateMatchLength = matchEndPosInBlock - matchStartPosInBlock - posDiff; U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
U32 candidateOffCode = matchOffset + ZSTD_REP_MOVE; U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE;
/* Ensure that current block position is not outside of the match */ /* Ensure that current block position is not outside of the match */
if (currPosInBlock < matchStartPosInBlock if (currPosInBlock < optLdm->startPosInBlock
|| currPosInBlock >= matchEndPosInBlock || currPosInBlock >= optLdm->endPosInBlock
|| candidateMatchLength < MINMATCH) { || candidateMatchLength < MINMATCH) {
return; return;
} }
DEBUGLOG(6, "ZSTD_opt_maybeAddLdm(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u", DEBUGLOG(6, "ZSTD_opt_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
candidateOffCode, candidateMatchLength, currPosInBlock); candidateOffCode, candidateMatchLength, currPosInBlock);
if (*nbMatches == 0) { if (*nbMatches == 0) {
matches[*nbMatches].len = candidateMatchLength; matches[*nbMatches].len = candidateMatchLength;
@ -892,29 +896,29 @@ static void ZSTD_opt_maybeAddLdm(ZSTD_match_t* matches, U32* nbMatches,
} }
} }
/* ZSTD_opt_processLdm(): /* ZSTD_opt_processMatchCandidate():
* Wrapper function to update ldm seq store and call ldm functions as necessary. * Wrapper function to update ldm seq store and call ldm functions as necessary.
*/ */
static void ZSTD_opt_processLdm(rawSeqStore_t* ldmSeqStore, ZSTD_match_t* matches, U32* nbMatches, static void ZSTD_opt_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches,
U32* matchStartPosInBlock, U32* matchEndPosInBlock, U32* matchOffset,
U32 currPosInBlock, U32 remainingBytes) { U32 currPosInBlock, U32 remainingBytes) {
if (currPosInBlock >= *matchEndPosInBlock) { if (optLdm->seqStore->size == 0 && optLdm->seqStore->pos >= optLdm->seqStore->size) {
if (currPosInBlock > *matchEndPosInBlock) { return;
/* The position at which ZSTD_opt_processLdm() is called is not necessarily }
if (currPosInBlock >= optLdm->endPosInBlock) {
if (currPosInBlock > optLdm->endPosInBlock) {
/* The position at which ZSTD_opt_processMatchCandidate() is called is not necessarily
* at the end of a match from the ldm seq store, and will often be some bytes * at the end of a match from the ldm seq store, and will often be some bytes
* over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots" * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
*/ */
U32 posOvershoot = currPosInBlock - *matchEndPosInBlock; U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
ZSTD_opt_skipBytesInLdmSeqStore(ldmSeqStore, posOvershoot); ZSTD_opt_skipRawSeqStoreBytes(optLdm->seqStore, posOvershoot);
} }
ZSTD_opt_getNextLdmAndUpdateSeqStore(ldmSeqStore, matchStartPosInBlock, ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
matchEndPosInBlock, matchOffset,
currPosInBlock, remainingBytes);
} }
ZSTD_opt_maybeAddLdm(matches, nbMatches, *matchStartPosInBlock, *matchEndPosInBlock, *matchOffset, currPosInBlock); ZSTD_opt_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
} }
/*-******************************* /*-*******************************
* Optimal parser * Optimal parser
*********************************/ *********************************/
@ -968,15 +972,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
ZSTD_match_t* const matches = optStatePtr->matchTable; ZSTD_match_t* const matches = optStatePtr->matchTable;
ZSTD_optimal_t lastSequence; ZSTD_optimal_t lastSequence;
U32 ldmStartPosInBlock = 0; /* Make a copy so that ms->ldmSeqStore stays immutable during compressBlock() */
U32 ldmEndPosInBlock = 0; ZSTD_optLdm_t optLdm = {&ms->ldmSeqStore, 0, 0, 0};
U32 ldmOffset = 0;
/* Get first match from ldm seq store if long mode is enabled */ /* Get first match from ldm seq store if long mode is enabled */
if (ms->ldmSeqStore.size > 0 && ms->ldmSeqStore.pos < ms->ldmSeqStore.size) { if (ms->ldmSeqStore.size > 0 && ms->ldmSeqStore.pos < ms->ldmSeqStore.size) {
ZSTD_opt_getNextLdmAndUpdateSeqStore(&ms->ldmSeqStore, &ldmStartPosInBlock, ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
&ldmEndPosInBlock, &ldmOffset,
(U32)(ip-istart), (U32)(iend-ip));
} }
/* init */ /* init */
@ -994,12 +995,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
{ U32 const litlen = (U32)(ip - anchor); { U32 const litlen = (U32)(ip - anchor);
U32 const ll0 = !litlen; U32 const ll0 = !litlen;
U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch); U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
if (ms->ldmSeqStore.size != 0 && ms->ldmSeqStore.pos < ms->ldmSeqStore.size) { ZSTD_opt_processMatchCandidate(&optLdm, matches, &nbMatches,
ZSTD_opt_processLdm(&ms->ldmSeqStore, matches, (U32)(ip-istart), (U32)(iend - ip));
&nbMatches, &ldmStartPosInBlock,
&ldmEndPosInBlock, &ldmOffset,
(U32)(ip-istart), (U32)(iend - ip));
}
if (!nbMatches) { ip++; continue; } if (!nbMatches) { ip++; continue; }
/* initialize opt[0] */ /* initialize opt[0] */
@ -1115,13 +1112,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch); U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
U32 matchNb; U32 matchNb;
ZSTD_opt_processMatchCandidate(&optLdm, matches, &nbMatches,
if (ms->ldmSeqStore.size != 0 && ms->ldmSeqStore.pos < ms->ldmSeqStore.size) { (U32)(inr-istart), (U32)(iend-inr));
ZSTD_opt_processLdm(&ms->ldmSeqStore, matches,
&nbMatches, &ldmStartPosInBlock,
&ldmEndPosInBlock, &ldmOffset,
(U32)(inr-istart), (U32)(iend-inr));
}
if (!nbMatches) { if (!nbMatches) {
DEBUGLOG(7, "rPos:%u : no match found", cur); DEBUGLOG(7, "rPos:%u : no match found", cur);
@ -1236,12 +1228,13 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
ZSTD_setBasePrices(optStatePtr, optLevel); ZSTD_setBasePrices(optStatePtr, optLevel);
} }
} /* while (ip < ilimit) */ } /* while (ip < ilimit) */
if (ldmEndPosInBlock < srcSize) { if (optLdm.endPosInBlock < srcSize) {
/* This can occur if after adding the final match in an ldm seq store within this block, /* This can occur if after adding the final match in an ldm seq store within this block,
ip reaches end of the block without calling ZSTD_opt_getNextLdmAndUpdateSeqStore() */ ip reaches end of the block without calling ZSTD_opt_getNextLdmAndUpdateSeqStore() */
ZSTD_opt_skipBytesInLdmSeqStore(&ms->ldmSeqStore, srcSize - ldmEndPosInBlock); ZSTD_opt_skipRawSeqStoreBytes(optLdm.seqStore, srcSize - optLdm.endPosInBlock);
} }
/* Return the last literals size */ /* Return the last literals size */
return (size_t)(iend - anchor); return (size_t)(iend - anchor);
} }