Add support for representing last literals in the extracted seqs

This commit is contained in:
senhuang42 2020-10-27 11:19:48 -04:00
parent 9171f920cd
commit 1d221ecc03
2 changed files with 18 additions and 4 deletions

View File

@ -2444,6 +2444,9 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
const seqDef* seqStoreSeqs = seqStore->sequencesStart;
size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs;
size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);
size_t literalsRead = 0;
size_t lastLLSize;
ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
size_t i;
@ -2451,7 +2454,10 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
int repIdx;
assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
for (i = 0, position = 0; i < seqStoreSeqSize; ++i) {
/* Ensure we have enough space for last literals "sequence" */
assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
for (i = 0; i < seqStoreSeqSize; ++i) {
literalsRead += seqStoreSeqs[i].litLength;
outSeqs[i].litLength = seqStoreSeqs[i].litLength;
outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH;
@ -2496,8 +2502,15 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
} else {
outSeqs[i].offset = seqStoreSeqs[i].offset - ZSTD_REP_NUM;
}
position += outSeqs[i].litLength + outSeqs[i].matchLength;
}
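/* Append the block's trailing literals (if any) as one extra entry with matchLength == offset == rep == 0.
 * NOTE(review): seqIndex below advances by seqStoreSeqSize only, so this extra entry does not appear
 * to be counted and the next block's output would start on top of it -- confirm, and count it if so. */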
lastLLSize = seqStoreLiteralsSize - literalsRead;
if (lastLLSize) {
outSeqs[i].litLength = lastLLSize;
outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;
}
zc->seqCollector.seqIndex += seqStoreSeqSize;
}

View File

@ -1115,7 +1115,7 @@ typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
typedef struct {
unsigned int offset; /* The offset of the match.
* If == 0, then represents a block of literals, determined by litLength
* If == 0, then represents a section of literals of litLength size
*/
unsigned int litLength; /* Literal length */
@ -1278,7 +1278,8 @@ ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcS
ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
/*! ZSTD_getSequences() :
* Extract sequences from the sequence store
* Extract sequences from the sequence store. If a block ends with literals that belong to no match,
* they are appended after that block's sequences as one entry with offset == 0, matchLength == 0,
* and litLength == the size of those last literals.
* zc can be used to insert custom compression params.
* This function invokes ZSTD_compress2
* @return : number of sequences extracted