Add initial support for new ZSTD_Sequence mode

dev
senhuang42 2020-11-02 13:29:56 -05:00
parent e0f26afce9
commit c86151f53c
1 changed files with 78 additions and 36 deletions

View File

@ -2148,14 +2148,14 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);
DEBUGLOG(4, "ZSTD_compressSequences_internal (nbSeq=%zu)", nbSeq);
DEBUGLOG(4, "First seqs:", nbSeq);
/*DEBUGLOG(4, "First seqs:", nbSeq);
for (int i = 0; i < 5; ++i) {
DEBUGLOG(4, "(of: %u ml: %u ll: %u)", seqStorePtr->sequencesStart[i].offset, seqStorePtr->sequencesStart[i].matchLength, seqStorePtr->sequencesStart[i].litLength);
}
DEBUGLOG(4, "Final seqs:", nbSeq);
for (int i = 1; i < 5; ++i) {
DEBUGLOG(4, "(of: %u ml: %u ll: %u)", seqStorePtr->sequencesStart[nbSeq-i].offset, seqStorePtr->sequencesStart[nbSeq-i].matchLength, seqStorePtr->sequencesStart[nbSeq-i].litLength);
}
}*/
ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);
@ -4508,15 +4508,19 @@ typedef struct {
U32 endPosInSequence; /* Position within sequence at index 'endIdx' where range ends */
} ZSTD_sequenceRange;
static void ZSTD_updateSequenceRange(ZSTD_sequenceRange* sequenceRange, size_t nbBytes,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize) {
/* Returns the number of additional bytes consumed, after blockSize. Can be negative */
static int ZSTD_updateSequenceRange(ZSTD_sequenceRange* sequenceRange, size_t blockSize,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
ZSTD_sequenceFormat_e format) {
U32 idx = sequenceRange->endIdx;
U32 endPosInSequence = sequenceRange->endPosInSequence + nbBytes;
DEBUGLOG(4, "ZSTD_updateSequenceRange: startidx %u startpos: %u endidx: %u endpos: %u",
U32 endPosInSequence = sequenceRange->endPosInSequence + blockSize;
int bytesAdjustment = 0;
DEBUGLOG(4, "ZSTD_updateSequenceRange: %u bytes, startidx %u startpos: %u endidx: %u endpos: %u", blockSize,
sequenceRange->startIdx, sequenceRange->startPosInSequence, sequenceRange->endIdx, sequenceRange->endPosInSequence);
DEBUGLOG(4, "endPosInSequence begin val: %u", endPosInSequence);
while (endPosInSequence && idx < inSeqsSize) {
ZSTD_Sequence currSeq = inSeqs[idx];
DEBUGLOG(4, "curr Seq: idx: %u ll: %u ml: %u, of: %u", idx, currSeq.litLength, currSeq.matchLength, currSeq.offset);
if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
endPosInSequence -= currSeq.litLength + currSeq.matchLength;
idx++;
@ -4525,12 +4529,31 @@ static void ZSTD_updateSequenceRange(ZSTD_sequenceRange* sequenceRange, size_t n
}
}
if (idx == inSeqsSize) {
endPosInSequence = 0;
}
if (endPosInSequence == inSeqs[idx].litLength + inSeqs[idx].matchLength) {
endPosInSequence = 0;
idx++;
if (format == ZSTD_sf_noBlockDelimiters) {
assert(endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
DEBUGLOG(4, "idx: %u", idx);
if (idx != inSeqsSize && endPosInSequence > inSeqs[idx].litLength) {
DEBUGLOG(4, "Endpos is in the match");
if (inSeqs[idx].matchLength >= blockSize) {
/* Only split the match if it's too large */
U32 firstHalfMatchLength = endPosInSequence - inSeqs[idx].litLength;
U32 secondHalfMatchLength = inSeqs[idx].litLength - firstHalfMatchLength;
assert(firstHalfMatchLength >= MINMATCH || secondHalfMatchLength >= MINMATCH);
if (firstHalfMatchLength < MINMATCH && firstHalfMatchLength != 0) {
/* Move the endPos forward so that it creates match of at least MINMATCH length */
endPosInSequence = MINMATCH + inSeqs[idx].litLength;
bytesAdjustment = MINMATCH - firstHalfMatchLength;
} else if (secondHalfMatchLength < MINMATCH && secondHalfMatchLength != 0) {
/* Move the endPos backward so that it creates match of at least MINMATCH length */
endPosInSequence -= MINMATCH - secondHalfMatchLength;
bytesAdjustment = secondHalfMatchLength - MINMATCH;
}
} else {
/* If we can, prefer to simply move endPos to the end of the literals */
bytesAdjustment = (int)inSeqs[idx].litLength - (int)endPosInSequence;
endPosInSequence = inSeqs[idx].litLength;
}
}
}
sequenceRange->startIdx = sequenceRange->endIdx;
@ -4541,12 +4564,14 @@ static void ZSTD_updateSequenceRange(ZSTD_sequenceRange* sequenceRange, size_t n
DEBUGLOG(4, "endidx: (of: %u ml: %u ll: %u)", inSeqs[sequenceRange->endIdx].offset, inSeqs[sequenceRange->endIdx].matchLength, inSeqs[sequenceRange->endIdx].litLength);
DEBUGLOG(4, "finished update: startidx %u startpos: %u endidx: %u endpos: %u",
sequenceRange->startIdx, sequenceRange->startPosInSequence, sequenceRange->endIdx, sequenceRange->endPosInSequence);
DEBUGLOG(4, "final PIS was adjusted by: %d bytes", bytesAdjustment);
return bytesAdjustment;
}
/* Returns size of sequences range copied, otherwise ZSTD error code */
static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, const ZSTD_sequenceRange* seqRange,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t srcSize) {
const void* src, size_t srcSize, ZSTD_sequenceFormat_e format) {
DEBUGLOG(4, "ZSTD_copySequencesToSeqStore: numSeqs: %zu srcSize: %zu", inSeqsSize, srcSize);
size_t idx = seqRange->startIdx;
BYTE const* istart = (BYTE const*)src;
@ -4559,26 +4584,34 @@ static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, const ZSTD_sequ
U32 offCode = inSeqs[idx].offset + ZSTD_REP_MOVE;
/* Adjust litLength and matchLength for the sequence at startIdx */
if (idx == seqRange->startIdx) {
if (seqRange->startIdx == seqRange->endIdx) {
U32 seqLength = seqRange->endPosInSequence - seqRange->startPosInSequence;
RETURN_ERROR_IF(seqLength > litLength + matchLength, corruption_detected, "SeqLength cannot be bigger than sequence length!");
if (seqLength <= litLength) {
/* Sequence is entirely literals, break and store last literals */
break;
} else if (seqLength <= litLength + matchLength) {
matchLength = seqLength - litLength;
}
} else if (idx == seqRange->startIdx) {
U32 posInSequence = seqRange->startPosInSequence;
DEBUGLOG(4, "At startIdx: idx: %u PIS: %u", idx, posInSequence);
assert(posInSequence <= litLength + matchLength);
if (posInSequence >= litLength) {
litLength = 0;
/* position is within the match */
posInSequence -= litLength;
litLength = 0;
matchLength -= posInSequence;
} else {
/* position is within the literals */
litLength -= posInSequence;
}
matchLength -= posInSequence;
if (matchLength <= MINMATCH && offCode != ZSTD_REP_MOVE /* dont trigger this in lastLL case */) {
DEBUGLOG(4, "start idx: %zu, seq: (ll: %u, ml: %u, of: %u)", idx, litLength, matchLength, offCode);
RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small! Start Idx");
}
DEBUGLOG(4, "startIdx seq finalized: ll: %u ml: %u", litLength, matchLength);
}
/* Adjust litLength and matchLength for the sequence at endIdx */
if (idx == seqRange->endIdx) {
} else if (idx == seqRange->endIdx) {
U32 posInSequence = seqRange->endPosInSequence;
DEBUGLOG(4, "Reached endIdx. idx: %u PIS: %u", idx, posInSequence);
if (posInSequence == 0) {
@ -4586,21 +4619,24 @@ static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, const ZSTD_sequ
corruption_detected, "Contract violation");
}
assert(posInSequence <= litLength + matchLength);
if (posInSequence < litLength) {
litLength = posInSequence;
matchLength = 0;
if (posInSequence <= litLength) {
/* position is within the last literals, break and store last literals */
break;
} else {
/* position is within the match */
matchLength = posInSequence - litLength;
/* ZSTD_updateSequenceRange should never give us a position such that we generate a match too small */
if (matchLength <= MINMATCH && offCode != ZSTD_REP_MOVE) {
DEBUGLOG(4, "start idx: %zu, seq: (ll: %u, ml: %u, of: %u)", idx, litLength, matchLength, offCode);
RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small! Start Idx");
}
}
DEBUGLOG(4, "final ll:%u ml: %u", litLength, matchLength);
DEBUGLOG(4, "endIdx seq finalized: ll:%u ml: %u", litLength, matchLength);
}
/* ML == 0 and Offset == 0 (or offCode == ZSTD_REP_MOVE) implies we are ending a block */
/* ML == 0 and Offset == 0 (or offCode == ZSTD_REP_MOVE) signals block delimiters */
if (matchLength == 0 && offCode == ZSTD_REP_MOVE) {
RETURN_ERROR_IF(format == ZSTD_sf_noBlockDelimiters, corruption_detected, "No block delimiters allowed!");
/* Handle last literals case */
if (litLength > 0) {
DEBUGLOG(4, "Storing last literals: %u bytes, idx: %u", litLength, idx);
@ -4610,7 +4646,7 @@ static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, const ZSTD_sequ
continue;
}
//DEBUGLOG(4, "Storing in actual seqStore idx: %zu, seq: (ll: %u, ml: %u, of: %u), rep: %u", idx, litLength, matchLength - MINMATCH, offCode, inSeqs[idx].rep);
DEBUGLOG(4, "Storing in actual seqStore idx: %zu, seq: (ll: %u, ml: %u, of: %u), rep: %u", idx, litLength, matchLength - MINMATCH, offCode, inSeqs[idx].rep);
RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small! of: %u ml: %u ll: %u", offCode, matchLength, litLength);
if (inSeqs[idx].rep) {
ZSTD_storeSeq(seqStore, litLength, ip, iend, inSeqs[idx].rep - 1, matchLength - MINMATCH);
@ -4620,6 +4656,13 @@ static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, const ZSTD_sequ
ip += matchLength + litLength;
}
if (format == ZSTD_sf_noBlockDelimiters && ip != iend) {
assert(ip <= iend);
U32 lastLLSize = (U32)(iend - ip);
DEBUGLOG(4, "Storing last literals of size: %u", lastLLSize);
ZSTD_storeLastLiterals(seqStore, ip, lastLLSize);
}
DEBUGLOG(4, "ZSTD_copySequencesToSeqStore: done");
return 0;
}
@ -4647,12 +4690,14 @@ size_t ZSTD_compressSequences_ext_internal(void* dst, size_t dstCapacity,
while (remaining) {
U32 cBlockSize;
int additionalByteAdjustment;
lastBlock = remaining <= cctx->blockSize;
blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize;
blockSeqStore = cctx->seqStore;
ZSTD_resetSeqStore(&blockSeqStore);
additionalByteAdjustment = ZSTD_updateSequenceRange(&seqRange, blockSize, inSeqs, inSeqsSize, format);
blockSize += additionalByteAdjustment;
DEBUGLOG(4, "Working on new block. Blocksize: %u", blockSize);
ZSTD_updateSequenceRange(&seqRange, blockSize, inSeqs, inSeqsSize);
/* Skip over uncompressible blocks */
if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
@ -4667,7 +4712,7 @@ size_t ZSTD_compressSequences_ext_internal(void* dst, size_t dstCapacity,
continue;
}
ZSTD_copySequencesToSeqStore(&blockSeqStore, &seqRange, inSeqs, inSeqsSize, ip, blockSize);
ZSTD_copySequencesToSeqStore(&blockSeqStore, &seqRange, inSeqs, inSeqsSize, ip, blockSize, format);
compressedSeqsSize = ZSTD_compressSequences(&blockSeqStore,
&cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
&cctx->appliedParams,
@ -4720,13 +4765,13 @@ size_t ZSTD_compressSequences_ext(void* dst, size_t dstCapacity,
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
const void* src, size_t srcSize, int compressionLevel,
ZSTD_sequenceFormat_e format) {
DEBUGLOG(4, "ZSTD_compressSequences_ext()");
BYTE* op = (BYTE*)dst;
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
size_t cSize = 0;
size_t compressedBlocksSize = 0;
size_t frameHeaderSize = 0;
DEBUGLOG(4, "ZSTD_compressSequences_ext()");
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
/* Initialization stage */
@ -4758,7 +4803,7 @@ size_t ZSTD_compressSequences_ext(void* dst, size_t dstCapacity,
ZSTDb_buffered) , "");
assert(cctx->appliedParams.nbWorkers == 0);
}
DEBUGLOG(4, "CCtx blockSize: %zu\n", cctx->blockSize);
DEBUGLOG(4, "CCtx blockSize: %zu", cctx->blockSize);
if (dstCapacity < ZSTD_compressBound(srcSize))
RETURN_ERROR(dstSize_tooSmall, "Destination buffer too small!");
@ -4767,9 +4812,6 @@ size_t ZSTD_compressSequences_ext(void* dst, size_t dstCapacity,
dstCapacity -= frameHeaderSize;
cSize += frameHeaderSize;
if (cctx->appliedParams.ldmParams.enableLdm) {
ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
}
if (cctx->appliedParams.fParams.checksumFlag && srcSize) {
XXH64_update(&cctx->xxhState, src, srcSize);
}
@ -4783,7 +4825,7 @@ size_t ZSTD_compressSequences_ext(void* dst, size_t dstCapacity,
}
cSize += compressedBlocksSize;
dstCapacity -= compressedBlocksSize;
DEBUGLOG(4, "cSize after compressSequences_internal: %zu\n", cSize);
DEBUGLOG(4, "cSize after compressSequences_internal: %zu", cSize);
if (cctx->appliedParams.fParams.checksumFlag) {
U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
@ -4793,7 +4835,7 @@ size_t ZSTD_compressSequences_ext(void* dst, size_t dstCapacity,
cSize += 4;
}
DEBUGLOG(4, "Final compressed size: %zu\n", cSize);
DEBUGLOG(4, "Final compressed size: %zu", cSize);
ZSTD_freeCCtx(cctx);
return cSize;
}