Add initial support for new ZSTD_Sequence mode
parent e0f26afce9
commit c86151f53c
@@ -2148,14 +2148,14 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);

DEBUGLOG(4, "ZSTD_compressSequences_internal (nbSeq=%zu)", nbSeq);
DEBUGLOG(4, "First seqs:", nbSeq);
/*DEBUGLOG(4, "First seqs:", nbSeq);
for (int i = 0; i < 5; ++i) {
DEBUGLOG(4, "(of: %u ml: %u ll: %u)", seqStorePtr->sequencesStart[i].offset, seqStorePtr->sequencesStart[i].matchLength, seqStorePtr->sequencesStart[i].litLength);
}
DEBUGLOG(4, "Final seqs:", nbSeq);
for (int i = 1; i < 5; ++i) {
DEBUGLOG(4, "(of: %u ml: %u ll: %u)", seqStorePtr->sequencesStart[nbSeq-i].offset, seqStorePtr->sequencesStart[nbSeq-i].matchLength, seqStorePtr->sequencesStart[nbSeq-i].litLength);
}
}*/
ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);
@@ -4508,15 +4508,19 @@ typedef struct {
U32 endPosInSequence; /* Position within sequence at index 'endIdx' where range ends */
} ZSTD_sequenceRange;

static void ZSTD_updateSequenceRange(ZSTD_sequenceRange* sequenceRange, size_t nbBytes,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize) {
/* Returns the number of additional bytes consumed, after blockSize. Can be negative */
static int ZSTD_updateSequenceRange(ZSTD_sequenceRange* sequenceRange, size_t blockSize,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
ZSTD_sequenceFormat_e format) {
U32 idx = sequenceRange->endIdx;
U32 endPosInSequence = sequenceRange->endPosInSequence + nbBytes;
DEBUGLOG(4, "ZSTD_updateSequenceRange: startidx %u startpos: %u endidx: %u endpos: %u",
U32 endPosInSequence = sequenceRange->endPosInSequence + blockSize;
int bytesAdjustment = 0;
DEBUGLOG(4, "ZSTD_updateSequenceRange: %u bytes, startidx %u startpos: %u endidx: %u endpos: %u", blockSize,
sequenceRange->startIdx, sequenceRange->startPosInSequence, sequenceRange->endIdx, sequenceRange->endPosInSequence);
DEBUGLOG(4, "endPosInSequence begin val: %u", endPosInSequence);
while (endPosInSequence && idx < inSeqsSize) {
ZSTD_Sequence currSeq = inSeqs[idx];
DEBUGLOG(4, "curr Seq: idx: %u ll: %u ml: %u, of: %u", idx, currSeq.litLength, currSeq.matchLength, currSeq.offset);
if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
endPosInSequence -= currSeq.litLength + currSeq.matchLength;
idx++;
@@ -4525,12 +4529,31 @@ static void ZSTD_updateSequenceRange(ZSTD_sequenceRange* sequenceRange, size_t n
}
}

if (idx == inSeqsSize) {
endPosInSequence = 0;
}
if (endPosInSequence == inSeqs[idx].litLength + inSeqs[idx].matchLength) {
endPosInSequence = 0;
idx++;
if (format == ZSTD_sf_noBlockDelimiters) {
assert(endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
DEBUGLOG(4, "idx: %u", idx);
if (idx != inSeqsSize && endPosInSequence > inSeqs[idx].litLength) {
DEBUGLOG(4, "Endpos is in the match");
if (inSeqs[idx].matchLength >= blockSize) {
/* Only split the match if it's too large */
U32 firstHalfMatchLength = endPosInSequence - inSeqs[idx].litLength;
U32 secondHalfMatchLength = inSeqs[idx].litLength - firstHalfMatchLength;
assert(firstHalfMatchLength >= MINMATCH || secondHalfMatchLength >= MINMATCH);
if (firstHalfMatchLength < MINMATCH && firstHalfMatchLength != 0) {
/* Move the endPos forward so that it creates match of at least MINMATCH length */
endPosInSequence = MINMATCH + inSeqs[idx].litLength;
bytesAdjustment = MINMATCH - firstHalfMatchLength;
} else if (secondHalfMatchLength < MINMATCH && secondHalfMatchLength != 0) {
/* Move the endPos backward so that it creates match of at least MINMATCH length */
endPosInSequence -= MINMATCH - secondHalfMatchLength;
bytesAdjustment = secondHalfMatchLength - MINMATCH;
}
} else {
/* If we can, prefer to simply move endPos to the end of the literals */
bytesAdjustment = (int)inSeqs[idx].litLength - (int)endPosInSequence;
endPosInSequence = inSeqs[idx].litLength;
}
}
}

sequenceRange->startIdx = sequenceRange->endIdx;
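
Note on the hunk above: with ZSTD_sf_noBlockDelimiters, when a block boundary would land inside a match that has to be split, the new code nudges endPosInSequence so that neither half of the split match falls below MINMATCH, and reports the resulting byte adjustment to the caller. The standalone sketch below is not part of the commit; it is a simplified illustration of that rule, assuming MINMATCH == 3 and measuring the second half of the split against matchLength.

/* Simplified illustration (not the commit's code) of the boundary-adjustment
 * rule: nudge the block boundary so that neither half of a split match is
 * shorter than MINMATCH, and return how many bytes the block grew (+) or
 * shrank (-). */
#include <assert.h>
#include <stdio.h>

#define MINMATCH 3  /* zstd's minimum match length */

/* endPos: tentative end position inside a sequence spanning litLength + matchLength bytes. */
static int adjust_boundary(unsigned litLength, unsigned matchLength, unsigned* endPos)
{
    unsigned const firstHalf  = *endPos - litLength;      /* match bytes kept in this block */
    unsigned const secondHalf = matchLength - firstHalf;  /* match bytes left for the next block */
    assert(*endPos > litLength && *endPos < litLength + matchLength);
    if (firstHalf < MINMATCH && firstHalf != 0) {
        *endPos = litLength + MINMATCH;                   /* move forward: block grows */
        return (int)(MINMATCH - firstHalf);
    }
    if (secondHalf < MINMATCH && secondHalf != 0) {
        *endPos -= MINMATCH - secondHalf;                 /* move backward: block shrinks */
        return -(int)(MINMATCH - secondHalf);
    }
    return 0;                                             /* both halves are already long enough */
}

int main(void)
{
    unsigned endPos = 12;                     /* boundary 2 bytes into a 40-byte match after 10 literals */
    int const delta = adjust_boundary(10, 40, &endPos);
    printf("endPos=%u delta=%d\n", endPos, delta);  /* prints: endPos=13 delta=1 */
    return 0;
}

In the commit itself the returned adjustment is fed back into blockSize by ZSTD_compressSequences_ext_internal (see the later hunk), so the next block's sequence range starts exactly where this one was clipped.
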
@@ -4541,12 +4564,14 @@ static void ZSTD_updateSequenceRange(ZSTD_sequenceRange* sequenceRange, size_t n
DEBUGLOG(4, "endidx: (of: %u ml: %u ll: %u)", inSeqs[sequenceRange->endIdx].offset, inSeqs[sequenceRange->endIdx].matchLength, inSeqs[sequenceRange->endIdx].litLength);
DEBUGLOG(4, "finished update: startidx %u startpos: %u endidx: %u endpos: %u",
sequenceRange->startIdx, sequenceRange->startPosInSequence, sequenceRange->endIdx, sequenceRange->endPosInSequence);
DEBUGLOG(4, "final PIS was adjusted by: %d bytes", bytesAdjustment);
return bytesAdjustment;
}

/* Returns size of sequences range copied, otherwise ZSTD error code */
static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, const ZSTD_sequenceRange* seqRange,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t srcSize) {
const void* src, size_t srcSize, ZSTD_sequenceFormat_e format) {
DEBUGLOG(4, "ZSTD_copySequencesToSeqStore: numSeqs: %zu srcSize: %zu", inSeqsSize, srcSize);
size_t idx = seqRange->startIdx;
BYTE const* istart = (BYTE const*)src;
@@ -4559,26 +4584,34 @@ static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, const ZSTD_sequ
U32 offCode = inSeqs[idx].offset + ZSTD_REP_MOVE;

/* Adjust litLength and matchLength for the sequence at startIdx */
if (idx == seqRange->startIdx) {
if (seqRange->startIdx == seqRange->endIdx) {
U32 seqLength = seqRange->endPosInSequence - seqRange->startPosInSequence;
RETURN_ERROR_IF(seqLength > litLength + matchLength, corruption_detected, "SeqLength cannot be bigger than sequence length!");
if (seqLength <= litLength) {
/* Sequence is entirely literals, break and store last literals */
break;
} else if (seqLength <= litLength + matchLength) {
matchLength = seqLength - litLength;
}
} else if (idx == seqRange->startIdx) {
U32 posInSequence = seqRange->startPosInSequence;
DEBUGLOG(4, "At startIdx: idx: %u PIS: %u", idx, posInSequence);
assert(posInSequence <= litLength + matchLength);
if (posInSequence >= litLength) {
litLength = 0;
/* position is within the match */
posInSequence -= litLength;
litLength = 0;
matchLength -= posInSequence;
} else {
/* position is within the literals */
litLength -= posInSequence;
}
matchLength -= posInSequence;
if (matchLength <= MINMATCH && offCode != ZSTD_REP_MOVE /* dont trigger this in lastLL case */) {
DEBUGLOG(4, "start idx: %zu, seq: (ll: %u, ml: %u, of: %u)", idx, litLength, matchLength, offCode);
RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small! Start Idx");
}
DEBUGLOG(4, "startIdx seq finalized: ll: %u ml: %u", litLength, matchLength);
}

/* Adjust litLength and matchLength for the sequence at endIdx */
if (idx == seqRange->endIdx) {
} else if (idx == seqRange->endIdx) {
U32 posInSequence = seqRange->endPosInSequence;
DEBUGLOG(4, "Reached endIdx. idx: %u PIS: %u", idx, posInSequence);
if (posInSequence == 0) {
@@ -4586,21 +4619,24 @@ static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, const ZSTD_sequ
corruption_detected, "Contract violation");
}
assert(posInSequence <= litLength + matchLength);
if (posInSequence < litLength) {
litLength = posInSequence;
matchLength = 0;
if (posInSequence <= litLength) {
/* position is within the last literals, break and store last literals */
break;
} else {
/* position is within the match */
matchLength = posInSequence - litLength;
/* ZSTD_updateSequenceRange should never give us a position such that we generate a match too small */
if (matchLength <= MINMATCH && offCode != ZSTD_REP_MOVE) {
DEBUGLOG(4, "start idx: %zu, seq: (ll: %u, ml: %u, of: %u)", idx, litLength, matchLength, offCode);
RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small! Start Idx");
}
}
DEBUGLOG(4, "final ll:%u ml: %u", litLength, matchLength);
DEBUGLOG(4, "endIdx seq finalized: ll:%u ml: %u", litLength, matchLength);
}

/* ML == 0 and Offset == 0 (or offCode == ZSTD_REP_MOVE) implies we are ending a block */
/* ML == 0 and Offset == 0 (or offCode == ZSTD_REP_MOVE) signals block delimiters */
if (matchLength == 0 && offCode == ZSTD_REP_MOVE) {
RETURN_ERROR_IF(format == ZSTD_sf_noBlockDelimiters, corruption_detected, "No block delimiters allowed!");
/* Handle last literals case */
if (litLength > 0) {
DEBUGLOG(4, "Storing last literals: %u bytes, idx: %u", litLength, idx);
@@ -4610,7 +4646,7 @@ static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, const ZSTD_sequ
continue;
}

//DEBUGLOG(4, "Storing in actual seqStore idx: %zu, seq: (ll: %u, ml: %u, of: %u), rep: %u", idx, litLength, matchLength - MINMATCH, offCode, inSeqs[idx].rep);
DEBUGLOG(4, "Storing in actual seqStore idx: %zu, seq: (ll: %u, ml: %u, of: %u), rep: %u", idx, litLength, matchLength - MINMATCH, offCode, inSeqs[idx].rep);
RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small! of: %u ml: %u ll: %u", offCode, matchLength, litLength);
if (inSeqs[idx].rep) {
ZSTD_storeSeq(seqStore, litLength, ip, iend, inSeqs[idx].rep - 1, matchLength - MINMATCH);
@@ -4620,6 +4656,13 @@ static size_t ZSTD_copySequencesToSeqStore(seqStore_t* seqStore, const ZSTD_sequ
ip += matchLength + litLength;
}

if (format == ZSTD_sf_noBlockDelimiters && ip != iend) {
assert(ip <= iend);
U32 lastLLSize = (U32)(iend - ip);
DEBUGLOG(4, "Storing last literals of size: %u", lastLLSize);
ZSTD_storeLastLiterals(seqStore, ip, lastLLSize);
}

DEBUGLOG(4, "ZSTD_copySequencesToSeqStore: done");
return 0;
}
@@ -4647,12 +4690,14 @@ size_t ZSTD_compressSequences_ext_internal(void* dst, size_t dstCapacity,

while (remaining) {
U32 cBlockSize;
int additionalByteAdjustment;
lastBlock = remaining <= cctx->blockSize;
blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize;
blockSeqStore = cctx->seqStore;
ZSTD_resetSeqStore(&blockSeqStore);
additionalByteAdjustment = ZSTD_updateSequenceRange(&seqRange, blockSize, inSeqs, inSeqsSize, format);
blockSize += additionalByteAdjustment;
DEBUGLOG(4, "Working on new block. Blocksize: %u", blockSize);
ZSTD_updateSequenceRange(&seqRange, blockSize, inSeqs, inSeqsSize);

/* Skip over uncompressible blocks */
if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
@@ -4667,7 +4712,7 @@ size_t ZSTD_compressSequences_ext_internal(void* dst, size_t dstCapacity,
continue;
}

ZSTD_copySequencesToSeqStore(&blockSeqStore, &seqRange, inSeqs, inSeqsSize, ip, blockSize);
ZSTD_copySequencesToSeqStore(&blockSeqStore, &seqRange, inSeqs, inSeqsSize, ip, blockSize, format);
compressedSeqsSize = ZSTD_compressSequences(&blockSeqStore,
&cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
&cctx->appliedParams,
@@ -4720,13 +4765,13 @@ size_t ZSTD_compressSequences_ext(void* dst, size_t dstCapacity,
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
const void* src, size_t srcSize, int compressionLevel,
ZSTD_sequenceFormat_e format) {
DEBUGLOG(4, "ZSTD_compressSequences_ext()");
BYTE* op = (BYTE*)dst;
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
size_t cSize = 0;
size_t compressedBlocksSize = 0;
size_t frameHeaderSize = 0;

DEBUGLOG(4, "ZSTD_compressSequences_ext()");
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
/* Initialization stage */
@@ -4758,7 +4803,7 @@ size_t ZSTD_compressSequences_ext(void* dst, size_t dstCapacity,
ZSTDb_buffered) , "");
assert(cctx->appliedParams.nbWorkers == 0);
}
DEBUGLOG(4, "CCtx blockSize: %zu\n", cctx->blockSize);
DEBUGLOG(4, "CCtx blockSize: %zu", cctx->blockSize);
if (dstCapacity < ZSTD_compressBound(srcSize))
RETURN_ERROR(dstSize_tooSmall, "Destination buffer too small!");
@@ -4767,9 +4812,6 @@ size_t ZSTD_compressSequences_ext(void* dst, size_t dstCapacity,
dstCapacity -= frameHeaderSize;
cSize += frameHeaderSize;

if (cctx->appliedParams.ldmParams.enableLdm) {
ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
}
if (cctx->appliedParams.fParams.checksumFlag && srcSize) {
XXH64_update(&cctx->xxhState, src, srcSize);
}
@@ -4783,7 +4825,7 @@ size_t ZSTD_compressSequences_ext(void* dst, size_t dstCapacity,
}
cSize += compressedBlocksSize;
dstCapacity -= compressedBlocksSize;
DEBUGLOG(4, "cSize after compressSequences_internal: %zu\n", cSize);
DEBUGLOG(4, "cSize after compressSequences_internal: %zu", cSize);

if (cctx->appliedParams.fParams.checksumFlag) {
U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
@@ -4793,7 +4835,7 @@ size_t ZSTD_compressSequences_ext(void* dst, size_t dstCapacity,
cSize += 4;
}

DEBUGLOG(4, "Final compressed size: %zu\n", cSize);
DEBUGLOG(4, "Final compressed size: %zu", cSize);
ZSTD_freeCCtx(cctx);
return cSize;
}
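
For orientation, here is a minimal caller-side sketch of the entry point this commit extends. It is not part of the commit and makes several assumptions: that ZSTD_compressSequences_ext(), ZSTD_Sequence, ZSTD_sequenceFormat_e and ZSTD_sf_noBlockDelimiters are visible through zstd.h's experimental (ZSTD_STATIC_LINKING_ONLY) section on this branch, and that ZSTD_getSequences() is available as a convenient source of input sequences; error handling is abbreviated.

/* Hypothetical usage sketch (assumptions noted above), not part of the commit. */
#define ZSTD_STATIC_LINKING_ONLY
#include <stdio.h>
#include <stdlib.h>
#include "zstd.h"

int main(void)
{
    const char src[] = "an example payload, an example payload, an example payload";
    size_t const srcSize = sizeof(src);

    /* 1. Obtain a sequence array for the source. Here zstd itself produces it;
     *    a real user of this API would typically supply externally generated sequences. */
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    size_t const maxSeqs = srcSize;  /* generous upper bound on the number of sequences */
    ZSTD_Sequence* const seqs = malloc(maxSeqs * sizeof(ZSTD_Sequence));
    size_t const nbSeqs = ZSTD_getSequences(cctx, seqs, maxSeqs, src, srcSize);
    if (ZSTD_isError(nbSeqs)) { fprintf(stderr, "getSequences failed\n"); return 1; }

    /* 2. Re-compress the source from those sequences with the function shown in
     *    the diff above, using the no-block-delimiters format. */
    size_t const dstCapacity = ZSTD_compressBound(srcSize);
    void* const dst = malloc(dstCapacity);
    size_t const cSize = ZSTD_compressSequences_ext(dst, dstCapacity,
                                                    seqs, nbSeqs,
                                                    src, srcSize,
                                                    /* compressionLevel */ 3,
                                                    ZSTD_sf_noBlockDelimiters);
    if (ZSTD_isError(cSize)) {
        fprintf(stderr, "compression failed: %s\n", ZSTD_getErrorName(cSize));
    } else {
        printf("compressed %zu -> %zu bytes\n", srcSize, cSize);
    }

    free(dst);
    free(seqs);
    ZSTD_freeCCtx(cctx);
    return 0;
}

With ZSTD_sf_noBlockDelimiters the caller passes one flat sequence array for the whole source; ZSTD_updateSequenceRange() and ZSTD_copySequencesToSeqStore() then decide, block by block, which slice of that array to encode and where to store trailing literals.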