diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 2f4d5b4c..d19f4a2a 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2196,13 +2196,13 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ const seqDef* const sequences = seqStorePtr->sequencesStart; + const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; const BYTE* const ofCodeTable = seqStorePtr->ofCode; const BYTE* const llCodeTable = seqStorePtr->llCode; const BYTE* const mlCodeTable = seqStorePtr->mlCode; BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstCapacity; BYTE* op = ostart; - size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); BYTE* seqHead; BYTE* lastNCount = NULL; @@ -2472,6 +2472,16 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; +static U32 countLiteralsBytes2(const seqStore_t* seqStore) { + U32 literalsBytes = 0; + U32 nbSeqs = seqStore->sequences - seqStore->sequencesStart; + for (int i = 0; i < nbSeqs; ++i) { + seqDef seq = seqStore->sequencesStart[i]; + literalsBytes += seq.litLength; + } + return literalsBytes; +} + static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) { ZSTD_matchState_t* const ms = &zc->blockState.matchState; @@ -2695,12 +2705,194 @@ static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc) /* Writes the block header */ static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) { + DEBUGLOG(3, "writeBlockHeader: cSize: %u blockSize: %u lastBlock: %u", cSize, blockSize, lastBlock); U32 const cBlockHeader = cSize == 1 ? 
lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); MEM_writeLE24(op, cBlockHeader); } +static U32 countLiteralsBytes(const seqStore_t* seqStore) { + U32 literalsBytes = 0; + U32 nbSeqs = seqStore->sequences - seqStore->sequencesStart; + for (int i = 0; i < nbSeqs; ++i) { + seqDef seq = seqStore->sequencesStart[i]; + literalsBytes += seq.litLength; + if (i == seqStore->longLengthPos && seqStore->longLengthID == 1) { + literalsBytes += 0x10000; + } + } + return literalsBytes; +} + +static U32 countMatchBytes(const seqStore_t* seqStore) { + U32 matchBytes = 0; + U32 nbSeqs = seqStore->sequences - seqStore->sequencesStart; + for (int i = 0; i < nbSeqs; ++i) { + seqDef seq = seqStore->sequencesStart[i]; + matchBytes += seq.matchLength + MINMATCH; + if (i == seqStore->longLengthPos && seqStore->longLengthID == 2) { + matchBytes += 0x10000; + } + } + return matchBytes; +} + +/* The issue is with setting the end of the literals. Existence of last literals in the seq store make it so that + we have to be careful with where we put our litEnds and whatnot. */ +static void setUpSeqStores(seqStore_t* firstSeqStore, seqStore_t* secondSeqStore, U32 nbSeq, U32 srcSize) { + size_t nbSeqFirstHalf = nbSeq/2; + size_t nbSeqSecondHalf = (nbSeq % 2 == 0) ? 
nbSeq/2 : nbSeq/2 + 1; + DEBUGLOG(2, "first half nbseq: %u second half nbseq: %u", nbSeqFirstHalf, nbSeqSecondHalf); + + const BYTE* const litEnd = firstSeqStore->lit; + const BYTE* const seqEnd = firstSeqStore->sequences; + + if (firstSeqStore->longLengthID != 0) { + DEBUGLOG(2, "long lenght ID present"); + if (firstSeqStore->longLengthPos < nbSeqFirstHalf) { + secondSeqStore->longLengthID = 0; + } else { + firstSeqStore->longLengthID = 0; + secondSeqStore->longLengthPos = secondSeqStore->longLengthPos - nbSeqFirstHalf; + } + } + + firstSeqStore->sequences = firstSeqStore->sequencesStart+nbSeqFirstHalf; + + U32 literalsBytesFirstHalf = countLiteralsBytes(firstSeqStore); + firstSeqStore->lit = firstSeqStore->litStart+literalsBytesFirstHalf; + + secondSeqStore->sequencesStart += nbSeqFirstHalf; + secondSeqStore->sequences = seqEnd; + secondSeqStore->litStart += literalsBytesFirstHalf; + secondSeqStore->lit = litEnd; + secondSeqStore->llCode += nbSeqFirstHalf; + secondSeqStore->mlCode += nbSeqFirstHalf; + secondSeqStore->ofCode += nbSeqFirstHalf; + +} + +static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, U32 frame, U32 lastBlock, U32 nbSeq) { + /* This the upper bound for the length of an rle block. + * This isn't the actual upper bound. Finding the real threshold + * needs further investigation. 
+     */
+    const U32 rleMaxLength = 25;
+    size_t cSize;
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    (void)rleMaxLength;   /* not yet used in the split path */
+    (void)frame;          /* not yet used in the split path */
+    DEBUGLOG(5, "ZSTD_compressBlock_splitBlock (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
+                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
+                (unsigned)zc->blockState.matchState.nextToUpdate);
+
+    /* Attempt block splitting here */
+    DEBUGLOG(3, "Block size pre-split is: %zu - lastBlock: %u, dst ptr: %p op: %p", srcSize, lastBlock, dst, (void*)op);
+    DEBUGLOG(3, "srcSize: %zu seq store size: %u", srcSize, countLiteralsBytes(&zc->seqStore) + countMatchBytes(&zc->seqStore));
+    seqStore_t firstHalfSeqStore = zc->seqStore;
+    seqStore_t secondHalfSeqStore = zc->seqStore;
+
+    setUpSeqStores(&firstHalfSeqStore, &secondHalfSeqStore, nbSeq, srcSize);
+
+    /* the two halves must exactly partition the original literals and sequences */
+    assert((U32)(firstHalfSeqStore.lit - firstHalfSeqStore.litStart) + (U32)(secondHalfSeqStore.lit - secondHalfSeqStore.litStart) == (U32)(zc->seqStore.lit - zc->seqStore.litStart));
+    assert((U32)(firstHalfSeqStore.sequences - firstHalfSeqStore.sequencesStart) + (U32)(secondHalfSeqStore.sequences - secondHalfSeqStore.sequencesStart) == (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart));
+
+    size_t cSizeFirstHalf;
+    size_t cSizeSecondHalf;
+
+    size_t const literalsBytesFirstHalf = countLiteralsBytes(&firstHalfSeqStore);
+    size_t const srcBytesFirstHalf = literalsBytesFirstHalf + countMatchBytes(&firstHalfSeqStore);
+    size_t const srcBytesSecondHalf = srcSize - srcBytesFirstHalf;
+    DEBUGLOG(3, "literals bytes first half: %zu literals bytes second half: %zu, orig: %zu", literalsBytesFirstHalf, (size_t)countLiteralsBytes(&secondHalfSeqStore), (size_t)countLiteralsBytes(&zc->seqStore));
+    DEBUGLOG(3, "match bytes first half: %zu match bytes second half: %zu, orig: %zu", (size_t)countMatchBytes(&firstHalfSeqStore), (size_t)countMatchBytes(&secondHalfSeqStore), (size_t)countMatchBytes(&zc->seqStore));
+    DEBUGLOG(2, "Src bytes first half: %zu src bytes second half: %zu", srcBytesFirstHalf, srcBytesSecondHalf);
+
+    { size_t cSeqsSizeFirstHalf = 
ZSTD_compressSequences(&firstHalfSeqStore, + &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, + srcBytesFirstHalf, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + zc->bmi2); + + if (!zc->isFirstBlock && + ZSTD_maybeRLE(&firstHalfSeqStore) && + ZSTD_isRLE((BYTE const*)src, srcSize)) { + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + cSeqsSizeFirstHalf = 1; + } + + if (cSeqsSizeFirstHalf == 0) { + cSizeFirstHalf = ZSTD_noCompressBlock(op, dstCapacity, ip, srcBytesFirstHalf, 0); + FORWARD_IF_ERROR(cSizeFirstHalf, "Nocompress block failed"); + DEBUGLOG(2, "1: Writing out nocompress block, size: %zu", cSizeFirstHalf); + } else if (cSeqsSizeFirstHalf == 1) { + cSizeFirstHalf = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcBytesFirstHalf, 0); + FORWARD_IF_ERROR(cSizeFirstHalf, "RLE compress block failed"); + DEBUGLOG(2, "1: Writing out RLE block, size: %zu", cSizeFirstHalf); + } else { + U32 cBlockHeader; + /* Error checking and repcodes update */ + ZSTD_confirmRepcodesAndEntropyTables(zc); + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + writeBlockHeader(op, cSeqsSizeFirstHalf, srcBytesFirstHalf, 0); + cSizeFirstHalf = ZSTD_blockHeaderSize + cSeqsSizeFirstHalf; + DEBUGLOG(3, "1: Writing out compressed block, size: %zu", cSizeFirstHalf); + } + } + + { int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; + } + + ip += srcBytesFirstHalf; + op += cSizeFirstHalf; + dstCapacity - cSizeFirstHalf; + + { U32 cSeqsSizeSecondHalf = ZSTD_compressSequences(&secondHalfSeqStore, + 
&zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, + srcBytesSecondHalf, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + zc->bmi2); + if (ZSTD_maybeRLE(&firstHalfSeqStore) && + ZSTD_isRLE((BYTE const*)src, srcSize)) { + cSeqsSizeSecondHalf = 1; + } + + if (cSeqsSizeSecondHalf == 0) { + cSizeSecondHalf = ZSTD_noCompressBlock(op, dstCapacity, ip, srcBytesSecondHalf, lastBlock); + FORWARD_IF_ERROR(cSizeFirstHalf, "Nocompress block failed"); + DEBUGLOG(2, "2: Writing out nocompress block, size: %zu", cSizeSecondHalf); + } else if (cSeqsSizeSecondHalf == 1) { + cSizeSecondHalf = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcBytesSecondHalf, lastBlock); + FORWARD_IF_ERROR(cSizeFirstHalf, "RLE compress block failed"); + DEBUGLOG(2, "2: Writing out RLE block, size: %zu", cSizeSecondHalf); + } else { + U32 cBlockHeader; + /* Error checking and repcodes update */ + ZSTD_confirmRepcodesAndEntropyTables(zc); + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + writeBlockHeader(op, cSeqsSizeSecondHalf, srcBytesSecondHalf, lastBlock); + cSizeSecondHalf = ZSTD_blockHeaderSize + cSeqsSizeSecondHalf; + DEBUGLOG(3, "2: Writing out compressed block, size: %zu", cSizeSecondHalf); + } + } + + DEBUGLOG(2, "cSizeFirstHalf: %u cSizeSecondHalf: %u", cSizeFirstHalf, cSizeSecondHalf); + cSize = cSizeFirstHalf + cSizeSecondHalf; + return cSize; +} + static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 frame, U32 lastBlock) @@ -2711,8 +2903,9 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, */ const U32 rleMaxLength = 25; size_t cSize; + size_t nbSeq; const BYTE* ip = (const BYTE*)src; - BYTE* op = (BYTE*)(dst + ZSTD_blockHeaderSize); + BYTE* op = 
(BYTE*)dst; DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate); @@ -2720,6 +2913,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } + nbSeq = (size_t)(zc->seqStore.sequences - zc->seqStore.sequencesStart); } if (zc->seqCollector.collectSequences) { @@ -2728,11 +2922,15 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, return 0; } + if (nbSeq >= 2) { + return ZSTD_compressBlock_splitBlock(zc, dst, dstCapacity, src, srcSize, frame, lastBlock, nbSeq); + } + /* encode sequences and literals */ cSize = ZSTD_entropyCompressSequences(&zc->seqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, &zc->appliedParams, - op, dstCapacity, + op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, srcSize, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, zc->bmi2); @@ -2770,7 +2968,7 @@ out: cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); } else { - writeBlockHeader(dst, cSize, srcSize, lastBlock); + writeBlockHeader(op, cSize, srcSize, lastBlock); cSize += ZSTD_blockHeaderSize; } return cSize; @@ -2936,7 +3134,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, assert(dstCapacity >= cSize); dstCapacity -= cSize; cctx->isFirstBlock = 0; - DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", + DEBUGLOG(2, "ZSTD_compress_frameChunk: adding a block of size %u", (unsigned)cSize); } } diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 15139501..9f753911 100644 --- a/lib/decompress/zstd_decompress.c +++ 
b/lib/decompress/zstd_decompress.c @@ -763,7 +763,7 @@ size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSiz static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - DEBUGLOG(5, "ZSTD_copyRawBlock"); + DEBUGLOG(2, "ZSTD_copyRawBlock: %u", srcSize); RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); if (dst == NULL) { if (srcSize == 0) return 0; @@ -847,6 +847,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, /* Loop on each block */ while (1) { + DEBUGLOG(2, "Remaining dstCap: %u", (size_t)(oend-op)); size_t decodedSize; blockProperties_t blockProperties; size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties); @@ -875,8 +876,10 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, if (ZSTD_isError(decodedSize)) return decodedSize; if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, op, decodedSize); - if (decodedSize != 0) + if (decodedSize != 0) { + DEBUGLOG(2, "Decoded: %u", decodedSize); op += decodedSize; + } assert(ip != NULL); ip += cBlockSize; remainingSrcSize -= cBlockSize; @@ -1189,7 +1192,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c } FORWARD_IF_ERROR(rSize, ""); RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); - DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); + DEBUGLOG(2, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); dctx->decodedSize += rSize; if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, dst, rSize); dctx->previousDstEnd = (char*)dst + rSize; diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index b71bc20d..56e4b5ef 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -775,6 +775,9 @@ size_t ZSTD_execSequenceEnd(BYTE* op, /* bounds checks : careful of 
address space overflow in 32-bit mode */ RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer"); + DEBUGLOG(2, "sequence length: %u", sequenceLength); + DEBUGLOG(2, "oLitEnd: %u iLitEnd: %u match: %u", oLitEnd, iLitEnd, match); + DEBUGLOG(2, "seq ll: %u, condition: %u", sequence.litLength, (size_t)(litLimit - *litPtr)); RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer"); assert(op < op + sequenceLength); assert(oLitEnd < op + sequenceLength); @@ -850,8 +853,13 @@ size_t ZSTD_execSequence(BYTE* op, op = oLitEnd; *litPtr = iLitEnd; /* update for next sequence */ + /* Copy Match */ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /*DEBUGLOG(2, "oLitEnd: %u, oMatchEnd: %u iLitEnd: %u matchPos: %u", oLitEnd, oMatchEnd, iLitEnd, match); + DEBUGLOG(2, "off: %u ml: %u ll: %u", sequence.offset, sequence.matchLength, sequence.litLength); + DEBUGLOG(2, "first condition: %u", (size_t)(oLitEnd - prefixStart)); + DEBUGLOG(2, "break condition: %u", (size_t)(oLitEnd - virtualStart));*/ /* offset beyond prefix -> go into extDict */ RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); match = dictEnd + (match - prefixStart); @@ -1210,6 +1218,9 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, /* last literal segment */ { size_t const lastLLSize = litEnd - litPtr; + if (lastLLSize > (size_t)(oend-op)) { + DEBUGLOG(2, "too small lastll"); + } RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); if (op != NULL) { ZSTD_memcpy(op, litPtr, lastLLSize); @@ -1458,7 +1469,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, * (note: but it could be evaluated from current-lowLimit) */ ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); - DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : 
%u)", (U32)srcSize); + DEBUGLOG(2, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");