diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 0b1543f7..f06456af 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3434,11 +3434,13 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, /** * Returns the raw offset represented by the combination of offCode, ll0, and repcode history. - * offCode must be an offCode representing a repcode, therefore in the range of [0, 2]. + * offCode must represent a repcode in the numeric representation of ZSTD_storeSeq(). */ -static U32 ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0) { - U32 const adjustedOffCode = offCode + ll0; - assert(offCode < ZSTD_REP_NUM); +static U32 +ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0) +{ + U32 const adjustedOffCode = STORED_REPCODE(offCode) - 1 + ll0; /* [ 0 - 3 ] */ + assert(STORED_IS_REPCODE(offCode)); if (adjustedOffCode == ZSTD_REP_NUM) { /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */ assert(rep[0] > 0); @@ -3466,9 +3468,9 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_ for (; idx < nbSeq; ++idx) { seqDef* const seq = seqStore->sequencesStart + idx; U32 const ll0 = (seq->litLength == 0); - U32 const offCode = seq->offBase - 1; + U32 const offCode = OFFBASE_TO_STORED(seq->offBase); assert(seq->offBase > 0); - if (offCode < ZSTD_REP_NUM) { + if (STORED_IS_REPCODE(offCode)) { U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0); U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0); /* Adjust simulated decompression repcode history if we come across a mismatch. Replace @@ -3482,7 +3484,7 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_ /* Compression repcode history is always updated with values directly from the unmodified seqStore. * Decompression repcode history may use modified seq->offset value taken from compression repcode history. */ - ZSTD_updateRep(dRepcodes->rep, seq->offBase - 1, ll0); + ZSTD_updateRep(dRepcodes->rep, OFFBASE_TO_STORED(seq->offBase), ll0); ZSTD_updateRep(cRepcodes->rep, offCode, ll0); } } @@ -3492,11 +3494,13 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_ * * Returns the total size of that block (including header) or a ZSTD error code. */ -static size_t ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore, - repcodes_t* const dRep, repcodes_t* const cRep, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - U32 lastBlock, U32 isPartition) { +static size_t +ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore, + repcodes_t* const dRep, repcodes_t* const cRep, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastBlock, U32 isPartition) +{ const U32 rleMaxLength = 25; BYTE* op = (BYTE*)dst; const BYTE* ip = (const BYTE*)src; @@ -3505,6 +3509,7 @@ static size_t ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */ repcodes_t const dRepOriginal = *dRep; + DEBUGLOG(5, "ZSTD_compressSeqStore_singleBlock"); if (isPartition) ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart)); @@ -3577,8 +3582,10 @@ typedef struct { * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS. At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize * maximum of 128 KB, this value is actually impossible to reach. */ -static void ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx, - ZSTD_CCtx* zc, const seqStore_t* origSeqStore) { +static void +ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx, + ZSTD_CCtx* zc, const seqStore_t* origSeqStore) +{ seqStore_t* fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk; seqStore_t* firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore; seqStore_t* secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore; @@ -3633,8 +3640,10 @@ static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) * * Returns combined size of all blocks (which includes headers), or a ZSTD error code. */ -static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, - const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq) { +static size_t +ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, + const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq) +{ size_t cSize = 0; const BYTE* ip = (const BYTE*)src; BYTE* op = (BYTE*)dst; @@ -3720,9 +3729,11 @@ static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, s return cSize; } -static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, U32 lastBlock) { +static size_t +ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, U32 lastBlock) +{ const BYTE* ip = (const BYTE*)src; BYTE* op = (BYTE*)dst; U32 nbSeq; @@ -3748,9 +3759,10 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, return cSize; } -static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, U32 frame) +static size_t +ZSTD_compressBlock_internal(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, U32 frame) { /* This the upper bound for the length of an rle block. * This isn't the actual upper bound. Finding the real threshold diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 2d55ddd7..22579ca0 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -588,6 +588,7 @@ ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE con #define STORED_OFFSET(o) (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE) #define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1) /* returns ID 1,2,3 */ #define STORED_TO_OFFBASE(o) ((o)+1) +#define OFFBASE_TO_STORED(o) ((o)-1) /*! ZSTD_storeSeq() : * Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t. @@ -608,7 +609,7 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr, if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ { U32 const pos = (U32)((const BYTE*)literals - g_start); DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", - pos, (U32)litLength, (U32)matchLength, (U32)offCode); + pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1); } #endif assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 72bbe719..2e38dcb4 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -1539,8 +1539,9 @@ ZSTD_compressBlock_lazy_generic( #endif while (ip < ilimit) { size_t matchLength=0; - size_t offcode=0; + size_t offcode=STORE_REPCODE_1; const BYTE* start=ip+1; + DEBUGLOG(7, "search baseline (depth 0)"); /* check repCode */ if (isDxS) { @@ -1577,6 +1578,7 @@ ZSTD_compressBlock_lazy_generic( /* let's try to find a better solution */ if (depth>=1) while (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { @@ -1584,7 +1586,7 @@ ZSTD_compressBlock_lazy_generic( int const gain2 = (int)(mlRep * 3); int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offcode = 0, start = ip; + matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip; } if (isDxS) { const U32 repIndex = (U32)(ip - base) - offset_1; @@ -1598,12 +1600,12 @@ ZSTD_compressBlock_lazy_generic( int const gain2 = (int)(mlRep * 3); int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offcode = 0, start = ip; + matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip; } } { size_t offset2=999999999; size_t const ml2 = searchMax(ms, ip, iend, &offset2); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4); if ((ml2 >= 4) && (gain2 > gain1)) { matchLength = ml2, offcode = offset2, start = ip; @@ -1612,6 +1614,7 @@ ZSTD_compressBlock_lazy_generic( /* let's find an even better one */ if ((depth==2) && (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { @@ -1619,7 +1622,7 @@ ZSTD_compressBlock_lazy_generic( int const gain2 = (int)(mlRep * 4); int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offcode = 0, start = ip; + matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip; } if (isDxS) { const U32 repIndex = (U32)(ip - base) - offset_1; @@ -1633,7 +1636,7 @@ ZSTD_compressBlock_lazy_generic( int const gain2 = (int)(mlRep * 4); int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offcode = 0, start = ip; + matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip; } } { size_t offset2=999999999; @@ -1648,11 +1651,11 @@ ZSTD_compressBlock_lazy_generic( } /* NOTE: - * Pay attention that `start[-value]` can lead to strange undefined behavior + * Pay attention that `start[-value]` can lead to strange undefined behavior * notably if `value` is unsigned, resulting in a large positive `-value`. */ /* catch up */ - if (offcode) { + if (STORED_IS_OFFSET(offcode)) { if (dictMode == ZSTD_noDict) { while ( ((start > anchor) & (start - STORED_OFFSET(offcode) > prefixLowest)) && (start[-1] == (start-STORED_OFFSET(offcode))[-1]) ) /* only search for offset within prefix */ @@ -1902,7 +1905,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( #endif while (ip < ilimit) { size_t matchLength=0; - size_t offcode=0; + size_t offcode=STORE_REPCODE_1; const BYTE* start=ip+1; U32 curr = (U32)(ip-base); @@ -1952,7 +1955,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( int const gain2 = (int)(repLength * 3); int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); if ((repLength >= 4) && (gain2 > gain1)) - matchLength = repLength, offcode = 0, start = ip; + matchLength = repLength, offcode = STORE_REPCODE_1, start = ip; } } /* search match, depth 1 */ @@ -1984,7 +1987,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( int const gain2 = (int)(repLength * 4); int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); if ((repLength >= 4) && (gain2 > gain1)) - matchLength = repLength, offcode = 0, start = ip; + matchLength = repLength, offcode = STORE_REPCODE_1, start = ip; } } /* search match, depth 2 */ @@ -2000,7 +2003,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( } /* catch up */ - if (offcode) { + if (STORED_IS_OFFSET(offcode)) { U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode)); const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; diff --git a/tests/fuzz/sequence_compression_api.c b/tests/fuzz/sequence_compression_api.c index cc840bf8..a2959e1a 100644 --- a/tests/fuzz/sequence_compression_api.c +++ b/tests/fuzz/sequence_compression_api.c @@ -72,10 +72,10 @@ static size_t decodeSequences(void* dst, size_t nbSequences, size_t literalsSize, const void* dict, size_t dictSize) { const uint8_t* litPtr = literalsBuffer; const uint8_t* const litBegin = literalsBuffer; - const uint8_t* const litEnd = literalsBuffer + literalsSize; + const uint8_t* const litEnd = litBegin + literalsSize; const uint8_t* dictPtr = dict; uint8_t* op = dst; - const uint8_t* const oend = dst + ZSTD_FUZZ_GENERATED_SRC_MAXSIZE; + const uint8_t* const oend = (uint8_t*)dst + ZSTD_FUZZ_GENERATED_SRC_MAXSIZE; size_t generatedSrcBufferSize = 0; size_t bytesWritten = 0; uint32_t lastLLSize; diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c index 0fbf3bed..f6657975 100644 --- a/tests/fuzz/zstd_helpers.c +++ b/tests/fuzz/zstd_helpers.c @@ -123,7 +123,7 @@ FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, FUZZ_dataProducer_t *pro size_t const offset = FUZZ_dataProducer_uint32Range(producer, 0, MAX(srcSize, 1) - 1); size_t const limit = MIN(srcSize - offset, remaining); size_t const toCopy = MIN(limit, remaining / (nbSamples - sample)); - memcpy(samples + pos, src + offset, toCopy); + memcpy(samples + pos, (const char*)src + offset, toCopy); pos += toCopy; samplesSizes[sample] = toCopy; }