found a few more places which were dependent on seqStore offcode sumtype numeric representation

Yann Collet 2021-12-28 16:18:44 -08:00
parent de9f52e945
commit 8da414231d
5 changed files with 54 additions and 38 deletions
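
For context: the "sumtype numeric representation" named in the commit message packs two cases into one unsigned value. Stored values 0..ZSTD_REP_MOVE (i.e. 0..2) encode repcodes 1..3, and anything larger encodes a raw offset shifted up by ZSTD_REP_MOVE. The standalone sketch below is illustrative only (not part of the diff): the STORED_* macro bodies appear verbatim in the diff further down, while the STORE_* constructors, the STORED_IS_* predicates, and the main() driver are reconstructed from how they are used and may differ in detail from the header.

#include <assert.h>
#include <stdio.h>

#define ZSTD_REP_NUM  3
#define ZSTD_REP_MOVE (ZSTD_REP_NUM - 1)

/* constructors : tag a repcode id (1-3) or a raw offset into one value */
#define STORE_REPCODE(r)     (assert((r)>=1), assert((r)<=3), (unsigned)((r)-1))
#define STORE_OFFSET(o)      (assert((o)>0), (unsigned)((o)+ZSTD_REP_MOVE))
/* discriminators and destructors */
#define STORED_IS_OFFSET(o)  ((o) > ZSTD_REP_MOVE)
#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE)
#define STORED_OFFSET(o)     (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
#define STORED_REPCODE(o)    (assert(STORED_IS_REPCODE(o)), (o)+1)  /* returns ID 1,2,3 */

int main(void)
{
    unsigned const rep2   = STORE_REPCODE(2);   /* stored as 1 */
    unsigned const off777 = STORE_OFFSET(777);  /* stored as 779 */
    assert(STORED_IS_REPCODE(rep2)  && STORED_REPCODE(rep2)  == 2);
    assert(STORED_IS_OFFSET(off777) && STORED_OFFSET(off777) == 777);
    printf("repcode id %u, raw offset %u\n", STORED_REPCODE(rep2), STORED_OFFSET(off777));
    return 0;
}

Code that open-codes this convention (offCode + 1, offCode < ZSTD_REP_NUM, offcode = 0, ...) silently depends on the exact numeric layout; the diff below replaces such sites with the macros so the layout can change later without hunting them down again.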

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c

@@ -3434,11 +3434,13 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
 
 /**
  * Returns the raw offset represented by the combination of offCode, ll0, and repcode history.
- * offCode must be an offCode representing a repcode, therefore in the range of [0, 2].
+ * offCode must represent a repcode in the numeric representation of ZSTD_storeSeq().
  */
-static U32 ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0) {
-    U32 const adjustedOffCode = offCode + ll0;
-    assert(offCode < ZSTD_REP_NUM);
+static U32
+ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0)
+{
+    U32 const adjustedOffCode = STORED_REPCODE(offCode) - 1 + ll0;  /* [ 0 - 3 ] */
+    assert(STORED_IS_REPCODE(offCode));
     if (adjustedOffCode == ZSTD_REP_NUM) {
         /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */
         assert(rep[0] > 0);
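
Restated outside the diff, the resolution rule is small enough to show whole. This is an illustrative paraphrase reconstructed from the visible comment and surrounding code, not the library function itself; the repcode id plus the ll0 flag select a slot in the 3-entry history, with one special case, "first repcode minus 1":

/* paraphrase of ZSTD_resolveRepcodeToRawOffset() (illustrative only) */
static unsigned resolveRepcode(const unsigned rep[3], unsigned repcodeID /* 1-3 */, unsigned ll0)
{
    unsigned const adjusted = repcodeID - 1 + ll0;  /* range [0-3] */
    if (adjusted == 3) {
        /* litLength == 0 combined with repcode 3 selects rep[0] - 1 */
        return rep[0] - 1;
    }
    return rep[adjusted];
}
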
@@ -3466,9 +3468,9 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_
     for (; idx < nbSeq; ++idx) {
         seqDef* const seq = seqStore->sequencesStart + idx;
         U32 const ll0 = (seq->litLength == 0);
-        U32 const offCode = seq->offBase - 1;
+        U32 const offCode = OFFBASE_TO_STORED(seq->offBase);
         assert(seq->offBase > 0);
-        if (offCode < ZSTD_REP_NUM) {
+        if (STORED_IS_REPCODE(offCode)) {
             U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0);
             U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0);
             /* Adjust simulated decompression repcode history if we come across a mismatch. Replace
@@ -3482,7 +3484,7 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_
         /* Compression repcode history is always updated with values directly from the unmodified seqStore.
          * Decompression repcode history may use modified seq->offset value taken from compression repcode history.
          */
-        ZSTD_updateRep(dRepcodes->rep, seq->offBase - 1, ll0);
+        ZSTD_updateRep(dRepcodes->rep, OFFBASE_TO_STORED(seq->offBase), ll0);
         ZSTD_updateRep(cRepcodes->rep, offCode, ll0);
     }
 }
@@ -3492,11 +3494,13 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_
  *
  * Returns the total size of that block (including header) or a ZSTD error code.
  */
-static size_t ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore,
+static size_t
+ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore,
                                   repcodes_t* const dRep, repcodes_t* const cRep,
                                   void* dst, size_t dstCapacity,
                                   const void* src, size_t srcSize,
-                                  U32 lastBlock, U32 isPartition) {
+                                  U32 lastBlock, U32 isPartition)
+{
     const U32 rleMaxLength = 25;
     BYTE* op = (BYTE*)dst;
     const BYTE* ip = (const BYTE*)src;
@@ -3505,6 +3509,7 @@ static size_t ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const
 
     /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */
     repcodes_t const dRepOriginal = *dRep;
+    DEBUGLOG(5, "ZSTD_compressSeqStore_singleBlock");
     if (isPartition)
         ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart));
 
@@ -3577,8 +3582,10 @@ typedef struct {
  * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS. At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize
  * maximum of 128 KB, this value is actually impossible to reach.
  */
-static void ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
-                                         ZSTD_CCtx* zc, const seqStore_t* origSeqStore) {
+static void
+ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
+                             ZSTD_CCtx* zc, const seqStore_t* origSeqStore)
+{
     seqStore_t* fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk;
     seqStore_t* firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore;
     seqStore_t* secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore;
@@ -3633,8 +3640,10 @@ static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq)
 *
 * Returns combined size of all blocks (which includes headers), or a ZSTD error code.
 */
-static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity,
-                                                     const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq) {
+static size_t
+ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity,
+                                       const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq)
+{
    size_t cSize = 0;
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
@@ -3720,9 +3729,11 @@ static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, s
     return cSize;
 }
 
-static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
+static size_t
+ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
                               void* dst, size_t dstCapacity,
-                              const void* src, size_t srcSize, U32 lastBlock) {
+                              const void* src, size_t srcSize, U32 lastBlock)
+{
     const BYTE* ip = (const BYTE*)src;
     BYTE* op = (BYTE*)dst;
     U32 nbSeq;
@@ -3748,7 +3759,8 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
     return cSize;
 }
 
-static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
+static size_t
+ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
                             void* dst, size_t dstCapacity,
                             const void* src, size_t srcSize, U32 frame)
 {

diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h

@@ -588,6 +588,7 @@ ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE con
 #define STORED_OFFSET(o)  (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
 #define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1)  /* returns ID 1,2,3 */
 #define STORED_TO_OFFBASE(o) ((o)+1)
+#define OFFBASE_TO_STORED(o) ((o)-1)
 
 /*! ZSTD_storeSeq() :
  *  Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
@@ -608,7 +609,7 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
     if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
     {   U32 const pos = (U32)((const BYTE*)literals - g_start);
         DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
-               pos, (U32)litLength, (U32)matchLength, (U32)offCode);
+               pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1);
     }
 #endif
     assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
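
The one functional addition in this file is OFFBASE_TO_STORED(), the inverse of the existing STORED_TO_OFFBASE(): seq->offBase is simply the stored value plus one, which keeps offBase strictly positive (hence the assert(seq->offBase > 0) in zstd_compress.c above). A minimal sanity sketch, assuming nothing beyond the two macro bodies shown in the diff:

#include <assert.h>

#define STORED_TO_OFFBASE(o) ((o)+1)
#define OFFBASE_TO_STORED(o) ((o)-1)

int main(void)
{
    unsigned stored;
    for (stored = 0; stored < 1000; ++stored) {
        unsigned const offBase = STORED_TO_OFFBASE(stored);
        assert(offBase > 0);                           /* offBase is 1-based */
        assert(OFFBASE_TO_STORED(offBase) == stored);  /* exact inverse */
    }
    return 0;
}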

diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c

@@ -1539,8 +1539,9 @@ ZSTD_compressBlock_lazy_generic(
 #endif
     while (ip < ilimit) {
         size_t matchLength=0;
-        size_t offcode=0;
+        size_t offcode=STORE_REPCODE_1;
         const BYTE* start=ip+1;
+        DEBUGLOG(7, "search baseline (depth 0)");
 
         /* check repCode */
         if (isDxS) {
@@ -1577,6 +1578,7 @@ ZSTD_compressBlock_lazy_generic(
         /* let's try to find a better solution */
         if (depth>=1)
         while (ip<ilimit) {
+            DEBUGLOG(7, "search depth 1");
             ip ++;
             if ( (dictMode == ZSTD_noDict)
               && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
@@ -1584,7 +1586,7 @@ ZSTD_compressBlock_lazy_generic(
                 int const gain2 = (int)(mlRep * 3);
                 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                 if ((mlRep >= 4) && (gain2 > gain1))
-                    matchLength = mlRep, offcode = 0, start = ip;
+                    matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
             }
             if (isDxS) {
                 const U32 repIndex = (U32)(ip - base) - offset_1;
@@ -1598,12 +1600,12 @@ ZSTD_compressBlock_lazy_generic(
                     int const gain2 = (int)(mlRep * 3);
                     int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                     if ((mlRep >= 4) && (gain2 > gain1))
-                        matchLength = mlRep, offcode = 0, start = ip;
+                        matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
                 }
             }
             { size_t offset2=999999999;
                 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
-                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
+                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2)));   /* raw approx */
                 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4);
                 if ((ml2 >= 4) && (gain2 > gain1)) {
                     matchLength = ml2, offcode = offset2, start = ip;
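
The gain1/gain2 comparison above is a cheap price model: a candidate earns roughly 3 or 4 "points" per matched byte and pays the log2 of its offset code. The old spelling offset2+1 computed the same value as STORED_TO_OFFBASE(offset2) but hard-coded the numeric representation; the macro makes the dependency explicit. A worked example with made-up values (highbit32 here is a portable stand-in for ZSTD_highbit32, i.e. floor(log2)):

#include <stdio.h>

static int highbit32(unsigned v) { int n = -1; while (v) { v >>= 1; n++; } return n; }
#define STORED_TO_OFFBASE(o) ((o)+1)

int main(void)
{
    size_t const matchLength = 5, offcode = 0;  /* incumbent: repcode 1, stored as 0 */
    size_t const ml2 = 6, offset2 = 779;        /* candidate: raw offset 777, stored as 779 */
    int const gain1 = (int)(matchLength*4 - highbit32((unsigned)STORED_TO_OFFBASE(offcode)) + 4);
    int const gain2 = (int)(ml2*4 - highbit32((unsigned)STORED_TO_OFFBASE(offset2)));
    /* gain1 = 20 - 0 + 4 = 24 ; gain2 = 24 - 9 = 15 */
    printf("gain1=%d gain2=%d -> keep the %s\n", gain1, gain2,
           (gain2 > gain1) ? "candidate" : "incumbent");
    return 0;
}

The one-byte-longer candidate loses: its 777-byte offset costs ~9 bits while the repcode costs ~0, and the incumbent also gets the +4 continuation bonus.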
@@ -1612,6 +1614,7 @@ ZSTD_compressBlock_lazy_generic(
 
             /* let's find an even better one */
             if ((depth==2) && (ip<ilimit)) {
+                DEBUGLOG(7, "search depth 2");
                 ip ++;
                 if ( (dictMode == ZSTD_noDict)
                   && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
@@ -1619,7 +1622,7 @@ ZSTD_compressBlock_lazy_generic(
                     int const gain2 = (int)(mlRep * 4);
                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                     if ((mlRep >= 4) && (gain2 > gain1))
-                        matchLength = mlRep, offcode = 0, start = ip;
+                        matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
                 }
                 if (isDxS) {
                     const U32 repIndex = (U32)(ip - base) - offset_1;
@@ -1633,7 +1636,7 @@ ZSTD_compressBlock_lazy_generic(
                         int const gain2 = (int)(mlRep * 4);
                         int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                         if ((mlRep >= 4) && (gain2 > gain1))
-                            matchLength = mlRep, offcode = 0, start = ip;
+                            matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
                     }
                 }
                 { size_t offset2=999999999;
@@ -1652,7 +1655,7 @@ ZSTD_compressBlock_lazy_generic(
              * notably if `value` is unsigned, resulting in a large positive `-value`.
              */
         /* catch up */
-        if (offcode) {
+        if (STORED_IS_OFFSET(offcode)) {
             if (dictMode == ZSTD_noDict) {
                 while ( ((start > anchor) & (start - STORED_OFFSET(offcode) > prefixLowest))
                      && (start[-1] == (start-STORED_OFFSET(offcode))[-1]) )  /* only search for offset within prefix */
@@ -1902,7 +1905,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
 #endif
     while (ip < ilimit) {
         size_t matchLength=0;
-        size_t offcode=0;
+        size_t offcode=STORE_REPCODE_1;
         const BYTE* start=ip+1;
         U32 curr = (U32)(ip-base);
 
@@ -1952,7 +1955,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
                     int const gain2 = (int)(repLength * 3);
                     int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                     if ((repLength >= 4) && (gain2 > gain1))
-                        matchLength = repLength, offcode = 0, start = ip;
+                        matchLength = repLength, offcode = STORE_REPCODE_1, start = ip;
             }   }
 
             /* search match, depth 1 */
@@ -1984,7 +1987,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
                     int const gain2 = (int)(repLength * 4);
                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                     if ((repLength >= 4) && (gain2 > gain1))
-                        matchLength = repLength, offcode = 0, start = ip;
+                        matchLength = repLength, offcode = STORE_REPCODE_1, start = ip;
             }   }
 
             /* search match, depth 2 */
@@ -2000,7 +2003,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
         }
 
         /* catch up */
-        if (offcode) {
+        if (STORED_IS_OFFSET(offcode)) {
             U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode));
             const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
             const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;

diff --git a/tests/fuzz/sequence_compression_api.c b/tests/fuzz/sequence_compression_api.c

@@ -72,10 +72,10 @@ static size_t decodeSequences(void* dst, size_t nbSequences,
                               size_t literalsSize, const void* dict, size_t dictSize) {
     const uint8_t* litPtr = literalsBuffer;
     const uint8_t* const litBegin = literalsBuffer;
-    const uint8_t* const litEnd = literalsBuffer + literalsSize;
+    const uint8_t* const litEnd = litBegin + literalsSize;
     const uint8_t* dictPtr = dict;
     uint8_t* op = dst;
-    const uint8_t* const oend = dst + ZSTD_FUZZ_GENERATED_SRC_MAXSIZE;
+    const uint8_t* const oend = (uint8_t*)dst + ZSTD_FUZZ_GENERATED_SRC_MAXSIZE;
     size_t generatedSrcBufferSize = 0;
     size_t bytesWritten = 0;
     uint32_t lastLLSize;

diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c

@@ -123,7 +123,7 @@ FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, FUZZ_dataProducer_t *pro
         size_t const offset = FUZZ_dataProducer_uint32Range(producer, 0, MAX(srcSize, 1) - 1);
         size_t const limit = MIN(srcSize - offset, remaining);
         size_t const toCopy = MIN(limit, remaining / (nbSamples - sample));
-        memcpy(samples + pos, src + offset, toCopy);
+        memcpy(samples + pos, (const char*)src + offset, toCopy);
         pos += toCopy;
         samplesSizes[sample] = toCopy;
     }
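
Both fuzzer tweaks fix the same portability wart rather than anything offcode-related: ISO C does not define arithmetic on void* (gcc allows it as an extension, treating sizeof(void) as 1), so the pointer is cast to a byte type before the offset is applied. A minimal illustration of the pattern, assuming nothing beyond the standard library:

#include <string.h>

/* copy n bytes from src+offset: the cast makes the arithmetic well-defined C */
static void copyAt(void* dst, const void* src, size_t offset, size_t n)
{
    memcpy(dst, (const char*)src + offset, n);
}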