diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 2f0736b2..7facbeff 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2265,6 +2265,77 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) return ZSTDbss_compress; } +static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) +{ + const seqStore_t* seqStore = ZSTD_getSeqStore(zc); + const seqDef* seqs = seqStore->sequencesStart; + size_t seqsSize = seqStore->sequences - seqs; + + ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; + size_t i; size_t position; int repIdx; + + assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); + for (i = 0, position = 0; i < seqsSize; ++i) { + outSeqs[i].offset = seqs[i].offset; + outSeqs[i].litLength = seqs[i].litLength; + outSeqs[i].matchLength = seqs[i].matchLength + MINMATCH; + + if (i == seqStore->longLengthPos) { + if (seqStore->longLengthID == 1) { + outSeqs[i].litLength += 0x10000; + } else if (seqStore->longLengthID == 2) { + outSeqs[i].matchLength += 0x10000; + } + } + + if (outSeqs[i].offset <= ZSTD_REP_NUM) { + outSeqs[i].rep = outSeqs[i].offset; + repIdx = (unsigned int)i - outSeqs[i].offset; + + if (outSeqs[i].litLength == 0) { + if (outSeqs[i].offset < 3) { + --repIdx; + } else { + repIdx = (unsigned int)i - 1; + } + ++outSeqs[i].rep; + } + assert(repIdx >= -3); + outSeqs[i].offset = repIdx >= 0 ? outSeqs[repIdx].offset : repStartValue[-repIdx - 1]; + if (outSeqs[i].rep == 4) { + --outSeqs[i].offset; + } + } else { + outSeqs[i].offset -= ZSTD_REP_NUM; + } + + position += outSeqs[i].litLength; + outSeqs[i].matchPos = (unsigned int)position; + position += outSeqs[i].matchLength; + } + zc->seqCollector.seqIndex += seqsSize; +} + +size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize) +{ + const size_t dstCapacity = ZSTD_compressBound(srcSize); + void* dst = ZSTD_malloc(dstCapacity, ZSTD_defaultCMem); + SeqCollector seqCollector; + + RETURN_ERROR_IF(dst == NULL, memory_allocation); + + seqCollector.collectSequences = 1; + seqCollector.seqStart = outSeqs; + seqCollector.seqIndex = 0; + seqCollector.maxSequences = outSeqsSize; + zc->seqCollector = seqCollector; + + ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); + ZSTD_free(dst, ZSTD_defaultCMem); + return zc->seqCollector.seqIndex; +} + /* Returns true if the given block is a RLE block */ static int ZSTD_isRLE(const BYTE *ip, size_t length) { size_t i; @@ -2296,6 +2367,11 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } } + if (zc->seqCollector.collectSequences) { + ZSTD_copyBlockSequences(zc); + return 0; + } + /* encode sequences and literals */ cSize = ZSTD_compressSequences(&zc->seqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, @@ -2360,7 +2436,6 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, } } - /*! ZSTD_compress_frameChunk() : * Compress a chunk of data into one or multiple blocks. * All blocks will be terminated, all input will be consumed. @@ -2405,7 +2480,6 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize, 1 /* frame */); FORWARD_IF_ERROR(cSize); - if (cSize == 0) { /* block is not compressible */ cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); FORWARD_IF_ERROR(cSize); diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 1140945b..14036f87 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -193,6 +193,13 @@ typedef struct { size_t capacity; /* The capacity starting from `seq` pointer */ } rawSeqStore_t; +typedef struct { + int collectSequences; + ZSTD_Sequence* seqStart; + size_t seqIndex; + size_t maxSequences; +} SeqCollector; + struct ZSTD_CCtx_params_s { ZSTD_format_e format; ZSTD_compressionParameters cParams; @@ -240,6 +247,7 @@ struct ZSTD_CCtx_s { XXH64_state_t xxhState; ZSTD_customMem customMem; size_t staticSize; + SeqCollector seqCollector; int isFirstBlock; seqStore_t seqStore; /* sequences storage ptrs */ diff --git a/lib/zstd.h b/lib/zstd.h index 42d4188c..66784562 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1077,6 +1077,24 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params; +typedef struct { + unsigned int matchPos; /* Match pos in dst */ + /* If seqDef.offset > 3, then this is seqDef.offset - 3 + * If seqDef.offset < 3, then this is the corresponding repeat offset + * But if seqDef.offset < 3 and litLength == 0, this is the + * repeat offset before the corresponding repeat offset + * And if seqDef.offset == 3 and litLength == 0, this is the + * most recent repeat offset - 1 + */ + unsigned int offset; + unsigned int litLength; /* Literal length */ + unsigned int matchLength; /* Match length */ + /* 0 when seq not rep and seqDef.offset otherwise + * when litLength == 0 this will be <= 4, otherwise <= 3 like normal + */ + unsigned int rep; +} ZSTD_Sequence; + typedef struct { unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ @@ -1215,6 +1233,15 @@ ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcS * or an error code (if srcSize is too small) */ ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); +/*! ZSTD_getSequences() : + * Extract sequences from the sequence store + * zc can be used to insert custom compression params. + * This function invokes ZSTD_compress2 + * @return : number of sequences extracted + */ +ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize); + /*************************************** * Memory management diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 3baa9b07..1f46363a 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -304,6 +304,28 @@ static int FUZ_mallocTests(unsigned seed, double compressibility, unsigned part) #endif +static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize, BYTE* src, size_t size) +{ + size_t i; + size_t j; + for(i = 0; i < seqsSize - 1; ++i) { + assert(dst + seqs[i].litLength + seqs[i].matchLength < dst + size); + assert(src + seqs[i].litLength + seqs[i].matchLength < src + size); + + memcpy(dst, src, seqs[i].litLength); + dst += seqs[i].litLength; + src += seqs[i].litLength; + size -= seqs[i].litLength; + + for (j = 0; j < seqs[i].matchLength; ++j) + dst[j] = dst[j - seqs[i].offset]; + dst += seqs[i].matchLength; + src += seqs[i].matchLength; + size -= seqs[i].matchLength; + } + memcpy(dst, src, size); +} + /*============================================= * Unit tests =============================================*/ @@ -1960,6 +1982,33 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "OK \n"); } + DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences decode from sequences test : ", testNb++); + { + size_t srcSize = 100 KB; + BYTE* src = (BYTE*)CNBuffer; + BYTE* decoded = (BYTE*)compressedBuffer; + + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + ZSTD_Sequence* seqs = (ZSTD_Sequence*)malloc(srcSize * sizeof(ZSTD_Sequence)); + size_t seqsSize; + + if (seqs == NULL) goto _output_error; + assert(cctx != NULL); + + /* Populate src with random data */ + RDG_genBuffer(CNBuffer, srcSize, compressibility, 0., seed); + + /* get the sequences */ + seqsSize = ZSTD_getSequences(cctx, seqs, srcSize, src, srcSize); + + /* "decode" and compare the sequences */ + FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize); + assert(!memcmp(CNBuffer, compressedBuffer, srcSize)); + + ZSTD_freeCCtx(cctx); + free(seqs); + } + /* Multiple blocks of zeros test */ #define LONGZEROSLENGTH 1000000 /* 1MB of zeros */ DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, LONGZEROSLENGTH); @@ -1972,7 +2021,6 @@ static int basicUnitTests(U32 const seed, double compressibility) if (r != LONGZEROSLENGTH) goto _output_error; } DISPLAYLEVEL(3, "OK \n"); - /* All zeroes test (test bug #137) */ #define ZEROESLENGTH 100 DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH);