From ece465644be7c45449bc4c06170afafa53d37119 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 29 Aug 2019 12:29:39 -0700 Subject: [PATCH 01/21] Adding api for extracting sequences from seqstore --- lib/compress/zstd_compress.c | 71 ++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index cd73db13..73f5cbff 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2190,6 +2190,77 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) ssPtr->longLengthID = 0; } +typedef struct { + U32 matchPos; + U32 offset; + U32 litLength; + U32 matchLength; + int rep; +} Sequence; + +static size_t ZSTD_getSequencesForOneBlock(ZSTD_CCtx* zc, ZSTD_CDict* cdict, + void* dst, size_t dstSize, + const void* src, size_t srcSize, + Sequence* outSeqs, size_t outSeqsSize) +{ + const seqStore_t* seqStore; + const seqDef* seqs; + size_t seqsSize; + + int i; int repIdx; U32 position; + + size_t blockSize = ZSTD_getBlockSize(zc); + size_t maxOutput = ZSTD_compressBound(blockSize); + + ASSERT(!ZSTD_isError(ZSTD_compressBegin_usingCDict(zc, cdict))); + ASSERT(dstSize >= maxOutput); dstSize = maxOutput; + ASSERT(srcSize >= blockSize); srcSize = blockSize; + ASSERT(!ZSTD_isError(ZSTD_compressBlock(zc, dst, dstSize, src, srcSize))); + + seqStore = ZSTD_getSeqStore(zc); + seqs = seqStore->sequencesStart; + seqsSize = seqStore->sequences - seqStore->sequencesStart; + + ASSERT(outSeqsSize >= seqsSize); outSeqsSize = seqsSize; + + for (i = 0, position = 0; i < seqsSize; ++i) { + outSeqs[i].offset = seqs[i].offset; + outSeqs[i].litLength = seqs[i].litLength; + outSeqs[i].matchLength = seqs[i].matchLength + 3 /* min match */; + + if (i == seqStore->longLengthPos) { + if (seqStore->longLengthID == 1) { + outSeqs[i].litLength += 0x10000; + } else if (seqStore->longLengthID == 2) { + outSeqs[i].matchLength += 0x10000; + } + } + + if (outSeqs[i].offset <= 3 /* num reps */) { + outSeqs[i].rep = 1; + repIdx = i - outSeqs[i].offset; + + if (repIdx >= 0) { + outSeqs[i].offset = outSeqs[repIdx].offset; + } + + if (repIdx == -1) { + outSeqs[i].offset = 1; + } else if (repIdx == -2) { + outSeqs[i].offset = 4; + } else if (repIdx == -3) { + outSeqs[i].offset = 8; + } + } else { + outSeqs[i].offset -= 3 /* num reps */; + } + + position += outSeqs[i].litLength; + outSeqs[i].matchPos = position; + position += outSeqs[i].matchLength; + } +} + typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) From 623b90f85d94f57ebfa0c7ae61e25dd489f27ecb Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 29 Aug 2019 13:09:42 -0700 Subject: [PATCH 02/21] Fixing ci-circle test complaints --- lib/compress/zstd_compress.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 73f5cbff..f0b6136b 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2207,21 +2207,21 @@ static size_t ZSTD_getSequencesForOneBlock(ZSTD_CCtx* zc, ZSTD_CDict* cdict, const seqDef* seqs; size_t seqsSize; - int i; int repIdx; U32 position; + size_t i; int repIdx; size_t position; size_t blockSize = ZSTD_getBlockSize(zc); size_t maxOutput = ZSTD_compressBound(blockSize); - ASSERT(!ZSTD_isError(ZSTD_compressBegin_usingCDict(zc, cdict))); - ASSERT(dstSize >= maxOutput); dstSize = maxOutput; - ASSERT(srcSize >= blockSize); srcSize = blockSize; - ASSERT(!ZSTD_isError(ZSTD_compressBlock(zc, dst, dstSize, src, srcSize))); + assert(!ZSTD_isError(ZSTD_compressBegin_usingCDict(zc, cdict))); + assert(dstSize >= maxOutput); dstSize = maxOutput; + assert(srcSize >= blockSize); srcSize = blockSize; + assert(!ZSTD_isError(ZSTD_compressBlock(zc, dst, dstSize, src, srcSize))); seqStore = ZSTD_getSeqStore(zc); seqs = seqStore->sequencesStart; seqsSize = seqStore->sequences - seqStore->sequencesStart; - ASSERT(outSeqsSize >= seqsSize); outSeqsSize = seqsSize; + assert(outSeqsSize >= seqsSize); outSeqsSize = seqsSize; for (i = 0, position = 0; i < seqsSize; ++i) { outSeqs[i].offset = seqs[i].offset; From 5f8b0f6890e03050a3840ed996f95a5e8263c1ea Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Fri, 30 Aug 2019 09:18:44 -0700 Subject: [PATCH 03/21] Changing api to get sequences across all blocks --- lib/compress/zstd_compress.c | 150 ++++++++++++++------------ lib/compress/zstd_compress_internal.h | 8 ++ lib/zstd.h | 11 ++ tests/fuzzer.c | 5 + 4 files changed, 103 insertions(+), 71 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index f0b6136b..f8588b34 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -13,6 +13,7 @@ ***************************************/ #include /* INT_MAX */ #include /* memset */ +#include #include "cpu.h" #include "mem.h" #include "hist.h" /* HIST_countFast_wksp */ @@ -2190,77 +2191,6 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) ssPtr->longLengthID = 0; } -typedef struct { - U32 matchPos; - U32 offset; - U32 litLength; - U32 matchLength; - int rep; -} Sequence; - -static size_t ZSTD_getSequencesForOneBlock(ZSTD_CCtx* zc, ZSTD_CDict* cdict, - void* dst, size_t dstSize, - const void* src, size_t srcSize, - Sequence* outSeqs, size_t outSeqsSize) -{ - const seqStore_t* seqStore; - const seqDef* seqs; - size_t seqsSize; - - size_t i; int repIdx; size_t position; - - size_t blockSize = ZSTD_getBlockSize(zc); - size_t maxOutput = ZSTD_compressBound(blockSize); - - assert(!ZSTD_isError(ZSTD_compressBegin_usingCDict(zc, cdict))); - assert(dstSize >= maxOutput); dstSize = maxOutput; - assert(srcSize >= blockSize); srcSize = blockSize; - assert(!ZSTD_isError(ZSTD_compressBlock(zc, dst, dstSize, src, srcSize))); - - seqStore = ZSTD_getSeqStore(zc); - seqs = seqStore->sequencesStart; - seqsSize = seqStore->sequences - seqStore->sequencesStart; - - assert(outSeqsSize >= seqsSize); outSeqsSize = seqsSize; - - for (i = 0, position = 0; i < seqsSize; ++i) { - outSeqs[i].offset = seqs[i].offset; - outSeqs[i].litLength = seqs[i].litLength; - outSeqs[i].matchLength = seqs[i].matchLength + 3 /* min match */; - - if (i == seqStore->longLengthPos) { - if (seqStore->longLengthID == 1) { - outSeqs[i].litLength += 0x10000; - } else if (seqStore->longLengthID == 2) { - outSeqs[i].matchLength += 0x10000; - } - } - - if (outSeqs[i].offset <= 3 /* num reps */) { - outSeqs[i].rep = 1; - repIdx = i - outSeqs[i].offset; - - if (repIdx >= 0) { - outSeqs[i].offset = outSeqs[repIdx].offset; - } - - if (repIdx == -1) { - outSeqs[i].offset = 1; - } else if (repIdx == -2) { - outSeqs[i].offset = 4; - } else if (repIdx == -3) { - outSeqs[i].offset = 8; - } - } else { - outSeqs[i].offset -= 3 /* num reps */; - } - - position += outSeqs[i].litLength; - outSeqs[i].matchPos = position; - position += outSeqs[i].matchLength; - } -} - typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) @@ -2394,6 +2324,81 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params } } +static void ZSTD_copyBlockSequences(const seqStore_t* seqStore, seqDef* seqs, + ZSTD_Sequence* outSeqs, size_t seqsSize) +{ + size_t i; size_t position; int repIdx; + for (i = 0, position = 0; i < seqsSize; ++i) { + outSeqs[i].offset = seqs[i].offset; + outSeqs[i].litLength = seqs[i].litLength; + outSeqs[i].matchLength = seqs[i].matchLength + 3 /* min match */; + + if (i == seqStore->longLengthPos) { + if (seqStore->longLengthID == 1) { + outSeqs[i].litLength += 0x10000; + } else if (seqStore->longLengthID == 2) { + outSeqs[i].matchLength += 0x10000; + } + } + + if (outSeqs[i].offset <= 3 /* num reps */) { + outSeqs[i].rep = 1; + repIdx = i - outSeqs[i].offset; + + if (repIdx >= 0) { + outSeqs[i].offset = outSeqs[repIdx].offset; + } + + if (repIdx == -1) { + outSeqs[i].offset = 1; + } else if (repIdx == -2) { + outSeqs[i].offset = 4; + } else if (repIdx == -3) { + outSeqs[i].offset = 8; + } + } else { + outSeqs[i].offset -= 3 /* num reps */; + } + + position += outSeqs[i].litLength; + outSeqs[i].matchPos = position; + position += outSeqs[i].matchLength; + } +} + +static void ZSTD_getBlockSequences(ZSTD_CCtx* cctx, const seqStore_t* seqStore) +{ + size_t seqsSize = seqStore->sequences - seqStore->sequencesStart; + + assert(cctx->seqCollector.maxSequences > + (cctx->seqCollector.seqCurrent - cctx->seqCollector.seqStart) + seqsSize); + + ZSTD_copyBlockSequences(seqStore, seqStore->sequencesStart, + cctx->seqCollector.seqCurrent, seqsSize); + cctx->seqCollector.seqCurrent += seqsSize; +} + +size_t ZSTD_getSequences(ZSTD_CCtx* zc, const void* src, + size_t srcSize, ZSTD_Sequence* outSeqs, size_t outSeqsSize, + int level) +{ + size_t dstCapacity = ZSTD_compressBound(srcSize * sizeof(void*)); + void* dst = malloc(dstCapacity); + size_t seqsSize; + + SeqCollector seqCollector; + seqCollector.collectSequences = 1; + seqCollector.seqStart = outSeqs; + seqCollector.seqCurrent = outSeqs; + seqCollector.maxSequences = outSeqsSize; + zc->seqCollector = seqCollector; + + ZSTD_compressCCtx(zc, dst, dstCapacity, src, srcSize, level); + seqsSize = zc->seqCollector.seqCurrent - zc->seqCollector.seqStart; + + free(dst); + return seqsSize; +} /*! ZSTD_compress_frameChunk() : * Compress a chunk of data into one or multiple blocks. @@ -2438,6 +2443,9 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize); FORWARD_IF_ERROR(cSize); + if (cctx->seqCollector.collectSequences) { + ZSTD_getBlockSequences(cctx, ZSTD_getSeqStore(cctx)); + } if (cSize == 0) { /* block is not compressible */ cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 6d623cc6..d40d5340 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -192,6 +192,13 @@ typedef struct { size_t capacity; /* The capacity starting from `seq` pointer */ } rawSeqStore_t; +typedef struct { + int collectSequences; + ZSTD_Sequence* seqStart; + ZSTD_Sequence* seqCurrent; + size_t maxSequences; +} SeqCollector; + struct ZSTD_CCtx_params_s { ZSTD_format_e format; ZSTD_compressionParameters cParams; @@ -238,6 +245,7 @@ struct ZSTD_CCtx_s { XXH64_state_t xxhState; ZSTD_customMem customMem; size_t staticSize; + SeqCollector seqCollector; seqStore_t seqStore; /* sequences storage ptrs */ ldmState_t ldmState; /* long distance matching state */ diff --git a/lib/zstd.h b/lib/zstd.h index f8e95f22..782940ef 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1072,6 +1072,14 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params; +typedef struct { + unsigned int matchPos; + unsigned int offset; + unsigned int litLength; + unsigned int matchLength; + int rep; +} ZSTD_Sequence; + typedef struct { unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ @@ -1210,6 +1218,9 @@ ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcS * or an error code (if srcSize is too small) */ ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, const void* src, + size_t srcSize, ZSTD_Sequence* outSeqs, size_t outSeqsSize, int level); + /*************************************** * Memory management diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 2de7c009..09fe4695 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -1960,6 +1960,11 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "OK \n"); } + DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences zeros : ", testNb++); + memset(CNBuffer, 0, 1000000); + assert(ZSTD_getSequences(ZSTD_createCCtx(), CNBuffer, 1000000, + compressedBuffer, 1000000, 3) == 1000000 / 131071 + 1); + /* All zeroes test (test bug #137) */ #define ZEROESLENGTH 100 DISPLAYLEVEL(3, "test%3i : compress %u zeroes : ", testNb++, ZEROESLENGTH); From 9e7bb55e14b112e514a0e8d5ac7f19fd64f353c1 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Mon, 9 Sep 2019 20:04:46 -0700 Subject: [PATCH 04/21] Addressing comments --- lib/compress/zstd_compress.c | 156 +++++++++++++------------- lib/compress/zstd_compress_internal.h | 2 +- lib/zstd.h | 4 +- tests/fuzzer.c | 4 +- 4 files changed, 80 insertions(+), 86 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index f8588b34..56da1664 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -13,7 +13,6 @@ ***************************************/ #include /* INT_MAX */ #include /* memset */ -#include #include "cpu.h" #include "mem.h" #include "hist.h" /* HIST_countFast_wksp */ @@ -2265,6 +2264,77 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) return ZSTDbss_compress; } +static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) +{ + const seqStore_t* seqStore = ZSTD_getSeqStore(zc); + const seqDef* seqs = seqStore->sequencesStart; + size_t seqsSize = seqStore->sequences - seqs; + + ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; + size_t i; size_t position; int repIdx; + + assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); + for (i = 0, position = 0; i < seqsSize; ++i) { + outSeqs[i].offset = seqs[i].offset; + outSeqs[i].litLength = seqs[i].litLength; + outSeqs[i].matchLength = seqs[i].matchLength + MINMATCH; + + if (i == seqStore->longLengthPos) { + if (seqStore->longLengthID == 1) { + outSeqs[i].litLength += 0x10000; + } else if (seqStore->longLengthID == 2) { + outSeqs[i].matchLength += 0x10000; + } + } + + if (outSeqs[i].offset <= ZSTD_REP_NUM) { + outSeqs[i].rep = 1; + repIdx = i - outSeqs[i].offset; + + if (repIdx >= 0) { + outSeqs[i].offset = outSeqs[repIdx].offset; + } + + if (repIdx == -1) { + outSeqs[i].offset = 1; + } else if (repIdx == -2) { + outSeqs[i].offset = 4; + } else if (repIdx == -3) { + outSeqs[i].offset = 8; + } + } else { + outSeqs[i].offset -= ZSTD_REP_NUM; + } + + position += outSeqs[i].litLength; + outSeqs[i].matchPos = position; + position += outSeqs[i].matchLength; + } + zc->seqCollector.seqIndex += seqsSize; +} + +/* We call compress2() and collect sequences after each block + * compression. The function stores the ZSTD_Sequences in outSeqs + * and returns the number of collected sequences from all blocks. + */ +size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize) +{ + const size_t dstCapacity = ZSTD_compressBound(srcSize * sizeof(void*)); + void* dst = ZSTD_malloc(dstCapacity, ZSTD_defaultCMem); + + SeqCollector seqCollector; + seqCollector.collectSequences = 1; + seqCollector.seqStart = outSeqs; + seqCollector.seqIndex = 0; + seqCollector.maxSequences = outSeqsSize; + zc->seqCollector = seqCollector; + + ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); + ZSTD_free(dst, ZSTD_defaultCMem); + return zc->seqCollector.seqIndex; +} + static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize) @@ -2288,6 +2358,10 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */, zc->bmi2); + if (zc->seqCollector.collectSequences) { + ZSTD_copyBlockSequences(zc); + } + out: if (!ZSTD_isError(cSize) && cSize != 0) { /* confirm repcodes and entropy tables when emitting a compressed block */ @@ -2324,82 +2398,6 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params } } -static void ZSTD_copyBlockSequences(const seqStore_t* seqStore, seqDef* seqs, - ZSTD_Sequence* outSeqs, size_t seqsSize) -{ - size_t i; size_t position; int repIdx; - for (i = 0, position = 0; i < seqsSize; ++i) { - outSeqs[i].offset = seqs[i].offset; - outSeqs[i].litLength = seqs[i].litLength; - outSeqs[i].matchLength = seqs[i].matchLength + 3 /* min match */; - - if (i == seqStore->longLengthPos) { - if (seqStore->longLengthID == 1) { - outSeqs[i].litLength += 0x10000; - } else if (seqStore->longLengthID == 2) { - outSeqs[i].matchLength += 0x10000; - } - } - - if (outSeqs[i].offset <= 3 /* num reps */) { - outSeqs[i].rep = 1; - repIdx = i - outSeqs[i].offset; - - if (repIdx >= 0) { - outSeqs[i].offset = outSeqs[repIdx].offset; - } - - if (repIdx == -1) { - outSeqs[i].offset = 1; - } else if (repIdx == -2) { - outSeqs[i].offset = 4; - } else if (repIdx == -3) { - outSeqs[i].offset = 8; - } - } else { - outSeqs[i].offset -= 3 /* num reps */; - } - - position += outSeqs[i].litLength; - outSeqs[i].matchPos = position; - position += outSeqs[i].matchLength; - } -} - -static void ZSTD_getBlockSequences(ZSTD_CCtx* cctx, const seqStore_t* seqStore) -{ - size_t seqsSize = seqStore->sequences - seqStore->sequencesStart; - - assert(cctx->seqCollector.maxSequences > - (cctx->seqCollector.seqCurrent - cctx->seqCollector.seqStart) + seqsSize); - - ZSTD_copyBlockSequences(seqStore, seqStore->sequencesStart, - cctx->seqCollector.seqCurrent, seqsSize); - cctx->seqCollector.seqCurrent += seqsSize; -} - -size_t ZSTD_getSequences(ZSTD_CCtx* zc, const void* src, - size_t srcSize, ZSTD_Sequence* outSeqs, size_t outSeqsSize, - int level) -{ - size_t dstCapacity = ZSTD_compressBound(srcSize * sizeof(void*)); - void* dst = malloc(dstCapacity); - size_t seqsSize; - - SeqCollector seqCollector; - seqCollector.collectSequences = 1; - seqCollector.seqStart = outSeqs; - seqCollector.seqCurrent = outSeqs; - seqCollector.maxSequences = outSeqsSize; - zc->seqCollector = seqCollector; - - ZSTD_compressCCtx(zc, dst, dstCapacity, src, srcSize, level); - seqsSize = zc->seqCollector.seqCurrent - zc->seqCollector.seqStart; - - free(dst); - return seqsSize; -} - /*! ZSTD_compress_frameChunk() : * Compress a chunk of data into one or multiple blocks. * All blocks will be terminated, all input will be consumed. @@ -2443,10 +2441,6 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize); FORWARD_IF_ERROR(cSize); - if (cctx->seqCollector.collectSequences) { - ZSTD_getBlockSequences(cctx, ZSTD_getSeqStore(cctx)); - } - if (cSize == 0) { /* block is not compressible */ cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); FORWARD_IF_ERROR(cSize); diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index d40d5340..e3ed93eb 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -195,7 +195,7 @@ typedef struct { typedef struct { int collectSequences; ZSTD_Sequence* seqStart; - ZSTD_Sequence* seqCurrent; + size_t seqIndex; size_t maxSequences; } SeqCollector; diff --git a/lib/zstd.h b/lib/zstd.h index 782940ef..b2c66e75 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1218,8 +1218,8 @@ ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcS * or an error code (if srcSize is too small) */ ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, const void* src, - size_t srcSize, ZSTD_Sequence* outSeqs, size_t outSeqsSize, int level); +ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize); /*************************************** diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 09fe4695..fdf6960b 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -1962,8 +1962,8 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences zeros : ", testNb++); memset(CNBuffer, 0, 1000000); - assert(ZSTD_getSequences(ZSTD_createCCtx(), CNBuffer, 1000000, - compressedBuffer, 1000000, 3) == 1000000 / 131071 + 1); + assert(ZSTD_getSequences(ZSTD_createCCtx(), compressedBuffer, 1000000, + CNBuffer, 1000000) == 1000000 / 131071 + 1); /* All zeroes test (test bug #137) */ #define ZEROESLENGTH 100 From e3c582591855a2e70a8c88976a9f05c6b1a0ea8e Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Tue, 10 Sep 2019 10:06:02 -0700 Subject: [PATCH 05/21] Fizing litLength == 0 case --- lib/compress/zstd_compress.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 56da1664..83bcb52d 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2291,11 +2291,22 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) outSeqs[i].rep = 1; repIdx = i - outSeqs[i].offset; + /* Not first block */ if (repIdx >= 0) { - outSeqs[i].offset = outSeqs[repIdx].offset; - } - - if (repIdx == -1) { + /* Special case where litLength == 0 */ + if (outSeqs[i].litLength == 0) { + /* When the offset is 3 */ + if (outSeqs[i].offset > 2) { + outSeqs[i].offset = outSeqs[repIdx - 1].offset - 1; + /* When the offset is either 1 or 2 */ + } else { + outSeqs[i].offset = outSeqs[repIdx - 1].offset; + } + } else { + outSeqs[i].offset = outSeqs[repIdx].offset; + } + /* First block */ + } else if (repIdx == -1) { outSeqs[i].offset = 1; } else if (repIdx == -2) { outSeqs[i].offset = 4; From 47199480da07531a1e2b982483e08e23eff54970 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Tue, 10 Sep 2019 13:18:59 -0700 Subject: [PATCH 06/21] Cleaning up parsing per suggestion --- lib/compress/zstd_compress.c | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 83bcb52d..b4fdf387 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2291,27 +2291,17 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) outSeqs[i].rep = 1; repIdx = i - outSeqs[i].offset; - /* Not first block */ - if (repIdx >= 0) { - /* Special case where litLength == 0 */ - if (outSeqs[i].litLength == 0) { - /* When the offset is 3 */ - if (outSeqs[i].offset > 2) { - outSeqs[i].offset = outSeqs[repIdx - 1].offset - 1; - /* When the offset is either 1 or 2 */ - } else { - outSeqs[i].offset = outSeqs[repIdx - 1].offset; - } + if (outSeqs[i].litLength == 0) { + if (outSeqs[i].offset < 3) { + --repIdx; } else { - outSeqs[i].offset = outSeqs[repIdx].offset; + repIdx = i - 1; } - /* First block */ - } else if (repIdx == -1) { - outSeqs[i].offset = 1; - } else if (repIdx == -2) { - outSeqs[i].offset = 4; - } else if (repIdx == -3) { - outSeqs[i].offset = 8; + } + assert(repIdx >= -3); + outSeqs[i].offset = repIdx >= 0 ? outSeqs[repIdx].offset : repStartValue[-repIdx - 1]; + if (outSeqs[i].offset == 3) { + --outSeqs[i].offset; } } else { outSeqs[i].offset -= ZSTD_REP_NUM; From 1407919d132f1b29998939705fc3a7f0240f3a7c Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Tue, 10 Sep 2019 15:10:50 -0700 Subject: [PATCH 07/21] Addressing comments on parsing --- lib/compress/zstd_compress.c | 5 +++-- lib/zstd.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index b4fdf387..849a9f42 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2288,7 +2288,7 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) } if (outSeqs[i].offset <= ZSTD_REP_NUM) { - outSeqs[i].rep = 1; + outSeqs[i].rep = outSeqs[i].offset; repIdx = i - outSeqs[i].offset; if (outSeqs[i].litLength == 0) { @@ -2297,10 +2297,11 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) } else { repIdx = i - 1; } + ++outSeqs[i].rep; } assert(repIdx >= -3); outSeqs[i].offset = repIdx >= 0 ? outSeqs[repIdx].offset : repStartValue[-repIdx - 1]; - if (outSeqs[i].offset == 3) { + if (outSeqs[i].offset == 4) { --outSeqs[i].offset; } } else { diff --git a/lib/zstd.h b/lib/zstd.h index b2c66e75..97feb77d 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1077,7 +1077,7 @@ typedef struct { unsigned int offset; unsigned int litLength; unsigned int matchLength; - int rep; + unsigned int rep; } ZSTD_Sequence; typedef struct { From bff6072e3a691a47638153d9daa1cd9de7c119b4 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Mon, 16 Sep 2019 08:26:21 -0700 Subject: [PATCH 08/21] Bailing early when collecting sequences and documentation --- lib/compress/zstd_compress.c | 13 +++++-------- lib/zstd.h | 16 +++++++++++----- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 849a9f42..833ae838 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2315,10 +2315,6 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) zc->seqCollector.seqIndex += seqsSize; } -/* We call compress2() and collect sequences after each block - * compression. The function stores the ZSTD_Sequences in outSeqs - * and returns the number of collected sequences from all blocks. - */ size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, size_t outSeqsSize, const void* src, size_t srcSize) { @@ -2351,6 +2347,11 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } } + if (zc->seqCollector.collectSequences) { + ZSTD_copyBlockSequences(zc); + return 0; + } + /* encode sequences and literals */ cSize = ZSTD_compressSequences(&zc->seqStore, &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, @@ -2360,10 +2361,6 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */, zc->bmi2); - if (zc->seqCollector.collectSequences) { - ZSTD_copyBlockSequences(zc); - } - out: if (!ZSTD_isError(cSize) && cSize != 0) { /* confirm repcodes and entropy tables when emitting a compressed block */ diff --git a/lib/zstd.h b/lib/zstd.h index 97feb77d..3e737d8f 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1073,11 +1073,11 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params; typedef struct { - unsigned int matchPos; - unsigned int offset; - unsigned int litLength; - unsigned int matchLength; - unsigned int rep; + unsigned int matchPos; /* match pos in dst */ + unsigned int offset; /* offset taking into account rep (different from seqdef) */ + unsigned int litLength; /* literal length */ + unsigned int matchLength; /* match length */ + unsigned int rep; /* 0 when seq not rep and seqDef.offset otherwise */ } ZSTD_Sequence; typedef struct { @@ -1218,6 +1218,12 @@ ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcS * or an error code (if srcSize is too small) */ ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); +/*! ZSTD_getSequences() : + * Extract sequences from the sequence store + * zc can be used to insert custom compression params. + * This function invokes ZSTD_compress2 + * @return : number of sequences extracted + */ ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, size_t outSeqsSize, const void* src, size_t srcSize); From 1f93be0f6dbb9bad2103b71a249aedee8b5dc2ad Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Mon, 16 Sep 2019 13:35:45 -0700 Subject: [PATCH 09/21] Handling memory leak and potential side effect --- tests/fuzzer.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/fuzzer.c b/tests/fuzzer.c index fdf6960b..b11a3cc0 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -1960,10 +1960,15 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "OK \n"); } - DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences zeros : ", testNb++); - memset(CNBuffer, 0, 1000000); - assert(ZSTD_getSequences(ZSTD_createCCtx(), compressedBuffer, 1000000, - CNBuffer, 1000000) == 1000000 / 131071 + 1); + { + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + assert(cctx != NULL); + DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences zeros : ", testNb++); + memset(CNBuffer, 0, 1000000); + assert(ZSTD_getSequences(cctx, compressedBuffer, 1000000, + CNBuffer, 1000000) == 1000000 / 131071 + 1); + ZSTD_freeCCtx(cctx); + } /* All zeroes test (test bug #137) */ #define ZEROESLENGTH 100 From 76fea3fb9922c68a45159aa68b536ac196f31ab7 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Mon, 16 Sep 2019 14:02:23 -0700 Subject: [PATCH 10/21] Resolving appveyor test failure implicit conversion --- lib/compress/zstd_compress.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 3fe84b6b..acb3b15e 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2290,13 +2290,13 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) if (outSeqs[i].offset <= ZSTD_REP_NUM) { outSeqs[i].rep = outSeqs[i].offset; - repIdx = i - outSeqs[i].offset; + repIdx = (unsigned int)i - outSeqs[i].offset; if (outSeqs[i].litLength == 0) { if (outSeqs[i].offset < 3) { --repIdx; } else { - repIdx = i - 1; + repIdx = (unsigned int)i - 1; } ++outSeqs[i].rep; } @@ -2310,7 +2310,7 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) } position += outSeqs[i].litLength; - outSeqs[i].matchPos = position; + outSeqs[i].matchPos = (unsigned int)position; position += outSeqs[i].matchLength; } zc->seqCollector.seqIndex += seqsSize; From 3cacc0a30bb3f39af6f4cb7f2ebca9ace9f45aff Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Tue, 17 Sep 2019 17:44:08 -0700 Subject: [PATCH 11/21] Casting void pointer to ZSTD_Sequence pointer --- tests/fuzzer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 62263fd1..a513e1b4 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -1965,7 +1965,7 @@ static int basicUnitTests(U32 const seed, double compressibility) assert(cctx != NULL); DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences zeros : ", testNb++); memset(CNBuffer, 0, 1000000); - assert(ZSTD_getSequences(cctx, compressedBuffer, 1000000, + assert(ZSTD_getSequences(cctx, (ZSTD_Sequence*)compressedBuffer, 1000000, CNBuffer, 1000000) == 1000000 / 131071 + 1); ZSTD_freeCCtx(cctx); } From ae6d0e64ae0fcd66a6c1d2061a017bc7521ac7c0 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 19 Sep 2019 15:25:20 -0700 Subject: [PATCH 12/21] Addressing comments --- lib/compress/zstd_compress.c | 9 +++------ lib/zstd.h | 20 +++++++++++++++----- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index acb3b15e..ce352acd 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2302,7 +2302,7 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) } assert(repIdx >= -3); outSeqs[i].offset = repIdx >= 0 ? outSeqs[repIdx].offset : repStartValue[-repIdx - 1]; - if (outSeqs[i].offset == 4) { + if (outSeqs[i].rep == 4) { --outSeqs[i].offset; } } else { @@ -2319,9 +2319,6 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, size_t outSeqsSize, const void* src, size_t srcSize) { - const size_t dstCapacity = ZSTD_compressBound(srcSize * sizeof(void*)); - void* dst = ZSTD_malloc(dstCapacity, ZSTD_defaultCMem); - SeqCollector seqCollector; seqCollector.collectSequences = 1; seqCollector.seqStart = outSeqs; @@ -2329,8 +2326,8 @@ size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, seqCollector.maxSequences = outSeqsSize; zc->seqCollector = seqCollector; - ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); - ZSTD_free(dst, ZSTD_defaultCMem); + /* We never write to dst when collecing sequences so setting dst = src is harmless */ + ZSTD_compress2(zc, (void*)src, srcSize, src, srcSize); return zc->seqCollector.seqIndex; } diff --git a/lib/zstd.h b/lib/zstd.h index 217a6d35..836aa723 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1078,11 +1078,21 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params; typedef struct { - unsigned int matchPos; /* match pos in dst */ - unsigned int offset; /* offset taking into account rep (different from seqdef) */ - unsigned int litLength; /* literal length */ - unsigned int matchLength; /* match length */ - unsigned int rep; /* 0 when seq not rep and seqDef.offset otherwise */ + unsigned int matchPos; /* Match pos in dst */ + /* If seqDef.offset > 3, then this is seqDef.offset - 3 + * If seqDef.offset < 3, then this is the corresponding repeat offset + * But if seqDef.offset < 3 and litLength == 0, this is the + * repeat offset before the corresponding repeat offset + * And if seqDef.offset == 3 and litLength == 0, this is the + * most recent repeat offset - 1 + */ + unsigned int offset; + unsigned int litLength; /* Literal length */ + unsigned int matchLength; /* Match length */ + /* 0 when seq not rep and seqDef.offset otherwise + * when litLength == 0 this will be <= 4, otherwise <= 3 like normal + */ + unsigned int rep; } ZSTD_Sequence; typedef struct { From f3c4fd17e30465a4ec90152c9f858dc8ff674b7f Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Fri, 20 Sep 2019 15:50:58 -0700 Subject: [PATCH 13/21] Passing in dummy dst buffer of compressbound(srcSize) --- lib/compress/zstd_compress.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index ce352acd..e7ec1d61 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2319,6 +2319,9 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, size_t outSeqsSize, const void* src, size_t srcSize) { + const size_t dstCapacity = ZSTD_compressBound(srcSize); + void* dst = ZSTD_malloc(dstCapacity, ZSTD_defaultCMem); + SeqCollector seqCollector; seqCollector.collectSequences = 1; seqCollector.seqStart = outSeqs; @@ -2326,8 +2329,8 @@ size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, seqCollector.maxSequences = outSeqsSize; zc->seqCollector = seqCollector; - /* We never write to dst when collecing sequences so setting dst = src is harmless */ - ZSTD_compress2(zc, (void*)src, srcSize, src, srcSize); + ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); + ZSTD_free(dst, ZSTD_defaultCMem); return zc->seqCollector.seqIndex; } From be0bebd24e08833f757daaee3c172514c0cc811f Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Mon, 23 Sep 2019 15:08:18 -0700 Subject: [PATCH 14/21] Adding test and null check for malloc --- lib/compress/zstd_compress.c | 4 +++- tests/fuzzer.c | 26 ++++++++++++++++++++++---- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index e7ec1d61..8eb29289 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2321,8 +2321,10 @@ size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, { const size_t dstCapacity = ZSTD_compressBound(srcSize); void* dst = ZSTD_malloc(dstCapacity, ZSTD_defaultCMem); - SeqCollector seqCollector; + + assert(dst != NULL); + seqCollector.collectSequences = 1; seqCollector.seqStart = outSeqs; seqCollector.seqIndex = 0; diff --git a/tests/fuzzer.c b/tests/fuzzer.c index a513e1b4..349f0d19 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -1962,11 +1962,29 @@ static int basicUnitTests(U32 const seed, double compressibility) { ZSTD_CCtx* cctx = ZSTD_createCCtx(); - assert(cctx != NULL); + size_t zerosLength = ZSTD_BLOCKSIZE_MAX * 2 - 1; + size_t expectedOffsets[] = {1, 1}; + size_t expectedLitLengths[] = {2, 1}; + size_t expectedMatchLengths[] = {ZSTD_BLOCKSIZE_MAX - 2, ZSTD_BLOCKSIZE_MAX - 2}; + size_t expectedReps[] = {1, 1}; + size_t expectedMatchPos[] = {2, 1}; + size_t expectedSequencesSize = 2; + size_t sequencesSize; + size_t i = 0; + ZSTD_Sequence* sequences = (ZSTD_Sequence*)compressedBuffer; DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences zeros : ", testNb++); - memset(CNBuffer, 0, 1000000); - assert(ZSTD_getSequences(cctx, (ZSTD_Sequence*)compressedBuffer, 1000000, - CNBuffer, 1000000) == 1000000 / 131071 + 1); + assert(cctx != NULL); + memset(CNBuffer, 0, zerosLength); + sequencesSize = ZSTD_getSequences(cctx, sequences, 10, + CNBuffer, zerosLength); + assert(sequencesSize == expectedSequencesSize); + for (i = 0; i < sequencesSize; ++i) { + assert(sequences[i].offset == expectedOffsets[i]); + assert(sequences[i].litLength == expectedLitLengths[i]); + assert(sequences[i].matchLength == expectedMatchLengths[i]); + assert(sequences[i].rep == expectedReps[i]); + assert(sequences[i].matchPos == expectedMatchPos[i]); + } ZSTD_freeCCtx(cctx); } From c04245b257e25c26cdb15507308575b1f16108e4 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Mon, 23 Sep 2019 15:42:16 -0700 Subject: [PATCH 15/21] Replacing assert with memory_allocation error code throw --- lib/compress/zstd_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 8eb29289..7facbeff 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2323,7 +2323,7 @@ size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, void* dst = ZSTD_malloc(dstCapacity, ZSTD_defaultCMem); SeqCollector seqCollector; - assert(dst != NULL); + RETURN_ERROR_IF(dst == NULL, memory_allocation); seqCollector.collectSequences = 1; seqCollector.seqStart = outSeqs; From bb27472afcf5a8f6499638f30221e62850bba0e7 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 26 Sep 2019 15:38:31 -0700 Subject: [PATCH 16/21] Adding more realistic test for get sequences --- tests/fuzzer.c | 63 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 22 deletions(-) diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 349f0d19..66d1c7e0 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -304,6 +304,26 @@ static int FUZ_mallocTests(unsigned seed, double compressibility, unsigned part) #endif +static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize, BYTE* src, size_t size) +{ + size_t i; + for(i = 0; i < seqsSize; ++i) { + assert(dst + seqs[i].litLength + seqs[i].matchLength < dst + size); + assert(src + seqs[i].litLength + seqs[i].matchLength < src + size); + + memcpy(dst, src, seqs[i].litLength); + dst += seqs[i].litLength; + src += seqs[i].litLength; + size -= seqs[i].litLength; + + memcpy(dst, dst-seqs[i].offset, seqs[i].matchLength); + dst += seqs[i].matchLength; + src += seqs[i].matchLength; + size -= seqs[i].matchLength; + } + memcpy(dst, src, size); +} + /*============================================= * Unit tests =============================================*/ @@ -1960,32 +1980,31 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "OK \n"); } + DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences decode from sequences test : ", testNb++); { + size_t srcSize = sizeof(U32) * 1000; + BYTE* src = (BYTE*)CNBuffer; + BYTE* decoded = (BYTE*)compressedBuffer; + ZSTD_CCtx* cctx = ZSTD_createCCtx(); - size_t zerosLength = ZSTD_BLOCKSIZE_MAX * 2 - 1; - size_t expectedOffsets[] = {1, 1}; - size_t expectedLitLengths[] = {2, 1}; - size_t expectedMatchLengths[] = {ZSTD_BLOCKSIZE_MAX - 2, ZSTD_BLOCKSIZE_MAX - 2}; - size_t expectedReps[] = {1, 1}; - size_t expectedMatchPos[] = {2, 1}; - size_t expectedSequencesSize = 2; - size_t sequencesSize; - size_t i = 0; - ZSTD_Sequence* sequences = (ZSTD_Sequence*)compressedBuffer; - DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences zeros : ", testNb++); + ZSTD_Sequence* seqs = malloc(srcSize * sizeof(ZSTD_Sequence)); + size_t seqsSize; size_t i; + U32 randSeed = seed; + assert(cctx != NULL); - memset(CNBuffer, 0, zerosLength); - sequencesSize = ZSTD_getSequences(cctx, sequences, 10, - CNBuffer, zerosLength); - assert(sequencesSize == expectedSequencesSize); - for (i = 0; i < sequencesSize; ++i) { - assert(sequences[i].offset == expectedOffsets[i]); - assert(sequences[i].litLength == expectedLitLengths[i]); - assert(sequences[i].matchLength == expectedMatchLengths[i]); - assert(sequences[i].rep == expectedReps[i]); - assert(sequences[i].matchPos == expectedMatchPos[i]); - } + + /* Populate src with random data */ + for (i = 0; i < srcSize / sizeof(U32); ++i) {*((U32*)src + i) = FUZ_rand(&randSeed);} + + /* get the sequences */ + seqsSize = ZSTD_getSequences(cctx, seqs, srcSize, src, srcSize); + + /* "decode" and compare the sequences */ + FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize); + assert(!memcmp(CNBuffer, compressedBuffer, 5)); + ZSTD_freeCCtx(cctx); + free(seqs); } /* Multiple blocks of zeros test */ From 75b128635455994ea5218036c5bbd187c67e1998 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 26 Sep 2019 16:07:34 -0700 Subject: [PATCH 17/21] Fixing shortest failure --- tests/fuzzer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 66d1c7e0..c3687974 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -1994,7 +1994,7 @@ static int basicUnitTests(U32 const seed, double compressibility) assert(cctx != NULL); /* Populate src with random data */ - for (i = 0; i < srcSize / sizeof(U32); ++i) {*((U32*)src + i) = FUZ_rand(&randSeed);} + for (i = 0; i < srcSize / sizeof(U32); ++i) {((U32*)CNBuffer)[i] = FUZ_rand(&randSeed);} /* get the sequences */ seqsSize = ZSTD_getSequences(cctx, seqs, srcSize, src, srcSize); From 91daee5c068c105d456331ff30fd6971ec9cd437 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 26 Sep 2019 16:21:57 -0700 Subject: [PATCH 18/21] Fixing appveyor test --- tests/fuzzer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/fuzzer.c b/tests/fuzzer.c index c3687974..0b935c82 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -1987,10 +1987,11 @@ static int basicUnitTests(U32 const seed, double compressibility) BYTE* decoded = (BYTE*)compressedBuffer; ZSTD_CCtx* cctx = ZSTD_createCCtx(); - ZSTD_Sequence* seqs = malloc(srcSize * sizeof(ZSTD_Sequence)); + ZSTD_Sequence* seqs = (ZSTD_Sequence*)malloc(srcSize * sizeof(ZSTD_Sequence)); size_t seqsSize; size_t i; U32 randSeed = seed; + if (seqs == NULL) goto _output_error; assert(cctx != NULL); /* Populate src with random data */ From b63a1e7ae526e7544c0bcfdfebf8744f133eb500 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Fri, 27 Sep 2019 07:20:20 -0700 Subject: [PATCH 19/21] Typo fix --- tests/fuzzer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 0b935c82..cf81230d 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -2002,7 +2002,7 @@ static int basicUnitTests(U32 const seed, double compressibility) /* "decode" and compare the sequences */ FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize); - assert(!memcmp(CNBuffer, compressedBuffer, 5)); + assert(!memcmp(CNBuffer, compressedBuffer, srcSize)); ZSTD_freeCCtx(cctx); free(seqs); From 61ec4c2e7f7a20856933978b79dcf55a3c68ff74 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 3 Oct 2019 06:42:40 -0700 Subject: [PATCH 20/21] Cleaning sequence parsing logic --- lib/compress/zstd_compress.c | 59 +++++++++++++++--------------------- 1 file changed, 25 insertions(+), 34 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 7facbeff..4e2fbd85 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2272,46 +2272,37 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) size_t seqsSize = seqStore->sequences - seqs; ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; - size_t i; size_t position; int repIdx; + int i; assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); - for (i = 0, position = 0; i < seqsSize; ++i) { - outSeqs[i].offset = seqs[i].offset; + for (i = 0; i < (int)seqsSize; ++i) { + unsigned int offsetValue = seqs[i].offset; outSeqs[i].litLength = seqs[i].litLength; - outSeqs[i].matchLength = seqs[i].matchLength + MINMATCH; - - if (i == seqStore->longLengthPos) { - if (seqStore->longLengthID == 1) { - outSeqs[i].litLength += 0x10000; - } else if (seqStore->longLengthID == 2) { - outSeqs[i].matchLength += 0x10000; - } - } - - if (outSeqs[i].offset <= ZSTD_REP_NUM) { - outSeqs[i].rep = outSeqs[i].offset; - repIdx = (unsigned int)i - outSeqs[i].offset; - - if (outSeqs[i].litLength == 0) { - if (outSeqs[i].offset < 3) { - --repIdx; - } else { - repIdx = (unsigned int)i - 1; - } - ++outSeqs[i].rep; - } - assert(repIdx >= -3); - outSeqs[i].offset = repIdx >= 0 ? outSeqs[repIdx].offset : repStartValue[-repIdx - 1]; - if (outSeqs[i].rep == 4) { - --outSeqs[i].offset; - } + outSeqs[i].matchLength = seqs[i].matchLength; + if (offsetValue > 3) { + outSeqs[i].offset = offsetValue - 3; } else { - outSeqs[i].offset -= ZSTD_REP_NUM; + /* special repeat offset case */ + unsigned int repeatOffset1 = i - 1 >= 0 ? outSeqs[i - 1].offset : 1; + unsigned int repeatOffset2 = 1 - 2 >= 0 ? outSeqs[i - 2].offset : 4; + unsigned int repeatOffset3 = i - 3 >= 0 ? outSeqs[i - 3].offset : 8; + if (seqs[i].litLength != 0) { + switch (offsetValue) { + case 1: outSeqs[i].offset = repeatOffset1; break; + case 2: outSeqs[i].offset = repeatOffset2; break; + case 3: outSeqs[i].offset = repeatOffset3; break; + } + } else { + /* offsets shifted by one */ + switch (offsetValue) { + case 1: outSeqs[i].offset = repeatOffset2; break; + case 2: outSeqs[i].offset = repeatOffset3; break; + /* corner case where offsetValue == 3 */ + case 3: outSeqs[i].offset = repeatOffset1 - 1; break; + } + } } - position += outSeqs[i].litLength; - outSeqs[i].matchPos = (unsigned int)position; - position += outSeqs[i].matchLength; } zc->seqCollector.seqIndex += seqsSize; } From 36528b96c4ae2dff700e63851f710aa11c1d0bc8 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Thu, 3 Oct 2019 09:26:51 -0700 Subject: [PATCH 21/21] Manually moving instead of memcpy on decoder and using genBuffer() --- lib/compress/zstd_compress.c | 59 +++++++++++++++++++++--------------- tests/fuzzer.c | 13 ++++---- 2 files changed, 41 insertions(+), 31 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 4e2fbd85..7facbeff 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2272,37 +2272,46 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) size_t seqsSize = seqStore->sequences - seqs; ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; - int i; + size_t i; size_t position; int repIdx; assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); - for (i = 0; i < (int)seqsSize; ++i) { - unsigned int offsetValue = seqs[i].offset; + for (i = 0, position = 0; i < seqsSize; ++i) { + outSeqs[i].offset = seqs[i].offset; outSeqs[i].litLength = seqs[i].litLength; - outSeqs[i].matchLength = seqs[i].matchLength; - if (offsetValue > 3) { - outSeqs[i].offset = offsetValue - 3; - } else { - /* special repeat offset case */ - unsigned int repeatOffset1 = i - 1 >= 0 ? outSeqs[i - 1].offset : 1; - unsigned int repeatOffset2 = 1 - 2 >= 0 ? outSeqs[i - 2].offset : 4; - unsigned int repeatOffset3 = i - 3 >= 0 ? outSeqs[i - 3].offset : 8; - if (seqs[i].litLength != 0) { - switch (offsetValue) { - case 1: outSeqs[i].offset = repeatOffset1; break; - case 2: outSeqs[i].offset = repeatOffset2; break; - case 3: outSeqs[i].offset = repeatOffset3; break; - } - } else { - /* offsets shifted by one */ - switch (offsetValue) { - case 1: outSeqs[i].offset = repeatOffset2; break; - case 2: outSeqs[i].offset = repeatOffset3; break; - /* corner case where offsetValue == 3 */ - case 3: outSeqs[i].offset = repeatOffset1 - 1; break; - } + outSeqs[i].matchLength = seqs[i].matchLength + MINMATCH; + + if (i == seqStore->longLengthPos) { + if (seqStore->longLengthID == 1) { + outSeqs[i].litLength += 0x10000; + } else if (seqStore->longLengthID == 2) { + outSeqs[i].matchLength += 0x10000; } } + if (outSeqs[i].offset <= ZSTD_REP_NUM) { + outSeqs[i].rep = outSeqs[i].offset; + repIdx = (unsigned int)i - outSeqs[i].offset; + + if (outSeqs[i].litLength == 0) { + if (outSeqs[i].offset < 3) { + --repIdx; + } else { + repIdx = (unsigned int)i - 1; + } + ++outSeqs[i].rep; + } + assert(repIdx >= -3); + outSeqs[i].offset = repIdx >= 0 ? outSeqs[repIdx].offset : repStartValue[-repIdx - 1]; + if (outSeqs[i].rep == 4) { + --outSeqs[i].offset; + } + } else { + outSeqs[i].offset -= ZSTD_REP_NUM; + } + + position += outSeqs[i].litLength; + outSeqs[i].matchPos = (unsigned int)position; + position += outSeqs[i].matchLength; } zc->seqCollector.seqIndex += seqsSize; } diff --git a/tests/fuzzer.c b/tests/fuzzer.c index cf81230d..a22dafa3 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -307,7 +307,8 @@ static int FUZ_mallocTests(unsigned seed, double compressibility, unsigned part) static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize, BYTE* src, size_t size) { size_t i; - for(i = 0; i < seqsSize; ++i) { + size_t j; + for(i = 0; i < seqsSize - 1; ++i) { assert(dst + seqs[i].litLength + seqs[i].matchLength < dst + size); assert(src + seqs[i].litLength + seqs[i].matchLength < src + size); @@ -316,7 +317,8 @@ static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize, src += seqs[i].litLength; size -= seqs[i].litLength; - memcpy(dst, dst-seqs[i].offset, seqs[i].matchLength); + for (j = 0; j < seqs[i].matchLength; ++j) + dst[j] = dst[j - seqs[i].offset]; dst += seqs[i].matchLength; src += seqs[i].matchLength; size -= seqs[i].matchLength; @@ -1982,20 +1984,19 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences decode from sequences test : ", testNb++); { - size_t srcSize = sizeof(U32) * 1000; + size_t srcSize = 100 KB; BYTE* src = (BYTE*)CNBuffer; BYTE* decoded = (BYTE*)compressedBuffer; ZSTD_CCtx* cctx = ZSTD_createCCtx(); ZSTD_Sequence* seqs = (ZSTD_Sequence*)malloc(srcSize * sizeof(ZSTD_Sequence)); - size_t seqsSize; size_t i; - U32 randSeed = seed; + size_t seqsSize; if (seqs == NULL) goto _output_error; assert(cctx != NULL); /* Populate src with random data */ - for (i = 0; i < srcSize / sizeof(U32); ++i) {((U32*)CNBuffer)[i] = FUZ_rand(&randSeed);} + RDG_genBuffer(CNBuffer, srcSize, compressibility, 0., seed); /* get the sequences */ seqsSize = ZSTD_getSequences(cctx, seqs, srcSize, src, srcSize);