Merge branch 'dev' into newFormats

2017-09-26 14:22:39 -07:00 · 2017-09-26 14:22:39 -07:00 · 9f0b8dfbe9
commit 9f0b8dfbe9
parent 52a1d1c6dc 8a4d0abc1c
12 changed files with 267 additions and 89 deletions
--- a/.travis.yml
+++ b/.travis.yml
@ -21,6 +21,8 @@ matrix:
    - env: Cmd='make arminstall && make aarch64fuzz'
    - env: Cmd='make ppcinstall && make ppcfuzz'
    - env: Cmd='make ppcinstall && make ppc64fuzz'
    - env: Cmd='make -j uasanregressiontest'
    - env: Cmd='make -j msanregressiontest'
 git:
  depth: 1
--- a/10
+++ b/10
@ -12,6 +12,7 @@ ZSTDDIR  = lib
 BUILDIR  = build
 ZWRAPDIR = zlibWrapper
 TESTDIR  = tests
 FUZZDIR  = $(TESTDIR)/fuzz
 # Define nul output
 VOID = /dev/null
@ -215,6 +216,15 @@ arm-ppc-compilation:
 	$(MAKE) -C $(PRGDIR) clean zstd CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static ZSTDRTTEST= MOREFLAGS="-Werror -Wno-attributes -static"
 	$(MAKE) -C $(PRGDIR) clean zstd CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static ZSTDRTTEST= MOREFLAGS="-m64 -static"
 # Fuzzer regression tests. These targets only run commands (they never create
 # a file named after themselves), so declare them phony: a stray file called
 # e.g. "regressiontest" must not make the target look up to date.
 .PHONY: regressiontest uasanregressiontest msanregressiontest
 # Build the fuzz targets and replay the seed corpora with the caller's toolchain.
 regressiontest:
 	$(MAKE) -C $(FUZZDIR) regressiontest
 # Same, built with clang and the address + undefined-behavior sanitizers.
 uasanregressiontest:
 	$(MAKE) -C $(FUZZDIR) regressiontest CC=clang CXX=clang++ CFLAGS="-O3 -fsanitize=address,undefined" CXXFLAGS="-O3 -fsanitize=address,undefined"
 # Same, built with clang and the memory sanitizer.
 msanregressiontest:
 	$(MAKE) -C $(FUZZDIR) regressiontest CC=clang CXX=clang++ CFLAGS="-O3 -fsanitize=memory" CXXFLAGS="-O3 -fsanitize=memory"
 # run UBsan with -fsanitize-recover=signed-integer-overflow
 # due to a bug in UBsan when doing pointer subtraction
 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63303
--- a/circle.yml
+++ b/circle.yml
@ -45,7 +45,7 @@ test:
        parallel: true
    - ? |
        if [[ "$CIRCLE_NODE_INDEX" == "0" ]]                                    ; then make ppc64build   && make clean; fi &&
-        if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make gcc7build    && make clean; fi #could add another test here
+        if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make gcc7build    && make clean; fi
      :
        parallel: true
    - ? |
@ -53,6 +53,11 @@ test:
        if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make -C tests test-legacy test-longmatch test-symbols && make clean; fi
      :
        parallel: true
    - ? |
        if [[ "$CIRCLE_NODE_INDEX" == "0" ]]                                    ; then make -j regressiontest && make clean; fi &&
        if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then true; fi # Could add another test here
      :
        parallel: true
  post:
    - echo Circle CI tests finished
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@ -125,7 +125,8 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
 #define MaxLit ((1<<Litbits) - 1)
 #define MaxML  52
 #define MaxLL  35
-#define MaxOff 28
+#define DefaultMaxOff 28
 #define MaxOff 31
 #define MaxSeq MAX(MaxLL, MaxML)   /* Assumption : MaxOff < MaxLL,MaxML */
 #define MLFSELog    9
 #define LLFSELog    9
@ -151,8 +152,8 @@ static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1,
 #define ML_DEFAULTNORMLOG 6  /* for static allocation */
 static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
-static const S16 OF_defaultNorm[MaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+static const S16 OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
-                                              1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
+                                                     1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
 #define OF_DEFAULTNORMLOG 5  /* for static allocation */
 static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
--- a/lib/compress/fse_compress.c
+++ b/lib/compress/fse_compress.c
@ -582,7 +582,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported size */
    if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */
-    {   U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
+    {   static U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
        U64 const scale = 62 - tableLog;
        U64 const step = ((U64)1<<62) / total;   /* <== here, one division ! */
        U64 const vStep = 1ULL<<(scale-20);
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@ -1259,20 +1259,30 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
        mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
 }
-MEM_STATIC symbolEncodingType_e ZSTD_selectEncodingType(FSE_repeat* repeatMode,
+typedef enum {
-        size_t const mostFrequent, size_t nbSeq, U32 defaultNormLog)
+    ZSTD_defaultDisallowed = 0,
    ZSTD_defaultAllowed = 1
 } ZSTD_defaultPolicy_e;
 MEM_STATIC symbolEncodingType_e ZSTD_selectEncodingType(
        FSE_repeat* repeatMode, size_t const mostFrequent, size_t nbSeq,
        U32 defaultNormLog, ZSTD_defaultPolicy_e const isDefaultAllowed)
 {
 #define MIN_SEQ_FOR_DYNAMIC_FSE   64
 #define MAX_SEQ_FOR_STATIC_FSE  1000
-
+    ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
-    if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
+    if ((mostFrequent == nbSeq) && (!isDefaultAllowed || nbSeq > 2)) {
        /* Prefer set_basic over set_rle when there are 2 or less symbols,
         * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
         * If basic encoding isn't possible, always choose RLE.
         */
        *repeatMode = FSE_repeat_check;
        return set_rle;
    }
-    if ((*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+    if (isDefaultAllowed && (*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
        return set_repeat;
    }
-    if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) {
+    if (isDefaultAllowed && ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1))))) {
        *repeatMode = FSE_repeat_valid;
        return set_basic;
    }
@ -1308,6 +1318,7 @@ MEM_STATIC size_t ZSTD_buildCTable(void* dst, size_t dstCapacity,
            count[codeTable[nbSeq-1]]--;
            nbSeq_1--;
        }
        assert(nbSeq_1 > 1);
        CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
        {   size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog);   /* overflow protected */
            if (FSE_isError(NCountSize)) return NCountSize;
@ -1445,7 +1456,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
    /* CTable for Literal Lengths */
    {   U32 max = MaxLL;
        size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, entropy->workspace);
-        LLtype = ZSTD_selectEncodingType(&entropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog);
+        LLtype = ZSTD_selectEncodingType(&entropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog, ZSTD_defaultAllowed);
        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
                    count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
                    entropy->workspace, sizeof(entropy->workspace));
@ -1455,9 +1466,11 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
    /* CTable for Offsets */
    {   U32 max = MaxOff;
        size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, entropy->workspace);
-        Offtype = ZSTD_selectEncodingType(&entropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog);
+        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
        ZSTD_defaultPolicy_e const defaultPolicy = max <= DefaultMaxOff ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
        Offtype = ZSTD_selectEncodingType(&entropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog, defaultPolicy);
        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
-                    count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, MaxOff,
+                    count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
                    entropy->workspace, sizeof(entropy->workspace));
            if (ZSTD_isError(countSize)) return countSize;
            op += countSize;
@ -1465,7 +1478,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
    /* CTable for MatchLengths */
    {   U32 max = MaxML;
        size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, entropy->workspace);
-        MLtype = ZSTD_selectEncodingType(&entropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog);
+        MLtype = ZSTD_selectEncodingType(&entropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog, ZSTD_defaultAllowed);
        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
                    count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
                    entropy->workspace, sizeof(entropy->workspace));
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@ -903,6 +903,15 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
 typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
 /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
 * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
 * bits before reloading. This value is the maximum number of bits we read
 * after reloading when we are decoding long offsets.
 */
 #define LONG_OFFSETS_MAX_EXTRA_BITS_32                                         \
    (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32                         \
        ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32                    \
        : 0)
 static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
 {
@ -910,7 +919,7 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l
    U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
    U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
-    U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb);   /* <= maxOff, by table construction */
+    U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb);   /* <= MaxOff, by table construction */
    U32 const llBits = LL_bits[llCode];
    U32 const mlBits = ML_bits[mlCode];
@ -937,7 +946,7 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l
                     0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
                     0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
                     0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
-                     0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD };
+                     0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
    /* sequence */
    {   size_t offset;
@ -945,8 +954,10 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l
            offset = 0;
        else {
            ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
-            if (longOffsets) {
+            ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 2);
-                int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN);
+            assert(ofBits <= MaxOff);
            if (MEM_32bits() && longOffsets) {
                U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
                offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
                if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
                if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
@ -977,13 +988,17 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l
    seq.matchLength = ML_base[mlCode]
                    + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0);  /* <=  16 bits */
-    if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream);
+    if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
        BIT_reloadDStream(&seqState->DStream);
    if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
        BIT_reloadDStream(&seqState->DStream);
    /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
    ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
    seq.litLength = LL_base[llCode]
                  + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0);    /* <=  16 bits */
-    if (  MEM_32bits()
+    if (MEM_32bits())
-      || (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) )
+        BIT_reloadDStream(&seqState->DStream);
       BIT_reloadDStream(&seqState->DStream);
    DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
                (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
@ -1143,7 +1158,6 @@ static size_t ZSTD_decompressSequences(
 }
 HINT_INLINE
 seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
 {
@ -1151,7 +1165,7 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
    U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
    U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
-    U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb);   /* <= maxOff, by table construction */
+    U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb);   /* <= MaxOff, by table construction */
    U32 const llBits = LL_bits[llCode];
    U32 const mlBits = ML_bits[mlCode];
@ -1178,7 +1192,7 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
                     0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
                     0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
                     0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
-                     0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD };
+                     0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
    /* sequence */
    {   size_t offset;
@ -1186,8 +1200,10 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
            offset = 0;
        else {
            ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
-            if (longOffsets) {
+            ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 2);
-                int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN);
+            assert(ofBits <= MaxOff);
            if (MEM_32bits() && longOffsets) {
                U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
                offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
                if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
                if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
@ -1217,11 +1233,16 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
    }
    seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0);  /* <=  16 bits */
-    if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream);
+    if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
        BIT_reloadDStream(&seqState->DStream);
    if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
        BIT_reloadDStream(&seqState->DStream);
    /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
    ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
    seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0);    /* <=  16 bits */
-    if (MEM_32bits() ||
+    if (MEM_32bits())
-       (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&seqState->DStream);
+        BIT_reloadDStream(&seqState->DStream);
    {   size_t const pos = seqState->pos + seq.litLength;
        seq.match = seqState->base + pos - seq.offset;    /* single memory segment */
--- a/tests/decodecorpus.c
+++ b/tests/decodecorpus.c
@ -881,7 +881,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
                                  frame->stats.offsetSymbolSet, 28)) {
            Offtype = set_repeat;
        } else if (!(RAND(seed) & 3)) {
-            FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
+            FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
            Offtype = set_basic;
        } else {
            size_t nbSeq_1 = nbSeq;
--- a/tests/fuzz/Makefile
+++ b/tests/fuzz/Makefile
@ -14,6 +14,13 @@ CPPFLAGS ?=
 LDFLAGS ?=
 ARFLAGS ?=
 LIB_FUZZING_ENGINE ?= libregression.a
 PYTHON ?= python
 # Pick the command used to fetch the seed corpora. It is invoked as
 # "$(DOWNLOAD) <output-file> <url>". Users may override DOWNLOAD.
 # The assignments are indented with spaces, not a TAB: a TAB-indented line
 # is parsed as recipe text whenever a rule precedes it.
 ifeq ($(shell uname), Darwin)
   DOWNLOAD ?= curl -L -o
 else
   DOWNLOAD ?= wget -O
 endif
 CORPORA_URL_PREFIX:=https://github.com/facebook/zstd/releases/download/fuzz-corpora/
 ZSTDDIR = ../../lib
 PRGDIR = ../../programs
@ -48,18 +55,20 @@ FUZZ_SRC       := \
 FUZZ_OBJ := $(patsubst %.c,%.o, $(wildcard $(FUZZ_SRC)))
-.PHONY: default all clean
+.PHONY: default all clean cleanall
 default: all
-all: \
+FUZZ_TARGETS :=       \
 	simple_round_trip \
 	stream_round_trip \
-	block_round_trip \
+	block_round_trip  \
 	simple_decompress \
 	stream_decompress \
 	block_decompress
 all: $(FUZZ_TARGETS)
 %.o: %.c
 	$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $^ -c -o $@
@ -93,7 +102,25 @@ libFuzzer:
 	@git clone https://chromium.googlesource.com/chromium/llvm-project/llvm/lib/Fuzzer
 	@cd Fuzzer && ./build.sh
 # Download the zipped seed corpus for one fuzz target ($* is the target name)
 # into the corpora directory, creating that directory first.
 corpora/%_seed_corpus.zip:
 	@mkdir -p $(@D)
 	$(DOWNLOAD) $@ $(CORPORA_URL_PREFIX)$(*)_seed_corpus.zip
 # Unpack a downloaded seed corpus into corpora/<target>.
 corpora/%: corpora/%_seed_corpus.zip
 	unzip -q $< -d $@
 # Neither target produces a file of its own name, so both are phony.
 .PHONY: corpora regressiontest
 # Fetch and unpack the seed corpus of every fuzz target.
 corpora: $(patsubst %,corpora/%,$(FUZZ_TARGETS))
 # Build all fuzz targets through fuzz.py (passing the toolchain and flags via
 # the environment), then run the regression command on all targets.
 regressiontest: corpora
 	CC="$(CC)" CXX="$(CXX)" CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(LDFLAGS)" $(PYTHON) ./fuzz.py build all
 	$(PYTHON) ./fuzz.py regression all
 # Remove build products: the library's objects, local archives and objects,
 # and every fuzz target binary. The binaries come from $(FUZZ_TARGETS) so the
 # list cannot drift out of sync with "all" (the previous hard-coded list
 # left block_round_trip and block_decompress behind).
 clean:
 	@$(MAKE) -C $(ZSTDDIR) clean
 	@$(RM) -f *.a *.o
 	@$(RM) -f $(FUZZ_TARGETS)
 # Remove the cloned Fuzzer checkout and the downloaded corpora.
 cleanall:
 	@$(RM) -rf Fuzzer corpora
--- a/tests/fuzz/README.md
+++ b/tests/fuzz/README.md
@ -1,6 +1,14 @@
 # Fuzzing
 Each fuzzing target can be built with multiple engines.
 Zstd provides a fuzz corpus for each target that can be downloaded with
 the command:
 ```
 make corpora
 ```
 It will download each corpus into `./corpora/TARGET`.
 ## fuzz.py
--- a/tests/fuzz/fuzz.py
+++ b/tests/fuzz/fuzz.py
@ -82,11 +82,39 @@ def tmpdir():
        shutil.rmtree(dirpath, ignore_errors=True)
 def parse_targets(in_targets):
    """
    Expand a list of target names into a de-duplicated list of targets.

    Falsy entries are skipped, 'all' selects every entry of TARGETS, and any
    other name must be a member of TARGETS. Raises RuntimeError on an unknown
    name. The order of the returned list is unspecified (it comes from a set).
    """
    selected = set()
    for name in filter(None, in_targets):
        if name == 'all':
            selected = selected.union(TARGETS)
            continue
        if name not in TARGETS:
            raise RuntimeError('{} is not a valid target'.format(name))
        selected.add(name)
    return list(selected)
 def targets_parser(args, description):
    """
    Parse a command line made of a program name followed by fuzz targets.

    args[0] is popped (mutating the caller's list) and used as the program
    name. Positional TARGET values are expanded with parse_targets(); any
    arguments argparse does not recognize are kept in the `extra` attribute
    of the returned namespace.
    """
    prog = args.pop(0)
    target_help = 'Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
    parser = argparse.ArgumentParser(prog=prog, description=description)
    parser.add_argument('TARGET', nargs='*', type=str, help=target_help)
    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown
    parsed.TARGET = parse_targets(parsed.TARGET)
    return parsed
 def parse_env_flags(args, flags):
    """
    Look for flags set by environment variables.
    """
    flags = ' '.join(flags)
    san_flags = ','.join(re.findall('-fsanitize=((?:[a-z]+,?)+)', flags))
    nosan_flags = ','.join(re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags))
@ -112,6 +140,34 @@ def parse_env_flags(args, flags):
    return args
 def compiler_version(cc, cxx):
    """
    Determines the compiler and version.
    Only works for clang and gcc.

    Runs `cc --version` and `cxx --version` and returns (compiler, version).
    compiler is 'clang', 'gcc', or None when neither is recognized.
    version is a (major, minor, patch) tuple of ints, or None when the
    compiler is not recognized.
    Raises RuntimeError if a recognized compiler prints no X.Y.Z version.
    """
    cc_version_bytes = subprocess.check_output([cc, "--version"])
    cxx_version_bytes = subprocess.check_output([cxx, "--version"])
    compiler = None
    version = None
    # Search anywhere in the banner: distributions prefix it (for example
    # "Ubuntu clang version ..."), so a startswith() test misses them.
    if b'clang' in cc_version_bytes:
        assert(b'clang' in cxx_version_bytes)
        compiler = 'clang'
    elif b'gcc' in cc_version_bytes or b'GCC' in cc_version_bytes:
        assert(b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes)
        compiler = 'gcc'
    if compiler is not None:
        # The '+' must sit inside each group; '([0-9])+' would capture only
        # the last digit of a multi-digit component (10.0.1 -> 0.0.1).
        version_regex = br'([0-9]+)\.([0-9]+)\.([0-9]+)'
        version_match = re.search(version_regex, cc_version_bytes)
        if version_match is None:
            raise RuntimeError(
                'could not find a version number in `{} --version`'.format(cc))
        version = tuple(int(version_match.group(i)) for i in range(1, 4))
    return compiler, version
 def overflow_ubsan_flags(cc, cxx):
    """
    Return the UBSan flags that switch off the overflow check for the
    compiler reported by compiler_version(cc, cxx).

    gcc gets -fno-sanitize=signed-integer-overflow, clang 5.0.0 or newer gets
    -fno-sanitize=pointer-overflow, and anything else gets no extra flags.
    """
    compiler, version = compiler_version(cc, cxx)
    if compiler == 'gcc':
        flags = ['-fno-sanitize=signed-integer-overflow']
    elif compiler == 'clang' and version >= (5, 0, 0):
        flags = ['-fno-sanitize=pointer-overflow']
    else:
        flags = []
    return flags
 def build_parser(args):
    description = """
    Cleans the repository and builds a fuzz target (or all).
@ -336,7 +392,7 @@ def build(args):
    if args.ubsan:
        ubsan_flags = ['-fsanitize=undefined']
        if not args.ubsan_pointer_overflow:
-            ubsan_flags += ['-fno-sanitize=pointer-overflow']
+            ubsan_flags += overflow_ubsan_flags(cc, cxx)
        common_flags += ubsan_flags
    if args.stateful_fuzzing:
@ -424,36 +480,42 @@ def libfuzzer_parser(args):
    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))
    if not args.corpora:
        args.corpora = abs_join(CORPORA_DIR, args.TARGET)
    if not args.artifact:
        args.artifact = abs_join(CORPORA_DIR, '{}-crash'.format(args.TARGET))
    if not args.seed:
        args.seed = abs_join(CORPORA_DIR, '{}-seed'.format(args.TARGET))
    return args
-def libfuzzer(args):
+def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
-    try:
+    if corpora is None:
-        args = libfuzzer_parser(args)
+        corpora = abs_join(CORPORA_DIR, target)
-    except Exception as e:
+    if artifact is None:
-        print(e)
+        artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target))
-        return 1
+    if seed is None:
-    target = abs_join(FUZZ_DIR, args.TARGET)
+        seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))
    if extra_args is None:
        extra_args = []
-    corpora = [create(args.corpora)]
+    target = abs_join(FUZZ_DIR, target)
-    artifact = create(args.artifact)
+
-    seed = check(args.seed)
+    corpora = [create(corpora)]
    artifact = create(artifact)
    seed = check(seed)
    corpora += [artifact]
    if seed is not None:
        corpora += [seed]
    cmd = [target, '-artifact_prefix={}/'.format(artifact)]
-    cmd += corpora + args.extra
+    cmd += corpora + extra_args
    print(' '.join(cmd))
-    subprocess.call(cmd)
+    subprocess.check_call(cmd)
 def libfuzzer_cmd(args):
    """
    Command-line entry point for the `libfuzzer` command.

    Parses args with libfuzzer_parser() and hands the result to libfuzzer().
    Returns 1 (after printing the error) if parsing fails, 0 otherwise.
    """
    try:
        parsed = libfuzzer_parser(args)
    except Exception as err:
        print(err)
        return 1
    libfuzzer(parsed.TARGET,
              corpora=parsed.corpora,
              artifact=parsed.artifact,
              seed=parsed.seed,
              extra_args=parsed.extra)
    return 0
@ -518,39 +580,15 @@ def afl(args):
    return 0
 def regression_parser(args):
    description = """
    Runs one or more regression tests.
    The fuzzer should have been built with with
    LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS)))
    args = parser.parse_args(args)
    targets = set()
    for target in args.TARGET:
        if not target:
            continue
        if target == 'all':
            targets = targets.union(TARGETS)
        elif target in TARGETS:
            targets.add(target)
        else:
            raise RuntimeError('{} is not a valid target'.format(target))
    args.TARGET = list(targets)
    return args
 def regression(args):
    try:
-        args = regression_parser(args)
+        description = """
        Runs one or more regression tests.
        The fuzzer should have been built with with
        LIB_FUZZING_ENGINE='libregression.a'.
        Takes input from CORPORA.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1
@ -673,6 +711,52 @@ def gen(args):
    return 0
 def minimize(args):
    """
    Command-line entry point for the `minimize` command.

    For every requested target, runs libfuzzer with -merge=1 to merge the
    target's corpus into CORPORA_DIR/<target>_seed_corpus, then copies any
    file from CORPORA_DIR/<target>-crash that is not already in the seed
    corpus. Returns 1 (after printing the error) if argument parsing fails
    and 0 on success, matching the other command handlers.
    """
    try:
        description = """
        Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
        TARGET_seed_corpus. All extra args are passed to libfuzzer.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1
    for target in args.TARGET:
        # Merge the corpus + anything else into the seed_corpus
        corpus = abs_join(CORPORA_DIR, target)
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        extra_args = [corpus, "-merge=1"] + args.extra
        libfuzzer(target, corpora=seed_corpus, extra_args=extra_args)
        seeds = set(os.listdir(seed_corpus))
        # Copy all crashes directly into the seed_corpus if not already present
        crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target))
        for crash in os.listdir(crashes):
            if crash not in seeds:
                shutil.copy(abs_join(crashes, crash), seed_corpus)
                seeds.add(crash)
    # Explicit success status; main() returns this value to its caller.
    return 0
 def zip_cmd(args):
    """
    Command-line entry point for the `zip` command.

    For every requested target, zips the files found in
    CORPORA_DIR/<target>_seed_corpus into <target>_seed_corpus.zip next to
    that directory, using the external `zip` program. Returns 1 (after
    printing the error) if argument parsing fails and 0 on success, matching
    the other command handlers.
    """
    try:
        description = """
        Zips up the seed corpus.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1
    for target in args.TARGET:
        # Zip the seed_corpus
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        seeds = [abs_join(seed_corpus, f) for f in os.listdir(seed_corpus)]
        zip_file = "{}.zip".format(seed_corpus)
        cmd = ["zip", "-q", "-j", "-9", zip_file]
        # Print a short form of the command; the real call lists every file.
        print(' '.join(cmd + [abs_join(seed_corpus, '*')]))
        subprocess.check_call(cmd + seeds)
    # Explicit success status; main() returns this value to its caller.
    return 0
 def short_help(args):
    name = args[0]
    print("Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(name))
@ -690,6 +774,8 @@ def help(args):
    print("\tafl\t\tRun an AFL fuzzer")
    print("\tregression\tRun a regression test")
    print("\tgen\t\tGenerate a seed corpus for a fuzzer")
    print("\tminimize\tMinimize the test corpora")
    print("\tzip\t\tZip the minimized corpora up")
 def main():
@ -705,13 +791,17 @@ def main():
    if command == "build":
        return build(args)
    if command == "libfuzzer":
-        return libfuzzer(args)
+        return libfuzzer_cmd(args)
    if command == "regression":
        return regression(args)
    if command == "afl":
        return afl(args)
    if command == "gen":
        return gen(args)
    if command == "minimize":
        return minimize(args)
    if command == "zip":
        return zip_cmd(args)
    short_help(args)
    print("Error: No such command {} (pass -h for help)".format(command))
    return 1
--- a/tests/fuzz/simple_round_trip.c
+++ b/tests/fuzz/simple_round_trip.c
@ -38,10 +38,11 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
    if (FUZZ_rand(&seed) & 1) {
        ZSTD_inBuffer in = {src, srcSize, 0};
        ZSTD_outBuffer out = {compressed, compressedCapacity, 0};
        size_t err;
        ZSTD_CCtx_reset(cctx);
        FUZZ_setRandomParameters(cctx, &seed);
-        size_t const err = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end);
+        err = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end);
        if (err != 0) {
            return err;
        }