Merge branch 'dev' into newFormats
commit
9f0b8dfbe9
|
@ -21,6 +21,8 @@ matrix:
|
|||
- env: Cmd='make arminstall && make aarch64fuzz'
|
||||
- env: Cmd='make ppcinstall && make ppcfuzz'
|
||||
- env: Cmd='make ppcinstall && make ppc64fuzz'
|
||||
- env: Cmd='make -j uasanregressiontest'
|
||||
- env: Cmd='make -j msanregressiontest'
|
||||
|
||||
git:
|
||||
depth: 1
|
||||
|
|
10
Makefile
10
Makefile
|
@ -12,6 +12,7 @@ ZSTDDIR = lib
|
|||
BUILDIR = build
|
||||
ZWRAPDIR = zlibWrapper
|
||||
TESTDIR = tests
|
||||
FUZZDIR = $(TESTDIR)/fuzz
|
||||
|
||||
# Define nul output
|
||||
VOID = /dev/null
|
||||
|
@ -215,6 +216,15 @@ arm-ppc-compilation:
|
|||
$(MAKE) -C $(PRGDIR) clean zstd CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static ZSTDRTTEST= MOREFLAGS="-Werror -Wno-attributes -static"
|
||||
$(MAKE) -C $(PRGDIR) clean zstd CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static ZSTDRTTEST= MOREFLAGS="-m64 -static"
|
||||
|
||||
regressiontest:
|
||||
$(MAKE) -C $(FUZZDIR) regressiontest
|
||||
|
||||
uasanregressiontest:
|
||||
$(MAKE) -C $(FUZZDIR) regressiontest CC=clang CXX=clang++ CFLAGS="-O3 -fsanitize=address,undefined" CXXFLAGS="-O3 -fsanitize=address,undefined"
|
||||
|
||||
msanregressiontest:
|
||||
$(MAKE) -C $(FUZZDIR) regressiontest CC=clang CXX=clang++ CFLAGS="-O3 -fsanitize=memory" CXXFLAGS="-O3 -fsanitize=memory"
|
||||
|
||||
# run UBsan with -fsanitize-recover=signed-integer-overflow
|
||||
# due to a bug in UBsan when doing pointer subtraction
|
||||
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63303
|
||||
|
|
|
@ -45,7 +45,7 @@ test:
|
|||
parallel: true
|
||||
- ? |
|
||||
if [[ "$CIRCLE_NODE_INDEX" == "0" ]] ; then make ppc64build && make clean; fi &&
|
||||
if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make gcc7build && make clean; fi #could add another test here
|
||||
if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make gcc7build && make clean; fi
|
||||
:
|
||||
parallel: true
|
||||
- ? |
|
||||
|
@ -53,6 +53,11 @@ test:
|
|||
if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make -C tests test-legacy test-longmatch test-symbols && make clean; fi
|
||||
:
|
||||
parallel: true
|
||||
- ? |
|
||||
if [[ "$CIRCLE_NODE_INDEX" == "0" ]] ; then make -j regressiontest && make clean; fi &&
|
||||
if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then true; fi # Could add another test here
|
||||
:
|
||||
parallel: true
|
||||
|
||||
post:
|
||||
- echo Circle CI tests finished
|
||||
|
|
|
@ -125,7 +125,8 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
|
|||
#define MaxLit ((1<<Litbits) - 1)
|
||||
#define MaxML 52
|
||||
#define MaxLL 35
|
||||
#define MaxOff 28
|
||||
#define DefaultMaxOff 28
|
||||
#define MaxOff 31
|
||||
#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
|
||||
#define MLFSELog 9
|
||||
#define LLFSELog 9
|
||||
|
@ -151,8 +152,8 @@ static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1,
|
|||
#define ML_DEFAULTNORMLOG 6 /* for static allocation */
|
||||
static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
|
||||
|
||||
static const S16 OF_defaultNorm[MaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
|
||||
static const S16 OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
|
||||
#define OF_DEFAULTNORMLOG 5 /* for static allocation */
|
||||
static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
|
||||
|
||||
|
|
|
@ -582,7 +582,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
|
|||
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */
|
||||
if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
|
||||
|
||||
{ U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
|
||||
{ static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
|
||||
U64 const scale = 62 - tableLog;
|
||||
U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */
|
||||
U64 const vStep = 1ULL<<(scale-20);
|
||||
|
|
|
@ -1259,20 +1259,30 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
|
|||
mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
|
||||
}
|
||||
|
||||
MEM_STATIC symbolEncodingType_e ZSTD_selectEncodingType(FSE_repeat* repeatMode,
|
||||
size_t const mostFrequent, size_t nbSeq, U32 defaultNormLog)
|
||||
typedef enum {
|
||||
ZSTD_defaultDisallowed = 0,
|
||||
ZSTD_defaultAllowed = 1
|
||||
} ZSTD_defaultPolicy_e;
|
||||
|
||||
MEM_STATIC symbolEncodingType_e ZSTD_selectEncodingType(
|
||||
FSE_repeat* repeatMode, size_t const mostFrequent, size_t nbSeq,
|
||||
U32 defaultNormLog, ZSTD_defaultPolicy_e const isDefaultAllowed)
|
||||
{
|
||||
#define MIN_SEQ_FOR_DYNAMIC_FSE 64
|
||||
#define MAX_SEQ_FOR_STATIC_FSE 1000
|
||||
|
||||
if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
|
||||
ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
|
||||
if ((mostFrequent == nbSeq) && (!isDefaultAllowed || nbSeq > 2)) {
|
||||
/* Prefer set_basic over set_rle when there are 2 or less symbols,
|
||||
* since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
|
||||
* If basic encoding isn't possible, always choose RLE.
|
||||
*/
|
||||
*repeatMode = FSE_repeat_check;
|
||||
return set_rle;
|
||||
}
|
||||
if ((*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
|
||||
if (isDefaultAllowed && (*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
|
||||
return set_repeat;
|
||||
}
|
||||
if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) {
|
||||
if (isDefaultAllowed && ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1))))) {
|
||||
*repeatMode = FSE_repeat_valid;
|
||||
return set_basic;
|
||||
}
|
||||
|
@ -1308,6 +1318,7 @@ MEM_STATIC size_t ZSTD_buildCTable(void* dst, size_t dstCapacity,
|
|||
count[codeTable[nbSeq-1]]--;
|
||||
nbSeq_1--;
|
||||
}
|
||||
assert(nbSeq_1 > 1);
|
||||
CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
|
||||
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
|
||||
if (FSE_isError(NCountSize)) return NCountSize;
|
||||
|
@ -1445,7 +1456,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
|
|||
/* CTable for Literal Lengths */
|
||||
{ U32 max = MaxLL;
|
||||
size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, entropy->workspace);
|
||||
LLtype = ZSTD_selectEncodingType(&entropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog);
|
||||
LLtype = ZSTD_selectEncodingType(&entropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog, ZSTD_defaultAllowed);
|
||||
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
|
||||
count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
|
||||
entropy->workspace, sizeof(entropy->workspace));
|
||||
|
@ -1455,9 +1466,11 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
|
|||
/* CTable for Offsets */
|
||||
{ U32 max = MaxOff;
|
||||
size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, entropy->workspace);
|
||||
Offtype = ZSTD_selectEncodingType(&entropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog);
|
||||
/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
|
||||
ZSTD_defaultPolicy_e const defaultPolicy = max <= DefaultMaxOff ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
|
||||
Offtype = ZSTD_selectEncodingType(&entropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog, defaultPolicy);
|
||||
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
|
||||
count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, MaxOff,
|
||||
count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
|
||||
entropy->workspace, sizeof(entropy->workspace));
|
||||
if (ZSTD_isError(countSize)) return countSize;
|
||||
op += countSize;
|
||||
|
@ -1465,7 +1478,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
|
|||
/* CTable for MatchLengths */
|
||||
{ U32 max = MaxML;
|
||||
size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, entropy->workspace);
|
||||
MLtype = ZSTD_selectEncodingType(&entropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog);
|
||||
MLtype = ZSTD_selectEncodingType(&entropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog, ZSTD_defaultAllowed);
|
||||
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
|
||||
count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
|
||||
entropy->workspace, sizeof(entropy->workspace));
|
||||
|
|
|
@ -903,6 +903,15 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
|
|||
|
||||
typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
|
||||
|
||||
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
|
||||
* offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
|
||||
* bits before reloading. This value is the maximum number of bytes we read
|
||||
* after reloading when we are decoding long offets.
|
||||
*/
|
||||
#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
|
||||
(ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
|
||||
? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
|
||||
: 0)
|
||||
|
||||
static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
|
||||
{
|
||||
|
@ -910,7 +919,7 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l
|
|||
|
||||
U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
|
||||
U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
|
||||
U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */
|
||||
U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= MaxOff, by table construction */
|
||||
|
||||
U32 const llBits = LL_bits[llCode];
|
||||
U32 const mlBits = ML_bits[mlCode];
|
||||
|
@ -937,7 +946,7 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l
|
|||
0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
|
||||
0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
|
||||
0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
|
||||
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD };
|
||||
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
|
||||
|
||||
/* sequence */
|
||||
{ size_t offset;
|
||||
|
@ -945,8 +954,10 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l
|
|||
offset = 0;
|
||||
else {
|
||||
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
|
||||
if (longOffsets) {
|
||||
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN);
|
||||
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 2);
|
||||
assert(ofBits <= MaxOff);
|
||||
if (MEM_32bits() && longOffsets) {
|
||||
U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
|
||||
offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
|
||||
if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
|
||||
if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
|
||||
|
@ -977,13 +988,17 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l
|
|||
|
||||
seq.matchLength = ML_base[mlCode]
|
||||
+ ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
|
||||
if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream);
|
||||
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
|
||||
BIT_reloadDStream(&seqState->DStream);
|
||||
if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
|
||||
BIT_reloadDStream(&seqState->DStream);
|
||||
/* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
|
||||
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
|
||||
|
||||
seq.litLength = LL_base[llCode]
|
||||
+ ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
|
||||
if ( MEM_32bits()
|
||||
|| (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) )
|
||||
BIT_reloadDStream(&seqState->DStream);
|
||||
if (MEM_32bits())
|
||||
BIT_reloadDStream(&seqState->DStream);
|
||||
|
||||
DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
|
||||
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
|
||||
|
@ -1143,7 +1158,6 @@ static size_t ZSTD_decompressSequences(
|
|||
}
|
||||
|
||||
|
||||
|
||||
HINT_INLINE
|
||||
seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
|
||||
{
|
||||
|
@ -1151,7 +1165,7 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
|
|||
|
||||
U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
|
||||
U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
|
||||
U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */
|
||||
U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= MaxOff, by table construction */
|
||||
|
||||
U32 const llBits = LL_bits[llCode];
|
||||
U32 const mlBits = ML_bits[mlCode];
|
||||
|
@ -1178,7 +1192,7 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
|
|||
0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
|
||||
0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
|
||||
0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
|
||||
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD };
|
||||
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
|
||||
|
||||
/* sequence */
|
||||
{ size_t offset;
|
||||
|
@ -1186,8 +1200,10 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
|
|||
offset = 0;
|
||||
else {
|
||||
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
|
||||
if (longOffsets) {
|
||||
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN);
|
||||
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 2);
|
||||
assert(ofBits <= MaxOff);
|
||||
if (MEM_32bits() && longOffsets) {
|
||||
U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
|
||||
offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
|
||||
if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
|
||||
if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
|
||||
|
@ -1217,11 +1233,16 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
|
|||
}
|
||||
|
||||
seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
|
||||
if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream);
|
||||
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
|
||||
BIT_reloadDStream(&seqState->DStream);
|
||||
if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
|
||||
BIT_reloadDStream(&seqState->DStream);
|
||||
/* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
|
||||
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
|
||||
|
||||
seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
|
||||
if (MEM_32bits() ||
|
||||
(totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&seqState->DStream);
|
||||
if (MEM_32bits())
|
||||
BIT_reloadDStream(&seqState->DStream);
|
||||
|
||||
{ size_t const pos = seqState->pos + seq.litLength;
|
||||
seq.match = seqState->base + pos - seq.offset; /* single memory segment */
|
||||
|
|
|
@ -881,7 +881,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
|
|||
frame->stats.offsetSymbolSet, 28)) {
|
||||
Offtype = set_repeat;
|
||||
} else if (!(RAND(seed) & 3)) {
|
||||
FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
|
||||
FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
|
||||
Offtype = set_basic;
|
||||
} else {
|
||||
size_t nbSeq_1 = nbSeq;
|
||||
|
|
|
@ -14,6 +14,13 @@ CPPFLAGS ?=
|
|||
LDFLAGS ?=
|
||||
ARFLAGS ?=
|
||||
LIB_FUZZING_ENGINE ?= libregression.a
|
||||
PYTHON ?= python
|
||||
ifeq ($(shell uname), Darwin)
|
||||
DOWNLOAD?=curl -L -o
|
||||
else
|
||||
DOWNLOAD?=wget -O
|
||||
endif
|
||||
CORPORA_URL_PREFIX:=https://github.com/facebook/zstd/releases/download/fuzz-corpora/
|
||||
|
||||
ZSTDDIR = ../../lib
|
||||
PRGDIR = ../../programs
|
||||
|
@ -48,18 +55,20 @@ FUZZ_SRC := \
|
|||
FUZZ_OBJ := $(patsubst %.c,%.o, $(wildcard $(FUZZ_SRC)))
|
||||
|
||||
|
||||
.PHONY: default all clean
|
||||
.PHONY: default all clean cleanall
|
||||
|
||||
default: all
|
||||
|
||||
all: \
|
||||
FUZZ_TARGETS := \
|
||||
simple_round_trip \
|
||||
stream_round_trip \
|
||||
block_round_trip \
|
||||
block_round_trip \
|
||||
simple_decompress \
|
||||
stream_decompress \
|
||||
block_decompress
|
||||
|
||||
all: $(FUZZ_TARGETS)
|
||||
|
||||
%.o: %.c
|
||||
$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $^ -c -o $@
|
||||
|
||||
|
@ -93,7 +102,25 @@ libFuzzer:
|
|||
@git clone https://chromium.googlesource.com/chromium/llvm-project/llvm/lib/Fuzzer
|
||||
@cd Fuzzer && ./build.sh
|
||||
|
||||
corpora/%_seed_corpus.zip:
|
||||
@mkdir -p corpora
|
||||
$(DOWNLOAD) $@ $(CORPORA_URL_PREFIX)$*_seed_corpus.zip
|
||||
|
||||
corpora/%: corpora/%_seed_corpus.zip
|
||||
unzip -q $^ -d $@
|
||||
|
||||
.PHONY: corpora
|
||||
corpora: $(patsubst %,corpora/%,$(FUZZ_TARGETS))
|
||||
|
||||
regressiontest: corpora
|
||||
CC="$(CC)" CXX="$(CXX)" CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(LDFLAGS)" $(PYTHON) ./fuzz.py build all
|
||||
$(PYTHON) ./fuzz.py regression all
|
||||
|
||||
clean:
|
||||
@$(MAKE) -C $(ZSTDDIR) clean
|
||||
@$(RM) -f *.a *.o
|
||||
@$(RM) -f simple_round_trip stream_round_trip simple_decompress stream_decompress
|
||||
|
||||
cleanall:
|
||||
@$(RM) -rf Fuzzer
|
||||
@$(RM) -rf corpora
|
||||
|
|
|
@ -1,6 +1,14 @@
|
|||
# Fuzzing
|
||||
|
||||
Each fuzzing target can be built with multiple engines.
|
||||
Zstd provides a fuzz corpus for each target that can be downloaded with
|
||||
the command:
|
||||
|
||||
```
|
||||
make corpora
|
||||
```
|
||||
|
||||
It will download each corpus into `./corpora/TARGET`.
|
||||
|
||||
## fuzz.py
|
||||
|
||||
|
|
|
@ -82,11 +82,39 @@ def tmpdir():
|
|||
shutil.rmtree(dirpath, ignore_errors=True)
|
||||
|
||||
|
||||
def parse_targets(in_targets):
|
||||
targets = set()
|
||||
for target in in_targets:
|
||||
if not target:
|
||||
continue
|
||||
if target == 'all':
|
||||
targets = targets.union(TARGETS)
|
||||
elif target in TARGETS:
|
||||
targets.add(target)
|
||||
else:
|
||||
raise RuntimeError('{} is not a valid target'.format(target))
|
||||
return list(targets)
|
||||
|
||||
|
||||
def targets_parser(args, description):
|
||||
parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
|
||||
parser.add_argument(
|
||||
'TARGET',
|
||||
nargs='*',
|
||||
type=str,
|
||||
help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS)))
|
||||
args, extra = parser.parse_known_args(args)
|
||||
args.extra = extra
|
||||
|
||||
args.TARGET = parse_targets(args.TARGET)
|
||||
|
||||
return args
|
||||
|
||||
|
||||
def parse_env_flags(args, flags):
|
||||
"""
|
||||
Look for flags set by environment variables.
|
||||
"""
|
||||
flags = ' '.join(flags)
|
||||
san_flags = ','.join(re.findall('-fsanitize=((?:[a-z]+,?)+)', flags))
|
||||
nosan_flags = ','.join(re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags))
|
||||
|
||||
|
@ -112,6 +140,34 @@ def parse_env_flags(args, flags):
|
|||
return args
|
||||
|
||||
|
||||
def compiler_version(cc, cxx):
|
||||
"""
|
||||
Determines the compiler and version.
|
||||
Only works for clang and gcc.
|
||||
"""
|
||||
cc_version_bytes = subprocess.check_output([cc, "--version"])
|
||||
cxx_version_bytes = subprocess.check_output([cxx, "--version"])
|
||||
if cc_version_bytes.startswith(b'clang'):
|
||||
assert(cxx_version_bytes.startswith(b'clang'))
|
||||
compiler = 'clang'
|
||||
if cc_version_bytes.startswith(b'gcc'):
|
||||
assert(cxx_version_bytes.startswith(b'g++'))
|
||||
compiler = 'gcc'
|
||||
version_regex = b'([0-9])+\.([0-9])+\.([0-9])+'
|
||||
version_match = re.search(version_regex, cc_version_bytes)
|
||||
version = tuple(int(version_match.group(i)) for i in range(1, 4))
|
||||
return compiler, version
|
||||
|
||||
|
||||
def overflow_ubsan_flags(cc, cxx):
|
||||
compiler, version = compiler_version(cc, cxx)
|
||||
if compiler == 'gcc':
|
||||
return ['-fno-sanitize=signed-integer-overflow']
|
||||
if compiler == 'clang' and version >= (5, 0, 0):
|
||||
return ['-fno-sanitize=pointer-overflow']
|
||||
return []
|
||||
|
||||
|
||||
def build_parser(args):
|
||||
description = """
|
||||
Cleans the repository and builds a fuzz target (or all).
|
||||
|
@ -336,7 +392,7 @@ def build(args):
|
|||
if args.ubsan:
|
||||
ubsan_flags = ['-fsanitize=undefined']
|
||||
if not args.ubsan_pointer_overflow:
|
||||
ubsan_flags += ['-fno-sanitize=pointer-overflow']
|
||||
ubsan_flags += overflow_ubsan_flags(cc, cxx)
|
||||
common_flags += ubsan_flags
|
||||
|
||||
if args.stateful_fuzzing:
|
||||
|
@ -424,36 +480,42 @@ def libfuzzer_parser(args):
|
|||
if args.TARGET and args.TARGET not in TARGETS:
|
||||
raise RuntimeError('{} is not a valid target'.format(args.TARGET))
|
||||
|
||||
if not args.corpora:
|
||||
args.corpora = abs_join(CORPORA_DIR, args.TARGET)
|
||||
if not args.artifact:
|
||||
args.artifact = abs_join(CORPORA_DIR, '{}-crash'.format(args.TARGET))
|
||||
if not args.seed:
|
||||
args.seed = abs_join(CORPORA_DIR, '{}-seed'.format(args.TARGET))
|
||||
|
||||
return args
|
||||
|
||||
|
||||
def libfuzzer(args):
|
||||
try:
|
||||
args = libfuzzer_parser(args)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
return 1
|
||||
target = abs_join(FUZZ_DIR, args.TARGET)
|
||||
def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
|
||||
if corpora is None:
|
||||
corpora = abs_join(CORPORA_DIR, target)
|
||||
if artifact is None:
|
||||
artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target))
|
||||
if seed is None:
|
||||
seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))
|
||||
if extra_args is None:
|
||||
extra_args = []
|
||||
|
||||
corpora = [create(args.corpora)]
|
||||
artifact = create(args.artifact)
|
||||
seed = check(args.seed)
|
||||
target = abs_join(FUZZ_DIR, target)
|
||||
|
||||
corpora = [create(corpora)]
|
||||
artifact = create(artifact)
|
||||
seed = check(seed)
|
||||
|
||||
corpora += [artifact]
|
||||
if seed is not None:
|
||||
corpora += [seed]
|
||||
|
||||
cmd = [target, '-artifact_prefix={}/'.format(artifact)]
|
||||
cmd += corpora + args.extra
|
||||
cmd += corpora + extra_args
|
||||
print(' '.join(cmd))
|
||||
subprocess.call(cmd)
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
|
||||
def libfuzzer_cmd(args):
|
||||
try:
|
||||
args = libfuzzer_parser(args)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
return 1
|
||||
libfuzzer(args.TARGET, args.corpora, args.artifact, args.seed, args.extra)
|
||||
return 0
|
||||
|
||||
|
||||
|
@ -518,39 +580,15 @@ def afl(args):
|
|||
return 0
|
||||
|
||||
|
||||
def regression_parser(args):
|
||||
description = """
|
||||
Runs one or more regression tests.
|
||||
The fuzzer should have been built with with
|
||||
LIB_FUZZING_ENGINE='libregression.a'.
|
||||
Takes input from CORPORA.
|
||||
"""
|
||||
parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
|
||||
parser.add_argument(
|
||||
'TARGET',
|
||||
nargs='*',
|
||||
type=str,
|
||||
help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS)))
|
||||
args = parser.parse_args(args)
|
||||
|
||||
targets = set()
|
||||
for target in args.TARGET:
|
||||
if not target:
|
||||
continue
|
||||
if target == 'all':
|
||||
targets = targets.union(TARGETS)
|
||||
elif target in TARGETS:
|
||||
targets.add(target)
|
||||
else:
|
||||
raise RuntimeError('{} is not a valid target'.format(target))
|
||||
args.TARGET = list(targets)
|
||||
|
||||
return args
|
||||
|
||||
|
||||
def regression(args):
|
||||
try:
|
||||
args = regression_parser(args)
|
||||
description = """
|
||||
Runs one or more regression tests.
|
||||
The fuzzer should have been built with with
|
||||
LIB_FUZZING_ENGINE='libregression.a'.
|
||||
Takes input from CORPORA.
|
||||
"""
|
||||
args = targets_parser(args, description)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
return 1
|
||||
|
@ -673,6 +711,52 @@ def gen(args):
|
|||
return 0
|
||||
|
||||
|
||||
def minimize(args):
|
||||
try:
|
||||
description = """
|
||||
Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
|
||||
TARGET_seed_corpus. All extra args are passed to libfuzzer.
|
||||
"""
|
||||
args = targets_parser(args, description)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
return 1
|
||||
|
||||
for target in args.TARGET:
|
||||
# Merge the corpus + anything else into the seed_corpus
|
||||
corpus = abs_join(CORPORA_DIR, target)
|
||||
seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
|
||||
extra_args = [corpus, "-merge=1"] + args.extra
|
||||
libfuzzer(target, corpora=seed_corpus, extra_args=extra_args)
|
||||
seeds = set(os.listdir(seed_corpus))
|
||||
# Copy all crashes directly into the seed_corpus if not already present
|
||||
crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target))
|
||||
for crash in os.listdir(crashes):
|
||||
if crash not in seeds:
|
||||
shutil.copy(abs_join(crashes, crash), seed_corpus)
|
||||
seeds.add(crash)
|
||||
|
||||
|
||||
def zip_cmd(args):
|
||||
try:
|
||||
description = """
|
||||
Zips up the seed corpus.
|
||||
"""
|
||||
args = targets_parser(args, description)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
return 1
|
||||
|
||||
for target in args.TARGET:
|
||||
# Zip the seed_corpus
|
||||
seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
|
||||
seeds = [abs_join(seed_corpus, f) for f in os.listdir(seed_corpus)]
|
||||
zip_file = "{}.zip".format(seed_corpus)
|
||||
cmd = ["zip", "-q", "-j", "-9", zip_file]
|
||||
print(' '.join(cmd + [abs_join(seed_corpus, '*')]))
|
||||
subprocess.check_call(cmd + seeds)
|
||||
|
||||
|
||||
def short_help(args):
|
||||
name = args[0]
|
||||
print("Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(name))
|
||||
|
@ -690,6 +774,8 @@ def help(args):
|
|||
print("\tafl\t\tRun an AFL fuzzer")
|
||||
print("\tregression\tRun a regression test")
|
||||
print("\tgen\t\tGenerate a seed corpus for a fuzzer")
|
||||
print("\tminimize\tMinimize the test corpora")
|
||||
print("\tzip\t\tZip the minimized corpora up")
|
||||
|
||||
|
||||
def main():
|
||||
|
@ -705,13 +791,17 @@ def main():
|
|||
if command == "build":
|
||||
return build(args)
|
||||
if command == "libfuzzer":
|
||||
return libfuzzer(args)
|
||||
return libfuzzer_cmd(args)
|
||||
if command == "regression":
|
||||
return regression(args)
|
||||
if command == "afl":
|
||||
return afl(args)
|
||||
if command == "gen":
|
||||
return gen(args)
|
||||
if command == "minimize":
|
||||
return minimize(args)
|
||||
if command == "zip":
|
||||
return zip_cmd(args)
|
||||
short_help(args)
|
||||
print("Error: No such command {} (pass -h for help)".format(command))
|
||||
return 1
|
||||
|
|
|
@ -38,10 +38,11 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
|
|||
if (FUZZ_rand(&seed) & 1) {
|
||||
ZSTD_inBuffer in = {src, srcSize, 0};
|
||||
ZSTD_outBuffer out = {compressed, compressedCapacity, 0};
|
||||
size_t err;
|
||||
|
||||
ZSTD_CCtx_reset(cctx);
|
||||
FUZZ_setRandomParameters(cctx, &seed);
|
||||
size_t const err = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end);
|
||||
err = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue