Merge branch 'dev' into newFormats

dev
Yann Collet 2017-09-26 14:22:39 -07:00
commit 9f0b8dfbe9
12 changed files with 267 additions and 89 deletions

View File

@ -21,6 +21,8 @@ matrix:
- env: Cmd='make arminstall && make aarch64fuzz'
- env: Cmd='make ppcinstall && make ppcfuzz'
- env: Cmd='make ppcinstall && make ppc64fuzz'
- env: Cmd='make -j uasanregressiontest'
- env: Cmd='make -j msanregressiontest'
git:
depth: 1

View File

@ -12,6 +12,7 @@ ZSTDDIR = lib
BUILDIR = build
ZWRAPDIR = zlibWrapper
TESTDIR = tests
FUZZDIR = $(TESTDIR)/fuzz
# Define nul output
VOID = /dev/null
@ -215,6 +216,15 @@ arm-ppc-compilation:
$(MAKE) -C $(PRGDIR) clean zstd CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static ZSTDRTTEST= MOREFLAGS="-Werror -Wno-attributes -static"
$(MAKE) -C $(PRGDIR) clean zstd CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static ZSTDRTTEST= MOREFLAGS="-m64 -static"
regressiontest:
$(MAKE) -C $(FUZZDIR) regressiontest
uasanregressiontest:
$(MAKE) -C $(FUZZDIR) regressiontest CC=clang CXX=clang++ CFLAGS="-O3 -fsanitize=address,undefined" CXXFLAGS="-O3 -fsanitize=address,undefined"
msanregressiontest:
$(MAKE) -C $(FUZZDIR) regressiontest CC=clang CXX=clang++ CFLAGS="-O3 -fsanitize=memory" CXXFLAGS="-O3 -fsanitize=memory"
# run UBsan with -fsanitize-recover=signed-integer-overflow
# due to a bug in UBsan when doing pointer subtraction
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63303

View File

@ -45,7 +45,7 @@ test:
parallel: true
- ? |
if [[ "$CIRCLE_NODE_INDEX" == "0" ]] ; then make ppc64build && make clean; fi &&
if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make gcc7build && make clean; fi #could add another test here
if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make gcc7build && make clean; fi
:
parallel: true
- ? |
@ -53,6 +53,11 @@ test:
if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make -C tests test-legacy test-longmatch test-symbols && make clean; fi
:
parallel: true
- ? |
if [[ "$CIRCLE_NODE_INDEX" == "0" ]] ; then make -j regressiontest && make clean; fi &&
if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then true; fi # Could add another test here
:
parallel: true
post:
- echo Circle CI tests finished

View File

@ -125,7 +125,8 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
#define MaxLit ((1<<Litbits) - 1)
#define MaxML 52
#define MaxLL 35
#define MaxOff 28
#define DefaultMaxOff 28
#define MaxOff 31
#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
#define MLFSELog 9
#define LLFSELog 9
@ -151,8 +152,8 @@ static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1,
#define ML_DEFAULTNORMLOG 6 /* for static allocation */
static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
static const S16 OF_defaultNorm[MaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
static const S16 OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
#define OF_DEFAULTNORMLOG 5 /* for static allocation */
static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;

View File

@ -582,7 +582,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */
if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
{ U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
{ static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
U64 const scale = 62 - tableLog;
U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */
U64 const vStep = 1ULL<<(scale-20);

View File

@ -1259,20 +1259,30 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
}
MEM_STATIC symbolEncodingType_e ZSTD_selectEncodingType(FSE_repeat* repeatMode,
size_t const mostFrequent, size_t nbSeq, U32 defaultNormLog)
typedef enum {
ZSTD_defaultDisallowed = 0,
ZSTD_defaultAllowed = 1
} ZSTD_defaultPolicy_e;
MEM_STATIC symbolEncodingType_e ZSTD_selectEncodingType(
FSE_repeat* repeatMode, size_t const mostFrequent, size_t nbSeq,
U32 defaultNormLog, ZSTD_defaultPolicy_e const isDefaultAllowed)
{
#define MIN_SEQ_FOR_DYNAMIC_FSE 64
#define MAX_SEQ_FOR_STATIC_FSE 1000
if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
if ((mostFrequent == nbSeq) && (!isDefaultAllowed || nbSeq > 2)) {
/* Prefer set_basic over set_rle when there are 2 or less symbols,
* since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
* If basic encoding isn't possible, always choose RLE.
*/
*repeatMode = FSE_repeat_check;
return set_rle;
}
if ((*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
if (isDefaultAllowed && (*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
return set_repeat;
}
if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) {
if (isDefaultAllowed && ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1))))) {
*repeatMode = FSE_repeat_valid;
return set_basic;
}
@ -1308,6 +1318,7 @@ MEM_STATIC size_t ZSTD_buildCTable(void* dst, size_t dstCapacity,
count[codeTable[nbSeq-1]]--;
nbSeq_1--;
}
assert(nbSeq_1 > 1);
CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
if (FSE_isError(NCountSize)) return NCountSize;
@ -1445,7 +1456,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
/* CTable for Literal Lengths */
{ U32 max = MaxLL;
size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, entropy->workspace);
LLtype = ZSTD_selectEncodingType(&entropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog);
LLtype = ZSTD_selectEncodingType(&entropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog, ZSTD_defaultAllowed);
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
entropy->workspace, sizeof(entropy->workspace));
@ -1455,9 +1466,11 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
/* CTable for Offsets */
{ U32 max = MaxOff;
size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, entropy->workspace);
Offtype = ZSTD_selectEncodingType(&entropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog);
/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
ZSTD_defaultPolicy_e const defaultPolicy = max <= DefaultMaxOff ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
Offtype = ZSTD_selectEncodingType(&entropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog, defaultPolicy);
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, MaxOff,
count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
entropy->workspace, sizeof(entropy->workspace));
if (ZSTD_isError(countSize)) return countSize;
op += countSize;
@ -1465,7 +1478,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
/* CTable for MatchLengths */
{ U32 max = MaxML;
size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, entropy->workspace);
MLtype = ZSTD_selectEncodingType(&entropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog);
MLtype = ZSTD_selectEncodingType(&entropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog, ZSTD_defaultAllowed);
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
entropy->workspace, sizeof(entropy->workspace));

View File

@ -903,6 +903,15 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
* offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
* bits before reloading. This value is the maximum number of bytes we read
* after reloading when we are decoding long offets.
*/
#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
(ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
: 0)
static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
{
@ -910,7 +919,7 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l
U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */
U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= MaxOff, by table construction */
U32 const llBits = LL_bits[llCode];
U32 const mlBits = ML_bits[mlCode];
@ -937,7 +946,7 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l
0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD };
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
/* sequence */
{ size_t offset;
@ -945,8 +954,10 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l
offset = 0;
else {
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
if (longOffsets) {
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN);
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 2);
assert(ofBits <= MaxOff);
if (MEM_32bits() && longOffsets) {
U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
@ -977,13 +988,17 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l
seq.matchLength = ML_base[mlCode]
+ ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream);
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
BIT_reloadDStream(&seqState->DStream);
if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
BIT_reloadDStream(&seqState->DStream);
/* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
seq.litLength = LL_base[llCode]
+ ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
if ( MEM_32bits()
|| (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) )
BIT_reloadDStream(&seqState->DStream);
if (MEM_32bits())
BIT_reloadDStream(&seqState->DStream);
DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
@ -1143,7 +1158,6 @@ static size_t ZSTD_decompressSequences(
}
HINT_INLINE
seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
{
@ -1151,7 +1165,7 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */
U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= MaxOff, by table construction */
U32 const llBits = LL_bits[llCode];
U32 const mlBits = ML_bits[mlCode];
@ -1178,7 +1192,7 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD };
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
/* sequence */
{ size_t offset;
@ -1186,8 +1200,10 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
offset = 0;
else {
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
if (longOffsets) {
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN);
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 2);
assert(ofBits <= MaxOff);
if (MEM_32bits() && longOffsets) {
U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
@ -1217,11 +1233,16 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
}
seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream);
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
BIT_reloadDStream(&seqState->DStream);
if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
BIT_reloadDStream(&seqState->DStream);
/* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
if (MEM_32bits() ||
(totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&seqState->DStream);
if (MEM_32bits())
BIT_reloadDStream(&seqState->DStream);
{ size_t const pos = seqState->pos + seq.litLength;
seq.match = seqState->base + pos - seq.offset; /* single memory segment */

View File

@ -881,7 +881,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
frame->stats.offsetSymbolSet, 28)) {
Offtype = set_repeat;
} else if (!(RAND(seed) & 3)) {
FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
Offtype = set_basic;
} else {
size_t nbSeq_1 = nbSeq;

View File

@ -14,6 +14,13 @@ CPPFLAGS ?=
LDFLAGS ?=
ARFLAGS ?=
LIB_FUZZING_ENGINE ?= libregression.a
PYTHON ?= python
ifeq ($(shell uname), Darwin)
DOWNLOAD?=curl -L -o
else
DOWNLOAD?=wget -O
endif
CORPORA_URL_PREFIX:=https://github.com/facebook/zstd/releases/download/fuzz-corpora/
ZSTDDIR = ../../lib
PRGDIR = ../../programs
@ -48,18 +55,20 @@ FUZZ_SRC := \
FUZZ_OBJ := $(patsubst %.c,%.o, $(wildcard $(FUZZ_SRC)))
.PHONY: default all clean
.PHONY: default all clean cleanall
default: all
all: \
FUZZ_TARGETS := \
simple_round_trip \
stream_round_trip \
block_round_trip \
block_round_trip \
simple_decompress \
stream_decompress \
block_decompress
all: $(FUZZ_TARGETS)
%.o: %.c
$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $^ -c -o $@
@ -93,7 +102,25 @@ libFuzzer:
@git clone https://chromium.googlesource.com/chromium/llvm-project/llvm/lib/Fuzzer
@cd Fuzzer && ./build.sh
corpora/%_seed_corpus.zip:
@mkdir -p corpora
$(DOWNLOAD) $@ $(CORPORA_URL_PREFIX)$*_seed_corpus.zip
corpora/%: corpora/%_seed_corpus.zip
unzip -q $^ -d $@
.PHONY: corpora
corpora: $(patsubst %,corpora/%,$(FUZZ_TARGETS))
regressiontest: corpora
CC="$(CC)" CXX="$(CXX)" CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(LDFLAGS)" $(PYTHON) ./fuzz.py build all
$(PYTHON) ./fuzz.py regression all
clean:
@$(MAKE) -C $(ZSTDDIR) clean
@$(RM) -f *.a *.o
@$(RM) -f simple_round_trip stream_round_trip simple_decompress stream_decompress
cleanall:
@$(RM) -rf Fuzzer
@$(RM) -rf corpora

View File

@ -1,6 +1,14 @@
# Fuzzing
Each fuzzing target can be built with multiple engines.
Zstd provides a fuzz corpus for each target that can be downloaded with
the command:
```
make corpora
```
It will download each corpus into `./corpora/TARGET`.
## fuzz.py

View File

@ -82,11 +82,39 @@ def tmpdir():
shutil.rmtree(dirpath, ignore_errors=True)
def parse_targets(in_targets):
targets = set()
for target in in_targets:
if not target:
continue
if target == 'all':
targets = targets.union(TARGETS)
elif target in TARGETS:
targets.add(target)
else:
raise RuntimeError('{} is not a valid target'.format(target))
return list(targets)
def targets_parser(args, description):
parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
parser.add_argument(
'TARGET',
nargs='*',
type=str,
help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS)))
args, extra = parser.parse_known_args(args)
args.extra = extra
args.TARGET = parse_targets(args.TARGET)
return args
def parse_env_flags(args, flags):
"""
Look for flags set by environment variables.
"""
flags = ' '.join(flags)
san_flags = ','.join(re.findall('-fsanitize=((?:[a-z]+,?)+)', flags))
nosan_flags = ','.join(re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags))
@ -112,6 +140,34 @@ def parse_env_flags(args, flags):
return args
def compiler_version(cc, cxx):
"""
Determines the compiler and version.
Only works for clang and gcc.
"""
cc_version_bytes = subprocess.check_output([cc, "--version"])
cxx_version_bytes = subprocess.check_output([cxx, "--version"])
if cc_version_bytes.startswith(b'clang'):
assert(cxx_version_bytes.startswith(b'clang'))
compiler = 'clang'
if cc_version_bytes.startswith(b'gcc'):
assert(cxx_version_bytes.startswith(b'g++'))
compiler = 'gcc'
version_regex = b'([0-9])+\.([0-9])+\.([0-9])+'
version_match = re.search(version_regex, cc_version_bytes)
version = tuple(int(version_match.group(i)) for i in range(1, 4))
return compiler, version
def overflow_ubsan_flags(cc, cxx):
compiler, version = compiler_version(cc, cxx)
if compiler == 'gcc':
return ['-fno-sanitize=signed-integer-overflow']
if compiler == 'clang' and version >= (5, 0, 0):
return ['-fno-sanitize=pointer-overflow']
return []
def build_parser(args):
description = """
Cleans the repository and builds a fuzz target (or all).
@ -336,7 +392,7 @@ def build(args):
if args.ubsan:
ubsan_flags = ['-fsanitize=undefined']
if not args.ubsan_pointer_overflow:
ubsan_flags += ['-fno-sanitize=pointer-overflow']
ubsan_flags += overflow_ubsan_flags(cc, cxx)
common_flags += ubsan_flags
if args.stateful_fuzzing:
@ -424,36 +480,42 @@ def libfuzzer_parser(args):
if args.TARGET and args.TARGET not in TARGETS:
raise RuntimeError('{} is not a valid target'.format(args.TARGET))
if not args.corpora:
args.corpora = abs_join(CORPORA_DIR, args.TARGET)
if not args.artifact:
args.artifact = abs_join(CORPORA_DIR, '{}-crash'.format(args.TARGET))
if not args.seed:
args.seed = abs_join(CORPORA_DIR, '{}-seed'.format(args.TARGET))
return args
def libfuzzer(args):
try:
args = libfuzzer_parser(args)
except Exception as e:
print(e)
return 1
target = abs_join(FUZZ_DIR, args.TARGET)
def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
if corpora is None:
corpora = abs_join(CORPORA_DIR, target)
if artifact is None:
artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target))
if seed is None:
seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))
if extra_args is None:
extra_args = []
corpora = [create(args.corpora)]
artifact = create(args.artifact)
seed = check(args.seed)
target = abs_join(FUZZ_DIR, target)
corpora = [create(corpora)]
artifact = create(artifact)
seed = check(seed)
corpora += [artifact]
if seed is not None:
corpora += [seed]
cmd = [target, '-artifact_prefix={}/'.format(artifact)]
cmd += corpora + args.extra
cmd += corpora + extra_args
print(' '.join(cmd))
subprocess.call(cmd)
subprocess.check_call(cmd)
def libfuzzer_cmd(args):
try:
args = libfuzzer_parser(args)
except Exception as e:
print(e)
return 1
libfuzzer(args.TARGET, args.corpora, args.artifact, args.seed, args.extra)
return 0
@ -518,39 +580,15 @@ def afl(args):
return 0
def regression_parser(args):
description = """
Runs one or more regression tests.
The fuzzer should have been built with with
LIB_FUZZING_ENGINE='libregression.a'.
Takes input from CORPORA.
"""
parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
parser.add_argument(
'TARGET',
nargs='*',
type=str,
help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS)))
args = parser.parse_args(args)
targets = set()
for target in args.TARGET:
if not target:
continue
if target == 'all':
targets = targets.union(TARGETS)
elif target in TARGETS:
targets.add(target)
else:
raise RuntimeError('{} is not a valid target'.format(target))
args.TARGET = list(targets)
return args
def regression(args):
try:
args = regression_parser(args)
description = """
Runs one or more regression tests.
The fuzzer should have been built with with
LIB_FUZZING_ENGINE='libregression.a'.
Takes input from CORPORA.
"""
args = targets_parser(args, description)
except Exception as e:
print(e)
return 1
@ -673,6 +711,52 @@ def gen(args):
return 0
def minimize(args):
try:
description = """
Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
TARGET_seed_corpus. All extra args are passed to libfuzzer.
"""
args = targets_parser(args, description)
except Exception as e:
print(e)
return 1
for target in args.TARGET:
# Merge the corpus + anything else into the seed_corpus
corpus = abs_join(CORPORA_DIR, target)
seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
extra_args = [corpus, "-merge=1"] + args.extra
libfuzzer(target, corpora=seed_corpus, extra_args=extra_args)
seeds = set(os.listdir(seed_corpus))
# Copy all crashes directly into the seed_corpus if not already present
crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target))
for crash in os.listdir(crashes):
if crash not in seeds:
shutil.copy(abs_join(crashes, crash), seed_corpus)
seeds.add(crash)
def zip_cmd(args):
try:
description = """
Zips up the seed corpus.
"""
args = targets_parser(args, description)
except Exception as e:
print(e)
return 1
for target in args.TARGET:
# Zip the seed_corpus
seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
seeds = [abs_join(seed_corpus, f) for f in os.listdir(seed_corpus)]
zip_file = "{}.zip".format(seed_corpus)
cmd = ["zip", "-q", "-j", "-9", zip_file]
print(' '.join(cmd + [abs_join(seed_corpus, '*')]))
subprocess.check_call(cmd + seeds)
def short_help(args):
name = args[0]
print("Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(name))
@ -690,6 +774,8 @@ def help(args):
print("\tafl\t\tRun an AFL fuzzer")
print("\tregression\tRun a regression test")
print("\tgen\t\tGenerate a seed corpus for a fuzzer")
print("\tminimize\tMinimize the test corpora")
print("\tzip\t\tZip the minimized corpora up")
def main():
@ -705,13 +791,17 @@ def main():
if command == "build":
return build(args)
if command == "libfuzzer":
return libfuzzer(args)
return libfuzzer_cmd(args)
if command == "regression":
return regression(args)
if command == "afl":
return afl(args)
if command == "gen":
return gen(args)
if command == "minimize":
return minimize(args)
if command == "zip":
return zip_cmd(args)
short_help(args)
print("Error: No such command {} (pass -h for help)".format(command))
return 1

View File

@ -38,10 +38,11 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
if (FUZZ_rand(&seed) & 1) {
ZSTD_inBuffer in = {src, srcSize, 0};
ZSTD_outBuffer out = {compressed, compressedCapacity, 0};
size_t err;
ZSTD_CCtx_reset(cctx);
FUZZ_setRandomParameters(cctx, &seed);
size_t const err = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end);
err = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end);
if (err != 0) {
return err;
}