From a71bbba7bed5cb7f0e4bf6c793a314f29edf6ebd Mon Sep 17 00:00:00 2001 From: Dario Pavlovic Date: Mon, 9 Sep 2019 08:43:22 -0700 Subject: [PATCH 1/3] [Fuzz] Improve data generation #1723 --- tests/fuzz/Makefile | 4 +-- tests/fuzz/README.md | 2 +- tests/fuzz/fuzz_data_producer.c | 57 +++++++++++++++++++++++++++++++++ tests/fuzz/fuzz_data_producer.h | 43 +++++++++++++++++++++++++ tests/fuzz/simple_decompress.c | 7 ++-- 5 files changed, 108 insertions(+), 5 deletions(-) create mode 100644 tests/fuzz/fuzz_data_producer.c create mode 100644 tests/fuzz/fuzz_data_producer.h diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile index 08dedd66..83837e62 100644 --- a/tests/fuzz/Makefile +++ b/tests/fuzz/Makefile @@ -40,8 +40,8 @@ FUZZ_LDFLAGS := -pthread $(LDFLAGS) FUZZ_ARFLAGS := $(ARFLAGS) FUZZ_TARGET_FLAGS = $(FUZZ_CPPFLAGS) $(FUZZ_CXXFLAGS) $(FUZZ_LDFLAGS) -FUZZ_HEADERS := fuzz_helpers.h fuzz.h zstd_helpers.h -FUZZ_SRC := $(PRGDIR)/util.c zstd_helpers.c +FUZZ_HEADERS := fuzz_helpers.h fuzz.h zstd_helpers.h fuzz_data_producer.h +FUZZ_SRC := $(PRGDIR)/util.c zstd_helpers.c fuzz_data_producer.c ZSTDCOMMON_SRC := $(ZSTDDIR)/common/*.c ZSTDCOMP_SRC := $(ZSTDDIR)/compress/*.c diff --git a/tests/fuzz/README.md b/tests/fuzz/README.md index 856a57f8..71afa406 100644 --- a/tests/fuzz/README.md +++ b/tests/fuzz/README.md @@ -90,7 +90,7 @@ CC=afl-clang CXX=afl-clang++ ./fuzz.py build all --enable-asan --enable-ubsan ## Regression Testing -The regression rest supports the `all` target to run all the fuzzers in one +The regression test supports the `all` target to run all the fuzzers in one command. ``` diff --git a/tests/fuzz/fuzz_data_producer.c b/tests/fuzz/fuzz_data_producer.c new file mode 100644 index 00000000..a083f636 --- /dev/null +++ b/tests/fuzz/fuzz_data_producer.c @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +#include "fuzz_data_producer.h" + +struct FUZZ_dataProducer_s{ /* cursor over an input buffer that is consumed from its end */ + const uint8_t *data; /* start of the input buffer; stored as given, never copied or freed in this file */ + size_t size; /* number of bytes not yet consumed; the next byte read is data[size - 1] */ +}; + +FUZZ_dataProducer_t *FUZZ_dataProducer_create(const uint8_t *data, size_t size) { /* allocates a producer covering data[0..size); release it with FUZZ_dataProducer_free() */ + FUZZ_dataProducer_t *producer = malloc(sizeof(FUZZ_dataProducer_t)); + + FUZZ_ASSERT(producer != NULL); /* NOTE(review): FUZZ_ASSERT is not defined in this file (fuzz_helpers.h is included by the header); presumably it aborts on allocation failure - confirm */ + + producer->data = data; + producer->size = size; + return producer; +} + +void FUZZ_dataProducer_free(FUZZ_dataProducer_t *producer) { free(producer); } /* frees only the producer struct, not the buffer it points at */ + +uint32_t FUZZ_dataProducer_uint32Range(FUZZ_dataProducer_t *producer, uint32_t min, + uint32_t max) { /* returns a value in [min, max] derived from bytes taken off the end of the data */ + FUZZ_ASSERT(min <= max); + + uint32_t range = max - min; /* width of the interval minus one; 0 means min == max */ + uint32_t rolling = range; /* shifted right by 8 per byte read, so it bounds how many bytes are consumed */ + uint32_t result = 0; + + while (rolling > 0 && producer->size > 0) { /* one byte per 8 bits of range; stops once rolling reaches 0 or the data is exhausted */ + uint8_t next = *(producer->data + producer->size - 1); /* last unconsumed byte */ + producer->size -= 1; /* consume from the tail; the data pointer itself never moves */ + result = (result << 8) | next; /* bytes read earlier end up in the higher-order positions */ + rolling >>= 8; + } + + if (range == 0xffffffff) { /* full 32-bit range (min is 0 here): range + 1 would wrap to 0 in the modulo below */ + return result; + } + + return min + result % (range + 1); /* when no byte was consumed result is 0, so this returns min */ +} + +uint32_t FUZZ_dataProducer_uint32(FUZZ_dataProducer_t *producer) { /* full-range draw; consumes at most 4 bytes */ + return FUZZ_dataProducer_uint32Range(producer, 0, 0xffffffff); +} + +size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer){ /* number of bytes not consumed so far */ + return producer->size; +} diff --git a/tests/fuzz/fuzz_data_producer.h b/tests/fuzz/fuzz_data_producer.h new file mode 100644 index 00000000..4a12e130 --- /dev/null +++ b/tests/fuzz/fuzz_data_producer.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. 
+ */ + +/** + * Helper APIs for generating random data from input data stream. + */ + +#ifndef FUZZ_DATA_PRODUCER_H +#define FUZZ_DATA_PRODUCER_H + +#include +#include +#include +#include + +#include "fuzz_helpers.h" + +/* Struct used for maintaining the state of the data */ +typedef struct FUZZ_dataProducer_s FUZZ_dataProducer_t; + +/* Returns a data producer state struct. Use for producer initialization. */ +FUZZ_dataProducer_t *FUZZ_dataProducer_create(const uint8_t *data, size_t size); + +/* Frees the data producer */ +void FUZZ_dataProducer_free(FUZZ_dataProducer_t *producer); + +/* Returns value between [min, max] */ +uint32_t FUZZ_dataProducer_uint32Range(FUZZ_dataProducer_t *producer, uint32_t min, + uint32_t max); + +/* Returns a uint32 value */ +uint32_t FUZZ_dataProducer_uint32(FUZZ_dataProducer_t *producer); + +/* Returns the size of the remaining bytes of data in the producer */ +size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer); + +#endif // FUZZ_DATA_PRODUCER_H diff --git a/tests/fuzz/simple_decompress.c b/tests/fuzz/simple_decompress.c index af3f302b..56ebb93e 100644 --- a/tests/fuzz/simple_decompress.c +++ b/tests/fuzz/simple_decompress.c @@ -17,13 +17,14 @@ #include #include "fuzz_helpers.h" #include "zstd.h" +#include "fuzz_data_producer.h" static ZSTD_DCtx *dctx = NULL; int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { - uint32_t seed = FUZZ_seed(&src, &size); + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); int i; if (!dctx) { dctx = ZSTD_createDCtx(); @@ -31,13 +32,15 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) } /* Run it 10 times over 10 output sizes. Reuse the context. 
*/ for (i = 0; i < 10; ++i) { - size_t const bufSize = FUZZ_rand32(&seed, 0, 2 * size); + size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, 2 * size); void* rBuf = malloc(bufSize); FUZZ_ASSERT(rBuf); ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size); free(rBuf); } + FUZZ_dataProducer_free(producer); + #ifndef STATEFUL_FUZZING ZSTD_freeDCtx(dctx); dctx = NULL; #endif From 3932fcfebc077876bf57eab7d7ee2f1a542c4303 Mon Sep 17 00:00:00 2001 From: Dario Pavlovic Date: Mon, 9 Sep 2019 15:39:04 -0700 Subject: [PATCH 2/3] Fixing issues with double usage of data. --- tests/fuzz/simple_decompress.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/tests/fuzz/simple_decompress.c b/tests/fuzz/simple_decompress.c index 56ebb93e..803f7f86 100644 --- a/tests/fuzz/simple_decompress.c +++ b/tests/fuzz/simple_decompress.c @@ -23,23 +23,26 @@ static ZSTD_DCtx *dctx = NULL; int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); - FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); - int i; - if (!dctx) { - dctx = ZSTD_createDCtx(); - FUZZ_ASSERT(dctx); - } - /* Run it 10 times over 10 output sizes. Reuse the context. */ - for (i = 0; i < 10; ++i) { - size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, 2 * size); - void* rBuf = malloc(bufSize); - FUZZ_ASSERT(rBuf); - ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size); - free(rBuf); - } + int i; + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } - FUZZ_dataProducer_free(producer); + size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, 2 * size); + void* rBuf = malloc(bufSize); + FUZZ_ASSERT(rBuf); + + /* Restrict to remaining data. If we run out of data while generating params, + we should still continue and let decompression happen on empty data. 
*/ + size = FUZZ_dataProducer_remainingBytes(producer); + + ZSTD_decompressDCtx(dctx, rBuf, bufSize, src, size); + free(rBuf); + + FUZZ_dataProducer_free(producer); #ifndef STATEFUL_FUZZING ZSTD_freeDCtx(dctx); dctx = NULL; From ea1ad123da7f760cac8d57bbaf81d367af17aebb Mon Sep 17 00:00:00 2001 From: Dario Pavlovic Date: Mon, 9 Sep 2019 16:13:24 -0700 Subject: [PATCH 3/3] Addressing nits --- tests/fuzz/fuzz_data_producer.h | 4 ++++ tests/fuzz/simple_decompress.c | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/fuzz/fuzz_data_producer.h b/tests/fuzz/fuzz_data_producer.h index 4a12e130..4fcf6fd4 100644 --- a/tests/fuzz/fuzz_data_producer.h +++ b/tests/fuzz/fuzz_data_producer.h @@ -9,6 +9,10 @@ /** * Helper APIs for generating random data from input data stream. + The producer reads bytes from the end of the input and appends them together + to generate a random number in the requested range. If it runs out of input + data, it will keep returning the same value (min) over and over again. + */ #ifndef FUZZ_DATA_PRODUCER_H diff --git a/tests/fuzz/simple_decompress.c b/tests/fuzz/simple_decompress.c index 803f7f86..a68813ee 100644 --- a/tests/fuzz/simple_decompress.c +++ b/tests/fuzz/simple_decompress.c @@ -31,8 +31,8 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) FUZZ_ASSERT(dctx); } - size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, 2 * size); - void* rBuf = malloc(bufSize); + size_t const bufSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size); + void *rBuf = malloc(bufSize); FUZZ_ASSERT(rBuf); /* Restrict to remaining data. If we run out of data while generating params,