/* * Copyright (c) 2016-2020, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses. */ /** * This fuzz target performs a zstd round-trip test (compress & decompress), * compares the result with the original, and calls abort() on corruption. */ #define ZSTD_STATIC_LINKING_ONLY #include #include #include #include #include #include "fuzz_helpers.h" #include "zstd_helpers.h" #include "fuzz_data_producer.h" static ZSTD_CCtx *cctx = NULL; static ZSTD_DCtx *dctx = NULL; #define ZSTD_FUZZ_GENERATED_SRC_MAXSIZE (1 << 25) /* Allow up to 32MB generated data */ #define ZSTD_FUZZ_MATCHLENGTH_MAXSIZE (1 << 18) /* Allow up to 256KB matches */ #define ZSTD_FUZZ_GENERATED_LITERALS_MAXSIZE (1 << 19) /* Allow up to 512KB literals buffer */ #define ZSTD_FUZZ_GENERATED_DICT_MAXSIZE (1 << 18) /* Allow up to a 256KB dict */ #define ZSTD_FUZZ_GENERATE_REPCODES 0 /* Disabled repcode fuzzing for now */ /* Make a pseudorandom string - this simple function exists to avoid * taking a dependency on datagen.h to have RDG_genBuffer(). We don't need anything fancy. */ static char *generatePseudoRandomString(char *str, size_t size) { const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_"; if (size) { --size; for (size_t n = 0; n < size; n++) { int key = rand() % (int) (sizeof charset - 1); str[n] = charset[key]; } } return str; } /* Returns size of source buffer */ static size_t decodeSequences(void* dst, const ZSTD_Sequence* generatedSequences, size_t nbSequences, const void* literals, size_t literalsSize, const void* dict, size_t dictSize) { const uint8_t* ip = literals; const uint8_t* dictPtr = dict; uint8_t* op = dst; size_t generatedSrcBufferSize = 0; size_t bytesWritten = 0; /* Note that src is a literals buffer */ for (size_t i = 0; i < nbSequences; ++i) { assert(generatedSequences[i].matchLength != 0); assert(generatedSequences[i].offset != 0); ZSTD_memcpy(op, ip, generatedSequences[i].litLength); bytesWritten += generatedSequences[i].litLength; op += generatedSequences[i].litLength; ip += generatedSequences[i].litLength; literalsSize -= generatedSequences[i].litLength; assert(generatedSequences[i].offset != 0); /* Copy over the match */ { size_t matchLength = generatedSequences[i].matchLength; size_t j = 0; size_t k = 0; if (dictSize != 0) { if (generatedSequences[i].offset > bytesWritten) { /* Offset goes into the dictionary */ size_t offsetFromEndOfDict = generatedSequences[i].offset - bytesWritten; for (; k < offsetFromEndOfDict && k < matchLength; ++k) { op[k] = dictPtr[dictSize - offsetFromEndOfDict + k]; } matchLength -= k; op += k; } } for (; j < matchLength; ++j) { op[j] = op[j-(int)generatedSequences[i].offset]; } op += j; assert(generatedSequences[i].matchLength == j + k); bytesWritten += generatedSequences[i].matchLength; } } generatedSrcBufferSize = bytesWritten; assert(ip <= literals + literalsSize); ZSTD_memcpy(op, ip, literalsSize); return generatedSrcBufferSize; } /* Returns nb sequences generated * TODO: Add repcode fuzzing once we support repcode match splits */ static size_t generateRandomSequences(ZSTD_Sequence* generatedSequences, FUZZ_dataProducer_t* producer, size_t literalsSize, size_t dictSize, size_t windowLog) { uint32_t bytesGenerated = 0; uint32_t nbSeqGenerated = 0; uint32_t litLength; uint32_t matchLength; uint32_t offset; uint32_t offsetBound; uint32_t repCode = 0; uint32_t isFirstSequence = 1; uint32_t windowSize = 1 << windowLog; while (bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE && !FUZZ_dataProducer_empty(producer)) { litLength = isFirstSequence ? FUZZ_dataProducer_uint32Range(producer, 1, literalsSize) : FUZZ_dataProducer_uint32Range(producer, 0, literalsSize); literalsSize -= litLength; bytesGenerated += litLength; if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) { break; } offsetBound = bytesGenerated > windowSize ? windowSize : bytesGenerated + dictSize; offset = FUZZ_dataProducer_uint32Range(producer, 1, offsetBound); matchLength = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, ZSTD_FUZZ_MATCHLENGTH_MAXSIZE); bytesGenerated += matchLength; if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) { break; } ZSTD_Sequence seq = {offset, litLength, matchLength, repCode}; generatedSequences[nbSeqGenerated++] = seq; isFirstSequence = 0; } return nbSeqGenerated; } static size_t roundTripTest(void *result, size_t resultCapacity, void *compressed, size_t compressedCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize, const ZSTD_Sequence* generatedSequences, size_t generatedSequencesSize, size_t wLog, unsigned cLevel, unsigned hasDict) { size_t cSize; size_t dSize; ZSTD_CDict* cdict = NULL; ZSTD_DDict* ddict = NULL; ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0); ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel); ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, wLog); /* TODO: Add block delim mode fuzzing */ ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters); if (hasDict) { cdict = ZSTD_createCDict(dict, dictSize, cLevel); FUZZ_ASSERT(cdict); ZSTD_CCtx_refCDict(cctx, cdict); ddict = ZSTD_createDDict(dict, dictSize); FUZZ_ASSERT(ddict); ZSTD_DCtx_refDDict(dctx, ddict); } cSize = ZSTD_compressSequences(cctx, compressed, compressedCapacity, generatedSequences, generatedSequencesSize, src, srcSize); FUZZ_ZASSERT(cSize); dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize); FUZZ_ZASSERT(dSize); if (cdict) { ZSTD_freeCDict(cdict); } if (ddict) { ZSTD_freeDDict(ddict); } return dSize; } int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) { void* rBuf; size_t rBufSize; void* cBuf; size_t cBufSize; void* generatedSrc; size_t generatedSrcSize; ZSTD_Sequence* generatedSequences; size_t nbSequences; void* literalsBuffer; size_t literalsSize; void* dictBuffer; size_t dictSize = 0; unsigned hasDict; unsigned wLog; int cLevel; FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); literalsSize = FUZZ_dataProducer_uint32Range(producer, 1, ZSTD_FUZZ_GENERATED_LITERALS_MAXSIZE); literalsBuffer = FUZZ_malloc(literalsSize); literalsBuffer = generatePseudoRandomString(literalsBuffer, literalsSize); hasDict = FUZZ_dataProducer_uint32Range(producer, 0, 1); if (hasDict) { dictSize = FUZZ_dataProducer_uint32Range(producer, 1, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE); dictBuffer = FUZZ_malloc(dictSize); dictBuffer = generatePseudoRandomString(dictBuffer, dictSize); } // Generate window log first so we dont generate offsets too large wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); cLevel = FUZZ_dataProducer_int32Range(producer, (int)ZSTD_minCLevel, (int)ZSTD_maxCLevel); generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_GENERATED_SRC_MAXSIZE); generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE); nbSequences = generateRandomSequences(generatedSequences, producer, literalsSize, dictSize, wLog); generatedSrcSize = decodeSequences(generatedSrc, generatedSequences, nbSequences, literalsBuffer, literalsSize, dictBuffer, dictSize); cBufSize = ZSTD_compressBound(generatedSrcSize); cBuf = FUZZ_malloc(cBufSize); rBufSize = generatedSrcSize; rBuf = FUZZ_malloc(rBufSize); if (!cctx) { cctx = ZSTD_createCCtx(); FUZZ_ASSERT(cctx); } if (!dctx) { dctx = ZSTD_createDCtx(); FUZZ_ASSERT(dctx); } size_t const result = roundTripTest(rBuf, rBufSize, cBuf, cBufSize, generatedSrc, generatedSrcSize, dictBuffer, dictSize, generatedSequences, nbSequences, wLog, cLevel, hasDict); FUZZ_ZASSERT(result); FUZZ_ASSERT_MSG(result == generatedSrcSize, "Incorrect regenerated size"); FUZZ_ASSERT_MSG(!FUZZ_memcmp(generatedSrc, rBuf, generatedSrcSize), "Corruption!"); free(rBuf); free(cBuf); free(generatedSequences); free(generatedSrc); free(literalsBuffer); FUZZ_dataProducer_free(producer); #ifndef STATEFUL_FUZZING ZSTD_freeCCtx(cctx); cctx = NULL; ZSTD_freeDCtx(dctx); dctx = NULL; #endif return 0; }