Merge pull request #2282 from terrelln/ncount-fix

[bug] Fix FSE_readNCount()
dev
Nick Terrell 2020-08-26 10:31:07 -07:00 committed by GitHub
commit cf83aceaf3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 120 additions and 10 deletions

View File

@ -110,13 +110,14 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
while (repeats >= 12) {
charnum += 3 * 12;
if (ip <= iend-7) {
if (LIKELY(ip <= iend-7)) {
ip += 3;
bitStream = MEM_readLE32(ip) >> bitCount;
} else {
bitStream >>= 24;
bitCount += 24;
bitCount -= (int)(8 * (iend - 7 - ip));
bitCount &= 31;
ip = iend - 4;
}
bitStream = MEM_readLE32(ip) >> bitCount;
repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
}
charnum += 3 * repeats;
@ -124,6 +125,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
bitCount += 2 * repeats;
/* Add the final repeat which isn't 0b11. */
assert((bitStream & 3) < 3);
charnum += bitStream & 3;
bitCount += 2;
@ -137,14 +139,16 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
* because we already memset the whole buffer to 0.
*/
if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
assert((bitCount >> 3) <= 3); /* For first condition to work */
ip += bitCount>>3;
bitCount &= 7;
bitStream = MEM_readLE32(ip) >> bitCount;
} else {
bitStream >>= 2;
bitCount -= (int)(8 * (iend - 4 - ip));
bitCount &= 31;
ip = iend - 4;
}
bitStream = MEM_readLE32(ip) >> bitCount;
}
{
int const max = (2*threshold-1) - remaining;
@ -184,14 +188,15 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
}
if (charnum >= maxSV1) break;
if (LIKELY((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))) {
if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
ip += bitCount>>3;
bitCount &= 7;
} else {
bitCount -= (int)(8 * (iend - 4 - ip));
bitCount &= 31;
ip = iend - 4;
}
bitStream = MEM_readLE32(ip) >> (bitCount & 31);
bitStream = MEM_readLE32(ip) >> bitCount;
} }
if (remaining != 1) return ERROR(corruption_detected);
/* Only possible when there are too many zeros. */

View File

@ -96,7 +96,8 @@ FUZZ_TARGETS := \
dictionary_loader \
raw_dictionary_round_trip \
dictionary_stream_round_trip \
decompress_dstSize_tooSmall
decompress_dstSize_tooSmall \
fse_read_ncount
all: $(FUZZ_TARGETS)
@ -184,6 +185,9 @@ dictionary_loader: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_dictionary_loa
decompress_dstSize_tooSmall: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_dstSize_tooSmall.o
$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_dstSize_tooSmall.o $(LIB_FUZZING_ENGINE) -o $@
fse_read_ncount: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_fse_read_ncount.o
$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_fse_read_ncount.o $(LIB_FUZZING_ENGINE) -o $@
libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o
$(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o

View File

@ -0,0 +1,100 @@
/*
* Copyright (c) 2016-2020, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/**
* This fuzz target round trips the FSE normalized count with FSE_writeNCount()
* and FSE_readNcount() to ensure that it can always round trip correctly.
*/
#define FSE_STATIC_LINKING_ONLY
#define ZSTD_STATIC_LINKING_ONLY
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "fuzz_helpers.h"
#include "zstd_helpers.h"
#include "fuzz_data_producer.h"
#include "fse.h"
int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
/* Pick a random tableLog and maxSymbolValue */
unsigned const tableLog = FUZZ_dataProducer_uint32Range(producer, FSE_MIN_TABLELOG, FSE_MAX_TABLELOG);
unsigned const maxSymbolValue = FUZZ_dataProducer_uint32Range(producer, 0, 255);
unsigned remainingWeight = (1u << tableLog) - 1;
size_t dataSize;
BYTE data[512];
short ncount[256];
/* Randomly fill the normalized count */
memset(ncount, 0, sizeof(ncount));
{
unsigned s;
for (s = 0; s < maxSymbolValue && remainingWeight > 0; ++s) {
short n = (short)FUZZ_dataProducer_int32Range(producer, -1, remainingWeight);
ncount[s] = n;
if (n < 0) {
remainingWeight -= 1;
} else {
assert((unsigned)n <= remainingWeight);
remainingWeight -= n;
}
}
/* Ensure ncount[maxSymbolValue] != 0 and the sum is (1<<tableLog) */
ncount[maxSymbolValue] = remainingWeight + 1;
if (ncount[maxSymbolValue] == 1 && FUZZ_dataProducer_uint32Range(producer, 0, 1) == 1) {
ncount[maxSymbolValue] = -1;
}
}
/* Write the normalized count */
{
FUZZ_ASSERT(sizeof(data) >= FSE_NCountWriteBound(maxSymbolValue, tableLog));
dataSize = FSE_writeNCount(data, sizeof(data), ncount, maxSymbolValue, tableLog);
FUZZ_ZASSERT(dataSize);
}
/* Read & validate the normalized count */
{
short rtNcount[256];
unsigned rtMaxSymbolValue = 255;
unsigned rtTableLog;
/* Copy into a buffer with a random amount of random data at the end */
size_t const buffSize = (size_t)FUZZ_dataProducer_uint32Range(producer, dataSize, sizeof(data));
BYTE* const buff = FUZZ_malloc(buffSize);
size_t rtDataSize;
memcpy(buff, data, dataSize);
{
size_t b;
for (b = dataSize; b < buffSize; ++b) {
buff[b] = (BYTE)FUZZ_dataProducer_uint32Range(producer, 0, 255);
}
}
rtDataSize = FSE_readNCount(rtNcount, &rtMaxSymbolValue, &rtTableLog, buff, buffSize);
FUZZ_ZASSERT(rtDataSize);
FUZZ_ASSERT(rtDataSize == dataSize);
FUZZ_ASSERT(rtMaxSymbolValue == maxSymbolValue);
FUZZ_ASSERT(rtTableLog == tableLog);
{
unsigned s;
for (s = 0; s <= maxSymbolValue; ++s) {
FUZZ_ASSERT(ncount[s] == rtNcount[s]);
}
}
free(buff);
}
FUZZ_dataProducer_free(producer);
return 0;
}

View File

@ -60,6 +60,7 @@ TARGET_INFO = {
'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA),
'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA),
'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
}
TARGETS = list(TARGET_INFO.keys())
ALL_TARGETS = TARGETS + ['all']