Added : ZSTD decompression : ability to resist to noisy/faulty data source
This commit is contained in:
parent
cf5ce55cca
commit
d5d9bc3f82
@ -1,4 +1,4 @@
|
||||
**Zstd**, short for Zstandard, is a new lossless compression algorithm, which provides both good compression ratio _and_ speed for your standard compression needs. "Standard" translates into everyday situations which neither look for highest possible ratio nor extreme speeds.s
|
||||
**Zstd**, short for Zstandard, is a new lossless compression algorithm, which provides both good compression ratio _and_ speed for your standard compression needs. "Standard" translates into everyday situations which neither look for highest possible ratio nor extreme speed.
|
||||
|
||||
It is provided as a BSD-license package, hosted on Github.
|
||||
|
||||
@ -40,7 +40,7 @@ It's a complex area which will require time and benefit from contributions.
|
||||
|
||||
Another property zstd is developed for is configurable memory requirement, with the objective to fit into low-memory configurations, or servers handling many connections in parallel.
|
||||
|
||||
Zstd entropy stage is provided by [Huff0 and FSE, of Finite State Entrop library](https://github.com/Cyan4973/FiniteStateEntropy).
|
||||
Zstd entropy stage is provided by [Huff0 and FSE, from Finite State Entrop library](https://github.com/Cyan4973/FiniteStateEntropy).
|
||||
|
||||
Zstd is still considered experimental at this stage. Specifically, it doesn't guarantee yet that its current stream/file format will remain supported in future versions of the library. Therefore, only use Zstd in environments where you can control the availability of the decompression library. "Stable" status, including official documented format format and long-term support commitment, is projected sometimes early 2016.
|
||||
|
||||
|
@ -1414,7 +1414,7 @@ static size_t ZSTD_execSequence(BYTE* op,
|
||||
const BYTE** litPtr, const BYTE* const litLimit,
|
||||
BYTE* const base, BYTE* const oend)
|
||||
{
|
||||
static const int dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4}; /* added */
|
||||
static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
|
||||
static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */
|
||||
const BYTE* const ostart = op;
|
||||
const size_t litLength = sequence.litLength;
|
||||
@ -1422,8 +1422,9 @@ static size_t ZSTD_execSequence(BYTE* op,
|
||||
const BYTE* const litEnd = *litPtr + litLength;
|
||||
|
||||
/* check */
|
||||
if (endMatch > oend) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
|
||||
if (endMatch > oend) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; /* overwrite beyond dst buffer */
|
||||
if (litEnd > litLimit) return (size_t)-ZSTD_ERROR_corruption;
|
||||
if (sequence.matchLength > (size_t)(*litPtr-op)) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; /* overwrite literal segment */
|
||||
|
||||
/* copy Literals */
|
||||
if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8))
|
||||
@ -1536,7 +1537,9 @@ static size_t ZSTD_decompressSequences(
|
||||
|
||||
memset(&sequence, 0, sizeof(sequence));
|
||||
seqState.dumps = dumps;
|
||||
FSE_initDStream(&(seqState.DStream), ip, iend-ip);
|
||||
seqState.prevOffset = 1;
|
||||
errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip);
|
||||
if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_corruption;
|
||||
FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
|
||||
FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
|
||||
FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
|
||||
|
@ -73,10 +73,10 @@ fullbench : $(ZSTDDIR)/zstd.c datagen.c fullbench.c
|
||||
fullbench32: $(ZSTDDIR)/zstd.c datagen.c fullbench.c
|
||||
$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
|
||||
|
||||
fuzzer : $(ZSTDDIR)/zstd.c xxhash.c fuzzer.c
|
||||
fuzzer : $(ZSTDDIR)/zstd.c datagen.c xxhash.c fuzzer.c
|
||||
$(CC) $(FLAGS) $^ -o $@$(EXT)
|
||||
|
||||
fuzzer32: $(ZSTDDIR)/zstd.c xxhash.c fuzzer.c
|
||||
fuzzer32: $(ZSTDDIR)/zstd.c datagen.c xxhash.c fuzzer.c
|
||||
$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
|
||||
|
||||
datagen : datagen.c datagencli.c
|
||||
|
@ -153,7 +153,7 @@ void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double match
|
||||
memset(buffPtr+pos, 0, size0);
|
||||
pos += size0;
|
||||
buffPtr[pos-1] = RDG_genChar(seed, lt);
|
||||
return;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* init */
|
||||
@ -200,7 +200,7 @@ void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba
|
||||
|
||||
#define RDG_DICTSIZE (32 KB)
|
||||
#define RDG_BLOCKSIZE (128 KB)
|
||||
void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed)
|
||||
void RDG_genStdout(unsigned long long size, double matchProba, double litProba, unsigned seed)
|
||||
{
|
||||
BYTE* buff = (BYTE*)malloc(RDG_DICTSIZE + RDG_BLOCKSIZE);
|
||||
U64 total = 0;
|
||||
|
@ -26,15 +26,15 @@
|
||||
|
||||
#include <stddef.h> /* size_t */
|
||||
|
||||
void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed);
|
||||
void RDG_genStdout(unsigned long long size, double matchProba, double litProba, unsigned seed);
|
||||
void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed);
|
||||
/* RDG_genBuffer
|
||||
Generate 'size' bytes of compressible data into 'buffer'.
|
||||
Compressibility can be controlled using 'matchProba'.
|
||||
'LitProba' is optional, and affect variability of individual bytes. If litProba==0.0, default value is used.
|
||||
Compressibility can be controlled using 'matchProba', which is floating point value between 0 and 1.
|
||||
'LitProba' is optional, it affect variability of individual bytes. If litProba==0.0, default value will be used.
|
||||
Generated data pattern can be modified using different 'seed'.
|
||||
If (matchProba, litProba and seed) are equal, the function always generate the same content.
|
||||
For a triplet (matchProba, litProba, seed), the function always generate the same content.
|
||||
|
||||
RDG_genOut
|
||||
Same as RDG_genBuffer, but generate data towards stdout
|
||||
RDG_genStdout
|
||||
Same as RDG_genBuffer, but generates data into stdout
|
||||
*/
|
||||
|
@ -183,7 +183,7 @@ int main(int argc, char** argv)
|
||||
DISPLAYLEVEL(3, "Seed = %u \n", seed);
|
||||
if (proba!=COMPRESSIBILITY_DEFAULT) DISPLAYLEVEL(3, "Compressibility : %i%%\n", (U32)(proba*100));
|
||||
|
||||
RDG_genOut(size, proba, litProba, seed);
|
||||
RDG_genStdout(size, proba, litProba, seed);
|
||||
DISPLAYLEVEL(1, "\n");
|
||||
|
||||
return 0;
|
||||
|
@ -47,6 +47,7 @@
|
||||
#include <sys/timeb.h> /* timeb */
|
||||
#include <string.h> /* strcmp */
|
||||
#include "zstd_static.h"
|
||||
#include "datagen.h" /* RDG_genBuffer */
|
||||
#include "xxhash.h" /* XXH64 */
|
||||
|
||||
|
||||
@ -138,45 +139,6 @@ unsigned int FUZ_rand(unsigned int* src)
|
||||
}
|
||||
|
||||
|
||||
#define FUZ_RAND15BITS (FUZ_rand(seed) & 0x7FFF)
|
||||
#define FUZ_RANDLENGTH ( (FUZ_rand(seed) & 3) ? (FUZ_rand(seed) % 15) : (FUZ_rand(seed) % 510) + 15)
|
||||
static void FUZ_generateSynthetic(void* buffer, size_t bufferSize, double proba, U32* seed)
|
||||
{
|
||||
BYTE* BBuffer = (BYTE*)buffer;
|
||||
unsigned pos = 0;
|
||||
U32 P32 = (U32)(32768 * proba);
|
||||
|
||||
// First Byte
|
||||
BBuffer[pos++] = (BYTE)((FUZ_rand(seed) & 0x3F) + '0');
|
||||
|
||||
while (pos < bufferSize)
|
||||
{
|
||||
// Select : Literal (noise) or copy (within 64K)
|
||||
if (FUZ_RAND15BITS < P32)
|
||||
{
|
||||
// Copy (within 64K)
|
||||
size_t match, end;
|
||||
size_t length = FUZ_RANDLENGTH + 4;
|
||||
size_t offset = FUZ_RAND15BITS + 1;
|
||||
if (offset > pos) offset = pos;
|
||||
if (pos + length > bufferSize) length = bufferSize - pos;
|
||||
match = pos - offset;
|
||||
end = pos + length;
|
||||
while (pos < end) BBuffer[pos++] = BBuffer[match++];
|
||||
}
|
||||
else
|
||||
{
|
||||
// Literal (noise)
|
||||
size_t end;
|
||||
size_t length = FUZ_RANDLENGTH;
|
||||
if (pos + length > bufferSize) length = bufferSize - pos;
|
||||
end = pos + length;
|
||||
while (pos < end) BBuffer[pos++] = (BYTE)((FUZ_rand(seed) & 0x3F) + '0');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static unsigned FUZ_highbit32(U32 v32)
|
||||
{
|
||||
unsigned nbBits = 0;
|
||||
@ -200,7 +162,7 @@ static int basicUnitTests(U32 seed, double compressibility)
|
||||
size_t result, cSize;
|
||||
U32 testNb=0;
|
||||
|
||||
// Create compressible test buffer
|
||||
/* Create compressible test buffer */
|
||||
CNBuffer = malloc(COMPRESSIBLE_NOISE_LENGTH);
|
||||
compressedBuffer = malloc(ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH));
|
||||
decodedBuffer = malloc(COMPRESSIBLE_NOISE_LENGTH);
|
||||
@ -210,9 +172,9 @@ static int basicUnitTests(U32 seed, double compressibility)
|
||||
testResult = 1;
|
||||
goto _end;
|
||||
}
|
||||
FUZ_generateSynthetic(CNBuffer, COMPRESSIBLE_NOISE_LENGTH, compressibility, &randState);
|
||||
RDG_genBuffer(CNBuffer, COMPRESSIBLE_NOISE_LENGTH, compressibility, 0., randState);
|
||||
|
||||
// Basic tests
|
||||
/* Basic tests */
|
||||
DISPLAYLEVEL(4, "test%3i : compress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
|
||||
result = ZSTD_compress(compressedBuffer, ZSTD_compressBound(COMPRESSIBLE_NOISE_LENGTH), CNBuffer, COMPRESSIBLE_NOISE_LENGTH);
|
||||
if (ZSTD_isError(result)) goto _output_error;
|
||||
@ -263,10 +225,10 @@ static int basicUnitTests(U32 seed, double compressibility)
|
||||
{
|
||||
size_t sampleSize = 0;
|
||||
DISPLAYLEVEL(4, "test%3i : Long RLE test : ", testNb++);
|
||||
FUZ_generateSynthetic(CNBuffer, sampleSize, compressibility, &randState);
|
||||
RDG_genBuffer(CNBuffer, sampleSize, compressibility, 0., randState);
|
||||
memset((char*)CNBuffer+sampleSize, 'B', 256 KB - 1);
|
||||
sampleSize += 256 KB - 1;
|
||||
FUZ_generateSynthetic((char*)CNBuffer+sampleSize, 96 KB, compressibility, &randState);
|
||||
RDG_genBuffer((char*)CNBuffer+sampleSize, 96 KB, compressibility, 0., randState);
|
||||
sampleSize += 96 KB;
|
||||
cSize = ZSTD_compress(compressedBuffer, ZSTD_compressBound(sampleSize), CNBuffer, sampleSize);
|
||||
if (ZSTD_isError(cSize)) goto _output_error;
|
||||
@ -311,6 +273,7 @@ static const U32 maxSampleLog = 22;
|
||||
|
||||
int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibility)
|
||||
{
|
||||
BYTE* cNoiseBuffer[5];
|
||||
BYTE* srcBuffer;
|
||||
BYTE* cBuffer;
|
||||
BYTE* dstBuffer;
|
||||
@ -322,13 +285,23 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
|
||||
U32 coreSeed = seed, lseed = 0;
|
||||
|
||||
/* allocation */
|
||||
srcBuffer = (BYTE*)malloc (srcBufferSize);
|
||||
cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize);
|
||||
cNoiseBuffer[1] = (BYTE*)malloc (srcBufferSize);
|
||||
cNoiseBuffer[2] = (BYTE*)malloc (srcBufferSize);
|
||||
cNoiseBuffer[3] = (BYTE*)malloc (srcBufferSize);
|
||||
cNoiseBuffer[4] = (BYTE*)malloc (srcBufferSize);
|
||||
dstBuffer = (BYTE*)malloc (dstBufferSize);
|
||||
cBuffer = (BYTE*)malloc (cBufferSize);
|
||||
CHECK (!srcBuffer || !dstBuffer || !cBuffer, "Not enough memory, fuzzer tests cancelled");
|
||||
CHECK (!cNoiseBuffer[0] || !cNoiseBuffer[1] || !cNoiseBuffer[2] || !dstBuffer || !cBuffer,
|
||||
"Not enough memory, fuzzer tests cancelled");
|
||||
|
||||
/* Create initial sample */
|
||||
FUZ_generateSynthetic(srcBuffer, srcBufferSize, compressibility, &coreSeed);
|
||||
/* Create initial samples */
|
||||
RDG_genBuffer(cNoiseBuffer[0], srcBufferSize, 0.00, 0., coreSeed); /* pure noise */
|
||||
RDG_genBuffer(cNoiseBuffer[1], srcBufferSize, 0.05, 0., coreSeed); /* barely compressible */
|
||||
RDG_genBuffer(cNoiseBuffer[2], srcBufferSize, compressibility, 0., coreSeed);
|
||||
RDG_genBuffer(cNoiseBuffer[3], srcBufferSize, 0.95, 0., coreSeed); /* highly compressible */
|
||||
RDG_genBuffer(cNoiseBuffer[4], srcBufferSize, 1.00, 0., coreSeed); /* sparse content */
|
||||
srcBuffer = cNoiseBuffer[2];
|
||||
|
||||
/* catch up testNb */
|
||||
for (testNb=1; testNb < startTest; testNb++)
|
||||
@ -339,13 +312,30 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
|
||||
{
|
||||
size_t sampleSize, sampleStart;
|
||||
size_t cSize, dSize, dSupSize;
|
||||
U32 sampleSizeLog;
|
||||
U32 sampleSizeLog, buffNb;
|
||||
U64 crcOrig, crcDest;
|
||||
|
||||
/* init */
|
||||
DISPLAYUPDATE(2, "\r%6u/%6u ", testNb, nbTests);
|
||||
FUZ_rand(&coreSeed);
|
||||
lseed = coreSeed ^ prime1;
|
||||
buffNb = FUZ_rand(&lseed) & 127;
|
||||
if (buffNb & 7) buffNb=2;
|
||||
else
|
||||
{
|
||||
buffNb >>= 3;
|
||||
if (buffNb & 7)
|
||||
{
|
||||
const U32 tnb[2] = { 1, 3 };
|
||||
buffNb = tnb[buffNb >> 3];
|
||||
}
|
||||
else
|
||||
{
|
||||
const U32 tnb[2] = { 0, 4 };
|
||||
buffNb = tnb[buffNb >> 3];
|
||||
}
|
||||
}
|
||||
srcBuffer = cNoiseBuffer[buffNb];
|
||||
sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog;
|
||||
sampleSize = (size_t)1 << sampleSizeLog;
|
||||
sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
|
||||
@ -357,7 +347,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
|
||||
CHECK(ZSTD_isError(cSize), "ZSTD_compress failed");
|
||||
|
||||
/* compression failure test : too small dest buffer */
|
||||
if (cSize > 3)
|
||||
if (cSize > 3)
|
||||
{
|
||||
size_t errorCode;
|
||||
const size_t missing = (FUZ_rand(&lseed) % (cSize-2)) + 1; /* no problem, as cSize > 4 (frameHeaderSizer) */
|
||||
@ -384,8 +374,8 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
|
||||
const size_t missing = (FUZ_rand(&lseed) % (cSize-2)) + 1; /* no problem, as cSize > 4 (frameHeaderSizer) */
|
||||
const size_t tooSmallSize = cSize - missing;
|
||||
void* cBufferTooSmall = malloc(tooSmallSize); /* valgrind will catch overflows */
|
||||
memcpy(cBufferTooSmall, cBuffer, tooSmallSize);
|
||||
CHECK(cBufferTooSmall == NULL, "not enough memory !");
|
||||
memcpy(cBufferTooSmall, cBuffer, tooSmallSize);
|
||||
errorCode = ZSTD_decompress(dstBuffer, dstBufferSize, cBufferTooSmall, tooSmallSize);
|
||||
CHECK(!ZSTD_isError(errorCode), "ZSTD_decompress should have failed ! (truncated src buffer)");
|
||||
free(cBufferTooSmall);
|
||||
@ -422,6 +412,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
|
||||
if (nbBits>0) nbBits--;
|
||||
mask = (1<<nbBits) - 1;
|
||||
noiseLength = (FUZ_rand(&lseed) & mask) + 1;
|
||||
if ( pos+noiseLength > cSize ) noiseLength = cSize-pos;
|
||||
noiseStart = FUZ_rand(&lseed) % (srcBufferSize - noiseLength);
|
||||
memcpy(cBuffer + pos, srcBuffer + noiseStart, noiseLength);
|
||||
pos += noiseLength;
|
||||
@ -435,9 +426,11 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
|
||||
|
||||
/* decompress noisy source */
|
||||
{
|
||||
U32 noiseSrc = FUZ_rand(&lseed) % 5;
|
||||
const U32 endMark = 0xA9B1C3D6;
|
||||
U32 endCheck;
|
||||
size_t errorCode;
|
||||
srcBuffer = cNoiseBuffer[noiseSrc];
|
||||
memcpy(dstBuffer+sampleSize, &endMark, 4);
|
||||
errorCode = ZSTD_decompress(dstBuffer, sampleSize, cBuffer, cSize);
|
||||
/* result *may* be an unlikely success, but even then, it must strictly respect dest buffer boundaries */
|
||||
@ -451,7 +444,11 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
|
||||
DISPLAY("\rAll fuzzer tests completed \n");
|
||||
|
||||
_cleanup:
|
||||
free(srcBuffer);
|
||||
free(cNoiseBuffer[0]);
|
||||
free(cNoiseBuffer[1]);
|
||||
free(cNoiseBuffer[2]);
|
||||
free(cNoiseBuffer[3]);
|
||||
free(cNoiseBuffer[4]);
|
||||
free(cBuffer);
|
||||
free(dstBuffer);
|
||||
return result;
|
||||
|
Loading…
x
Reference in New Issue
Block a user