diff --git a/programs/datagen.c b/programs/datagen.c index 026b9a18..88f48731 100644 --- a/programs/datagen.c +++ b/programs/datagen.c @@ -55,8 +55,9 @@ static U32 RDG_rand(U32* src) return rand32 >> 5; } +typedef U32 fixedPoint_24_8; -static void RDG_fillLiteralDistrib(BYTE* ldt, double ld) +static void RDG_fillLiteralDistrib(BYTE* ldt, fixedPoint_24_8 ld) { BYTE const firstChar = (ld<=0.0) ? 0 : '('; BYTE const lastChar = (ld<=0.0) ? 255 : '}'; @@ -65,7 +66,7 @@ static void RDG_fillLiteralDistrib(BYTE* ldt, double ld) if (ld<=0.0) ld = 0.0; for (u=0; u> 8) + 1; U32 const end = MIN ( u + weight , LTSIZE); while (u < end) ldt[u++] = character; character++; @@ -92,7 +93,8 @@ static U32 RDG_randLength(U32* seedPtr) return (RDG_rand(seedPtr) & 0x1FF) + 0xF; } -static void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, const BYTE* ldt, U32* seedPtr) +static void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, + double matchProba, const BYTE* ldt, U32* seedPtr) { BYTE* const buffPtr = (BYTE*)buffer; U32 const matchProba32 = (U32)(32768 * matchProba); @@ -128,13 +130,13 @@ static void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, doubl U32 const randOffset = RDG_rand15Bits(seedPtr) + 1; U32 const offset = repeatOffset ? prevOffset : (U32) MIN(randOffset , pos); size_t match = pos - offset; - while (pos < d) buffPtr[pos++] = buffPtr[match++]; /* correctly manages overlaps */ + while (pos < d) { buffPtr[pos++] = buffPtr[match++]; /* correctly manages overlaps */ } prevOffset = offset; } else { /* Literal (noise) */ U32 const length = RDG_randLength(seedPtr); U32 const d = (U32) MIN(pos + length, buffSize); - while (pos < d) buffPtr[pos++] = RDG_genChar(seedPtr, ldt); + while (pos < d) { buffPtr[pos++] = RDG_genChar(seedPtr, ldt); } } } } @@ -145,7 +147,7 @@ void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba BYTE ldt[LTSIZE]; memset(ldt, '0', sizeof(ldt)); /* yes, character '0', this is intentional */ if (litProba<=0.0) litProba = matchProba / 4.5; - RDG_fillLiteralDistrib(ldt, litProba); + RDG_fillLiteralDistrib(ldt, (fixedPoint_24_8)(litProba * 256 + 0.001)); RDG_genBlock(buffer, size, 0, matchProba, ldt, &seed32); } @@ -163,7 +165,7 @@ void RDG_genStdout(unsigned long long size, double matchProba, double litProba, if (buff==NULL) { perror("datagen"); exit(1); } if (litProba<=0.0) litProba = matchProba / 4.5; memset(ldt, '0', sizeof(ldt)); /* yes, character '0', this is intentional */ - RDG_fillLiteralDistrib(ldt, litProba); + RDG_fillLiteralDistrib(ldt, (fixedPoint_24_8)(litProba * 256 + 0.001)); SET_BINARY_MODE(stdout); /* Generate initial dict */ diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 9553ea71..e23bdec9 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -308,7 +308,7 @@ static int FUZ_mallocTests(unsigned seed, double compressibility, unsigned part) * Unit tests =============================================*/ -static int basicUnitTests(U32 seed, double compressibility) +static int basicUnitTests(U32 const seed, double compressibility) { size_t const CNBuffSize = 5 MB; void* const CNBuffer = malloc(CNBuffSize); @@ -1053,14 +1053,14 @@ static int basicUnitTests(U32 seed, double compressibility) size_t const contentSize = 9 KB; const void* const dict = (const char*)CNBuffer; const void* const contentStart = (const char*)dict + flatdictSize; - size_t const target_nodict_cSize[22+1] = { 3840, 3740, 3840, 3810, 3750, - 3750, 3740, 3740, 3740, 3740, - 3740, 3670, 3660, 3660, 3660, - 3650, 3650, 3650, 3650, 3650, - 3650, 3650, 3650 }; - size_t const target_wdict_cSize[22+1] = { 2820, 2850, 2860, 2820, 2940, - 2930, 2930, 2920, 2890, 2890, - 2890, 2900, 2900, 2770, 2760, + size_t const target_nodict_cSize[22+1] = { 3840, 3770, 3870, 3830, 3770, + 3770, 3770, 3770, 3750, 3750, + 3740, 3670, 3670, 3660, 3660, + 3660, 3660, 3660, 3660, 3660, + 3660, 3660, 3660 }; + size_t const target_wdict_cSize[22+1] = { 2830, 2890, 2890, 2820, 2940, + 2950, 2950, 2920, 2900, 2890, + 2910, 2910, 2910, 2770, 2760, 2750, 2750, 2750, 2750, 2750, 2750, 2750, 2750 }; int l = 1; @@ -1069,6 +1069,7 @@ static int basicUnitTests(U32 seed, double compressibility) DISPLAYLEVEL(3, "test%3i : flat-dictionary efficiency test : \n", testNb++); assert(maxLevel == 22); RDG_genBuffer(CNBuffer, flatdictSize + contentSize, compressibility, 0., seed); + DISPLAYLEVEL(4, "content hash : %016llx; dict hash : %016llx \n", XXH64(contentStart, contentSize, 0), XXH64(dict, flatdictSize, 0)); for ( ; l <= maxLevel; l++) { size_t const nodict_cSize = ZSTD_compress(compressedBuffer, compressedBufferSize, @@ -1646,6 +1647,7 @@ static int basicUnitTests(U32 seed, double compressibility) size_t const sampleUnitSize = 8 KB; U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize); size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t)); + U32 seed32 = seed; ZDICT_cover_params_t params; U32 dictID; @@ -1658,8 +1660,8 @@ static int basicUnitTests(U32 seed, double compressibility) DISPLAYLEVEL(3, "test%3i : ZDICT_trainFromBuffer_cover : ", testNb++); { U32 u; for (u=0; u