Updated datagen: sparse file generation

This commit is contained in:
Yann Collet 2015-03-09 13:17:36 +01:00
parent 48310ded03
commit 5203b8c68d

View File

@ -127,12 +127,28 @@ static char RDG_genChar(U32* seed, const void* ltctx)
#define RDG_RANDLENGTH ( ((RDG_rand(seed) >> 7) & 7) ? (RDG_rand(seed) & 15) : (RDG_rand(seed) & 511) + 15) #define RDG_RANDLENGTH ( ((RDG_rand(seed) >> 7) & 7) ? (RDG_rand(seed) & 15) : (RDG_rand(seed) & 511) + 15)
void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, void* litTable, unsigned* seedPtr) void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, void* litTable, unsigned* seedPtr)
{ {
BYTE* buffPtr = ((BYTE*)buffer) - prefixSize; BYTE* buffPtr = (BYTE*)buffer;
const U32 matchProba32 = (U32)(32768 * matchProba); const U32 matchProba32 = (U32)(32768 * matchProba);
size_t pos = prefixSize; size_t pos = prefixSize;
void* ldctx = litTable; void* ldctx = litTable;
U32* seed = seedPtr; U32* seed = seedPtr;
/* special case */
while (matchProba >= 1.0)
{
size_t size0 = RDG_rand(seed) & 3;
size0 = 1U << (16 + size0 * 2);
size0 += RDG_rand(seed) & (size0-1); /* because size0 is power of 2*/
if (buffSize < pos + size0)
{
memset(buffPtr+pos, 0, buffSize-pos);
return;
}
memset(buffPtr+pos, 0, size0);
pos += size0;
buffPtr[pos-1] = RDG_genChar(seed, ldctx);
}
/* init */ /* init */
if (pos==0) buffPtr[0] = RDG_genChar(seed, ldctx), pos=1; if (pos==0) buffPtr[0] = RDG_genChar(seed, ldctx), pos=1;
@ -148,18 +164,18 @@ void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double match
int length = RDG_RANDLENGTH + 4; int length = RDG_RANDLENGTH + 4;
U32 offset = RDG_RAND15BITS + 1; U32 offset = RDG_RAND15BITS + 1;
if (offset > pos) offset = pos; if (offset > pos) offset = pos;
if (pos + length > buffSize) length = buffSize - pos;
match = pos - offset; match = pos - offset;
d = pos + length; d = pos + length;
if (d > buffSize) d = buffSize;
while (pos < d) buffPtr[pos++] = buffPtr[match++]; while (pos < d) buffPtr[pos++] = buffPtr[match++];
} }
else else
{ {
/* Literal (noise) */ /* Literal (noise) */
U32 d; size_t d;
int length = RDG_RANDLENGTH; size_t length = RDG_RANDLENGTH;
if (pos + length > buffSize) length = buffSize - pos;
d = pos + length; d = pos + length;
if (d > buffSize) d = buffSize;
while (pos < d) buffPtr[pos++] = RDG_genChar(seed, ldctx); while (pos < d) buffPtr[pos++] = RDG_genChar(seed, ldctx);
} }
} }
@ -169,7 +185,7 @@ void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double match
void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed) void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed)
{ {
void* ldctx; void* ldctx;
if (litProba==0.0) litProba = matchProba / 3.8; if (litProba==0.0) litProba = matchProba / 4.5;
ldctx = RDG_createLiteralDistrib(litProba); ldctx = RDG_createLiteralDistrib(litProba);
RDG_genBlock(buffer, size, 0, matchProba, ldctx, &seed); RDG_genBlock(buffer, size, 0, matchProba, ldctx, &seed);
free(ldctx); free(ldctx);
@ -179,30 +195,28 @@ void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba
#define RDG_BLOCKSIZE (128 KB) #define RDG_BLOCKSIZE (128 KB)
void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed) void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed)
{ {
BYTE fullbuff[RDG_DICTSIZE + RDG_BLOCKSIZE + 1]; BYTE buff[RDG_DICTSIZE + RDG_BLOCKSIZE];
BYTE* buff = fullbuff + RDG_DICTSIZE;
U64 total = 0; U64 total = 0;
U32 genBlockSize = RDG_BLOCKSIZE; size_t genBlockSize = RDG_BLOCKSIZE;
void* ldctx; void* ldctx;
/* init */ /* init */
if (litProba==0.0) litProba = matchProba / 3.8; if (litProba==0.0) litProba = matchProba / 4.5;
ldctx = RDG_createLiteralDistrib(litProba); ldctx = RDG_createLiteralDistrib(litProba);
SET_BINARY_MODE(stdout); SET_BINARY_MODE(stdout);
/* Generate dict */ /* Generate dict */
RDG_genBlock(fullbuff, RDG_DICTSIZE, 0, matchProba, ldctx, &seed); RDG_genBlock(buff, RDG_DICTSIZE, 0, matchProba, ldctx, &seed);
/* Generate compressible data */ /* Generate compressible data */
while (total < size) while (total < size)
{ {
RDG_genBlock(buff, RDG_BLOCKSIZE, RDG_DICTSIZE, matchProba, ldctx, &seed); RDG_genBlock(buff, RDG_DICTSIZE+RDG_BLOCKSIZE, RDG_DICTSIZE, matchProba, ldctx, &seed);
if (size-total < RDG_BLOCKSIZE) genBlockSize = (U32)(size-total); if (size-total < RDG_BLOCKSIZE) genBlockSize = (size_t)(size-total);
total += genBlockSize; total += genBlockSize;
buff[genBlockSize] = 0;
fwrite(buff, 1, genBlockSize, stdout); fwrite(buff, 1, genBlockSize, stdout);
/* update dict */ /* update dict */
memcpy(fullbuff, buff + (RDG_BLOCKSIZE - RDG_DICTSIZE), RDG_DICTSIZE); memcpy(buff, buff + RDG_BLOCKSIZE, RDG_DICTSIZE);
} }
free(ldctx); free(ldctx);