Updated datagen : sparse file generation
This commit is contained in:
parent
48310ded03
commit
5203b8c68d
@ -127,12 +127,28 @@ static char RDG_genChar(U32* seed, const void* ltctx)
|
|||||||
#define RDG_RANDLENGTH ( ((RDG_rand(seed) >> 7) & 7) ? (RDG_rand(seed) & 15) : (RDG_rand(seed) & 511) + 15)
|
#define RDG_RANDLENGTH ( ((RDG_rand(seed) >> 7) & 7) ? (RDG_rand(seed) & 15) : (RDG_rand(seed) & 511) + 15)
|
||||||
void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, void* litTable, unsigned* seedPtr)
|
void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, void* litTable, unsigned* seedPtr)
|
||||||
{
|
{
|
||||||
BYTE* buffPtr = ((BYTE*)buffer) - prefixSize;
|
BYTE* buffPtr = (BYTE*)buffer;
|
||||||
const U32 matchProba32 = (U32)(32768 * matchProba);
|
const U32 matchProba32 = (U32)(32768 * matchProba);
|
||||||
size_t pos = prefixSize;
|
size_t pos = prefixSize;
|
||||||
void* ldctx = litTable;
|
void* ldctx = litTable;
|
||||||
U32* seed = seedPtr;
|
U32* seed = seedPtr;
|
||||||
|
|
||||||
|
/* special case */
|
||||||
|
while (matchProba >= 1.0)
|
||||||
|
{
|
||||||
|
size_t size0 = RDG_rand(seed) & 3;
|
||||||
|
size0 = 1U << (16 + size0 * 2);
|
||||||
|
size0 += RDG_rand(seed) & (size0-1); /* because size0 is power of 2*/
|
||||||
|
if (buffSize < pos + size0)
|
||||||
|
{
|
||||||
|
memset(buffPtr+pos, 0, buffSize-pos);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
memset(buffPtr+pos, 0, size0);
|
||||||
|
pos += size0;
|
||||||
|
buffPtr[pos-1] = RDG_genChar(seed, ldctx);
|
||||||
|
}
|
||||||
|
|
||||||
/* init */
|
/* init */
|
||||||
if (pos==0) buffPtr[0] = RDG_genChar(seed, ldctx), pos=1;
|
if (pos==0) buffPtr[0] = RDG_genChar(seed, ldctx), pos=1;
|
||||||
|
|
||||||
@ -148,18 +164,18 @@ void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double match
|
|||||||
int length = RDG_RANDLENGTH + 4;
|
int length = RDG_RANDLENGTH + 4;
|
||||||
U32 offset = RDG_RAND15BITS + 1;
|
U32 offset = RDG_RAND15BITS + 1;
|
||||||
if (offset > pos) offset = pos;
|
if (offset > pos) offset = pos;
|
||||||
if (pos + length > buffSize) length = buffSize - pos;
|
|
||||||
match = pos - offset;
|
match = pos - offset;
|
||||||
d = pos + length;
|
d = pos + length;
|
||||||
|
if (d > buffSize) d = buffSize;
|
||||||
while (pos < d) buffPtr[pos++] = buffPtr[match++];
|
while (pos < d) buffPtr[pos++] = buffPtr[match++];
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* Literal (noise) */
|
/* Literal (noise) */
|
||||||
U32 d;
|
size_t d;
|
||||||
int length = RDG_RANDLENGTH;
|
size_t length = RDG_RANDLENGTH;
|
||||||
if (pos + length > buffSize) length = buffSize - pos;
|
|
||||||
d = pos + length;
|
d = pos + length;
|
||||||
|
if (d > buffSize) d = buffSize;
|
||||||
while (pos < d) buffPtr[pos++] = RDG_genChar(seed, ldctx);
|
while (pos < d) buffPtr[pos++] = RDG_genChar(seed, ldctx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -169,7 +185,7 @@ void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double match
|
|||||||
void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed)
|
void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed)
|
||||||
{
|
{
|
||||||
void* ldctx;
|
void* ldctx;
|
||||||
if (litProba==0.0) litProba = matchProba / 3.8;
|
if (litProba==0.0) litProba = matchProba / 4.5;
|
||||||
ldctx = RDG_createLiteralDistrib(litProba);
|
ldctx = RDG_createLiteralDistrib(litProba);
|
||||||
RDG_genBlock(buffer, size, 0, matchProba, ldctx, &seed);
|
RDG_genBlock(buffer, size, 0, matchProba, ldctx, &seed);
|
||||||
free(ldctx);
|
free(ldctx);
|
||||||
@ -179,30 +195,28 @@ void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba
|
|||||||
#define RDG_BLOCKSIZE (128 KB)
|
#define RDG_BLOCKSIZE (128 KB)
|
||||||
void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed)
|
void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed)
|
||||||
{
|
{
|
||||||
BYTE fullbuff[RDG_DICTSIZE + RDG_BLOCKSIZE + 1];
|
BYTE buff[RDG_DICTSIZE + RDG_BLOCKSIZE];
|
||||||
BYTE* buff = fullbuff + RDG_DICTSIZE;
|
|
||||||
U64 total = 0;
|
U64 total = 0;
|
||||||
U32 genBlockSize = RDG_BLOCKSIZE;
|
size_t genBlockSize = RDG_BLOCKSIZE;
|
||||||
void* ldctx;
|
void* ldctx;
|
||||||
|
|
||||||
/* init */
|
/* init */
|
||||||
if (litProba==0.0) litProba = matchProba / 3.8;
|
if (litProba==0.0) litProba = matchProba / 4.5;
|
||||||
ldctx = RDG_createLiteralDistrib(litProba);
|
ldctx = RDG_createLiteralDistrib(litProba);
|
||||||
SET_BINARY_MODE(stdout);
|
SET_BINARY_MODE(stdout);
|
||||||
|
|
||||||
/* Generate dict */
|
/* Generate dict */
|
||||||
RDG_genBlock(fullbuff, RDG_DICTSIZE, 0, matchProba, ldctx, &seed);
|
RDG_genBlock(buff, RDG_DICTSIZE, 0, matchProba, ldctx, &seed);
|
||||||
|
|
||||||
/* Generate compressible data */
|
/* Generate compressible data */
|
||||||
while (total < size)
|
while (total < size)
|
||||||
{
|
{
|
||||||
RDG_genBlock(buff, RDG_BLOCKSIZE, RDG_DICTSIZE, matchProba, ldctx, &seed);
|
RDG_genBlock(buff, RDG_DICTSIZE+RDG_BLOCKSIZE, RDG_DICTSIZE, matchProba, ldctx, &seed);
|
||||||
if (size-total < RDG_BLOCKSIZE) genBlockSize = (U32)(size-total);
|
if (size-total < RDG_BLOCKSIZE) genBlockSize = (size_t)(size-total);
|
||||||
total += genBlockSize;
|
total += genBlockSize;
|
||||||
buff[genBlockSize] = 0;
|
|
||||||
fwrite(buff, 1, genBlockSize, stdout);
|
fwrite(buff, 1, genBlockSize, stdout);
|
||||||
/* update dict */
|
/* update dict */
|
||||||
memcpy(fullbuff, buff + (RDG_BLOCKSIZE - RDG_DICTSIZE), RDG_DICTSIZE);
|
memcpy(buff, buff + RDG_BLOCKSIZE, RDG_DICTSIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
free(ldctx);
|
free(ldctx);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user