Added a struct (dictInfo) to keep function signatures more orderly

This commit is contained in:
Paul Cruz 2017-06-19 16:23:00 -07:00
parent b91b810c64
commit 7ea025cc7d

View File

@ -236,8 +236,7 @@ typedef struct {
typedef struct { typedef struct {
int useDict; int useDict;
U32 dictID; U32 dictID;
size_t dictSize; size_t dictContentSize;
BYTE* fullDict;
BYTE* dictContent; BYTE* dictContent;
} dictInfo; } dictInfo;
/*-******************************************************* /*-*******************************************************
@ -249,7 +248,7 @@ struct {
} opts; /* advanced options on generation */ } opts; /* advanced options on generation */
/* Generate and write a random frame header */ /* Generate and write a random frame header */
static void writeFrameHeader(U32* seed, frame_t* frame, int genDict, U32 dictID) static void writeFrameHeader(U32* seed, frame_t* frame, dictInfo info)
{ {
BYTE* const op = frame->data; BYTE* const op = frame->data;
size_t pos = 0; size_t pos = 0;
@ -315,7 +314,7 @@ static void writeFrameHeader(U32* seed, frame_t* frame, int genDict, U32 dictID)
pos += 4; pos += 4;
{ {
int dictBits = genDict ? 3 : 0; int dictBits = info.useDict ? 3 : 0;
BYTE const frameHeaderDescriptor = BYTE const frameHeaderDescriptor =
(BYTE) ((fcsCode << 6) | (singleSegment << 5) | (1 << 2) | dictBits); (BYTE) ((fcsCode << 6) | (singleSegment << 5) | (1 << 2) | dictBits);
op[pos++] = frameHeaderDescriptor; op[pos++] = frameHeaderDescriptor;
@ -324,8 +323,8 @@ static void writeFrameHeader(U32* seed, frame_t* frame, int genDict, U32 dictID)
if (!singleSegment) { if (!singleSegment) {
op[pos++] = windowByte; op[pos++] = windowByte;
} }
if(genDict) { if(info.useDict) {
MEM_writeLE32(op + pos, (U32) dictID); MEM_writeLE32(op + pos, (U32) info.dictID);
pos += 4; pos += 4;
} }
if (contentSizeFlag) { if (contentSizeFlag) {
@ -618,7 +617,7 @@ static inline void initSeqStore(seqStore_t *seqStore) {
/* Randomly generate sequence commands */ /* Randomly generate sequence commands */
static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore, static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
size_t contentSize, size_t literalsSize, int genDict, size_t dictSize, BYTE* dictContent) size_t contentSize, size_t literalsSize, dictInfo info)
{ {
/* The total length of all the matches */ /* The total length of all the matches */
size_t const remainingMatch = contentSize - literalsSize; size_t const remainingMatch = contentSize - literalsSize;
@ -675,10 +674,10 @@ static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
MIN(frame->header.windowSize, MIN(frame->header.windowSize,
(size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) + (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) +
1; 1;
if (genDict && (RAND(seed) & 1)) { if (info.useDict && (RAND(seed) & 1)) {
/* need to occasionally generate offsets that go past the start */ /* need to occasionally generate offsets that go past the start */
/* we still need to be within the windowSize however */ /* we still need to be within the windowSize however */
U32 const lenPastStart = RAND(seed) % dictSize; U32 const lenPastStart = RAND(seed) % info.dictContentSize;
offset = MIN(frame->header.windowSize, offset+lenPastStart); offset = MIN(frame->header.windowSize, offset+lenPastStart);
} }
offsetCode = offset + ZSTD_REP_MOVE; offsetCode = offset + ZSTD_REP_MOVE;
@ -696,13 +695,13 @@ static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
repIndex = MIN(2, offsetCode + 1); repIndex = MIN(2, offsetCode + 1);
} }
} }
} while (((!genDict) && (offset > (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) || offset == 0); } while (((!info.useDict) && (offset > (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) || offset == 0);
{ size_t j; { size_t j;
for (j = 0; j < matchLen; j++) { for (j = 0; j < matchLen; j++) {
if ((void*)(srcPtr - offset) < (void*)frame->srcStart) { if ((void*)(srcPtr - offset) < (void*)frame->srcStart) {
/* copy from dictionary instead of literals */ /* copy from dictionary instead of literals */
*srcPtr = *(dictContent + dictSize - (offset-(srcPtr-(BYTE*)frame->srcStart))); *srcPtr = *(info.dictContent + info.dictContentSize - (offset-(srcPtr-(BYTE*)frame->srcStart)));
} }
else { else {
*srcPtr = *(srcPtr-offset); *srcPtr = *(srcPtr-offset);
@ -956,7 +955,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
} }
static size_t writeSequencesBlock(U32* seed, frame_t* frame, size_t contentSize, static size_t writeSequencesBlock(U32* seed, frame_t* frame, size_t contentSize,
size_t literalsSize, int genDict, size_t dictSize, BYTE* dictContent) size_t literalsSize, dictInfo info)
{ {
seqStore_t seqStore; seqStore_t seqStore;
size_t numSequences; size_t numSequences;
@ -965,14 +964,14 @@ static size_t writeSequencesBlock(U32* seed, frame_t* frame, size_t contentSize,
initSeqStore(&seqStore); initSeqStore(&seqStore);
/* randomly generate sequences */ /* randomly generate sequences */
numSequences = generateSequences(seed, frame, &seqStore, contentSize, literalsSize, genDict, dictSize, dictContent); numSequences = generateSequences(seed, frame, &seqStore, contentSize, literalsSize, info);
/* write them out to the frame data */ /* write them out to the frame data */
CHECKERR(writeSequences(seed, frame, &seqStore, numSequences)); CHECKERR(writeSequences(seed, frame, &seqStore, numSequences));
return numSequences; return numSequences;
} }
static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize, int genDict, size_t dictSize, BYTE* dictContent) static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize, dictInfo info)
{ {
BYTE* const blockStart = (BYTE*)frame->data; BYTE* const blockStart = (BYTE*)frame->data;
size_t literalsSize; size_t literalsSize;
@ -984,7 +983,7 @@ static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize
DISPLAYLEVEL(4, " literals size: %u\n", (U32)literalsSize); DISPLAYLEVEL(4, " literals size: %u\n", (U32)literalsSize);
nbSeq = writeSequencesBlock(seed, frame, contentSize, literalsSize, genDict, dictSize, dictContent); nbSeq = writeSequencesBlock(seed, frame, contentSize, literalsSize, info);
DISPLAYLEVEL(4, " number of sequences: %u\n", (U32)nbSeq); DISPLAYLEVEL(4, " number of sequences: %u\n", (U32)nbSeq);
@ -992,7 +991,7 @@ static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize
} }
static void writeBlock(U32* seed, frame_t* frame, size_t contentSize, static void writeBlock(U32* seed, frame_t* frame, size_t contentSize,
int lastBlock, int genDict, size_t dictSize, BYTE* dictContent) int lastBlock, dictInfo info)
{ {
int const blockTypeDesc = RAND(seed) % 8; int const blockTypeDesc = RAND(seed) % 8;
size_t blockSize; size_t blockSize;
@ -1032,7 +1031,7 @@ static void writeBlock(U32* seed, frame_t* frame, size_t contentSize,
frame->oldStats = frame->stats; frame->oldStats = frame->stats;
frame->data = op; frame->data = op;
compressedSize = writeCompressedBlock(seed, frame, contentSize, genDict, dictSize, dictContent); compressedSize = writeCompressedBlock(seed, frame, contentSize, info);
if (compressedSize > contentSize) { if (compressedSize > contentSize) {
blockType = 0; blockType = 0;
memcpy(op, frame->src, contentSize); memcpy(op, frame->src, contentSize);
@ -1058,7 +1057,7 @@ static void writeBlock(U32* seed, frame_t* frame, size_t contentSize,
frame->data = op; frame->data = op;
} }
static void writeBlocks(U32* seed, frame_t* frame, int genDict, size_t dictSize, BYTE* dictContent) static void writeBlocks(U32* seed, frame_t* frame, dictInfo info)
{ {
size_t contentLeft = frame->header.contentSize; size_t contentLeft = frame->header.contentSize;
size_t const maxBlockSize = MIN(MAX_BLOCK_SIZE, frame->header.windowSize); size_t const maxBlockSize = MIN(MAX_BLOCK_SIZE, frame->header.windowSize);
@ -1081,7 +1080,7 @@ static void writeBlocks(U32* seed, frame_t* frame, int genDict, size_t dictSize,
} }
} }
writeBlock(seed, frame, blockContentSize, lastBlock, genDict, dictSize, dictContent); writeBlock(seed, frame, blockContentSize, lastBlock, info);
contentLeft -= blockContentSize; contentLeft -= blockContentSize;
if (lastBlock) break; if (lastBlock) break;
@ -1146,19 +1145,92 @@ static void initFrame(frame_t* fr)
} }
/* Return the final seed */ /* Return the final seed */
static U32 generateFrame(U32 seed, frame_t* fr, int genDict, size_t dictSize, BYTE* dictContent, U32 dictID) static U32 generateFrame(U32 seed, frame_t* fr, dictInfo info)
{ {
/* generate a complete frame */ /* generate a complete frame */
DISPLAYLEVEL(1, "frame seed: %u\n", seed); DISPLAYLEVEL(1, "frame seed: %u\n", seed);
initFrame(fr); initFrame(fr);
writeFrameHeader(&seed, fr, genDict, dictID); writeFrameHeader(&seed, fr, info);
writeBlocks(&seed, fr, genDict, dictSize, dictContent); writeBlocks(&seed, fr, info);
writeChecksum(fr); writeChecksum(fr);
return seed; return seed;
} }
/*_*******************************************************
* Dictionary Helper Functions
*********************************************************/
/* genRandomDict() :
 * Writes a dictionary into `fullDict`, a caller-provided buffer of `dictSize` bytes.
 * The first dictSize/4 bytes are left for the header/entropy tables; the remaining
 * bytes are filled with random content derived from `seed`. A few synthetic samples
 * are then generated and handed, together with that content and `dictID`, to
 * ZDICT_finalizeDictionary(), with `fullDict` passed as its output buffer.
 * @return : 0 if successful, otherwise 1 upon error (dictionary too small,
 *           allocation failure, or finalization failure). A message is printed
 *           through DISPLAY in every error case. */
static int genRandomDict(U32 dictID, U32 seed, size_t dictSize, BYTE* fullDict){
    /* use 3/4 of dictionary for content, save rest for header/entropy tables */
    const size_t headerSize = dictSize/4;
    const size_t dictContentSize = dictSize - headerSize;
    BYTE* const dictContent = fullDict + headerSize;
    if (dictContentSize < ZDICT_CONTENTSIZE_MIN || dictSize < ZDICT_DICTSIZE_MIN) {
        DISPLAY("Error: dictionary size is too small\n");
        return 1;
    }

    /* fill in dictionary content */
    RAND_buffer(&seed, (void*)dictContent, dictContentSize);

    /* allocate space for samples */
    {
        /* sample number i (1-based) is 16^(i-1) bytes long :
         * 1 + 16 + 256 + 4096 = 4369 bytes in total, which fits in samplesCapacity */
        unsigned const numSamples = 4;
        size_t const samplesCapacity = 5000;
        size_t dictWriteSize = 0;
        ZDICT_params_t zdictParams;
        BYTE* const samples = malloc(samplesCapacity*sizeof(BYTE));
        size_t* const sampleSizes = malloc(numSamples*sizeof(size_t));
        if (samples == NULL || sampleSizes == NULL) {
            DISPLAY("Error: could not generate samples for the dictionary.\n");
            /* only one of the two allocations may have failed :
             * release whichever succeeded (free(NULL) is a no-op) */
            free(samples);
            free(sampleSizes);
            return 1;
        }

        /* generate samples : sample i is a run of bytes of value i */
        {
            BYTE* curr = samples;
            size_t currSize = 1;
            unsigned i;
            for (i = 1; i <= numSamples; i++) {
                sampleSizes[i - 1] = currSize;
                memset(curr, (int)i, currSize);
                curr += currSize;
                currSize *= 16;
            }
        }

        /* set dictionary params */
        memset(&zdictParams, 0, sizeof(zdictParams));
        zdictParams.dictID = dictID;
        zdictParams.notificationLevel = 1;

        /* finalize dictionary with random samples */
        dictWriteSize = ZDICT_finalizeDictionary(fullDict, dictSize,
                                    dictContent, dictContentSize,
                                    samples, sampleSizes, numSamples,
                                    zdictParams);

        /* the samples are no longer needed, whatever the outcome */
        free(samples);
        free(sampleSizes);

        if (dictWriteSize != dictSize && ZDICT_isError(dictWriteSize)) {
            DISPLAY("Could not finalize dictionary: %s\n", ZDICT_getErrorName(dictWriteSize));
            return 1;
        }
    }
    return 0;
}
/* initDictInfo() :
 * Bundles the dictionary settings into a dictInfo value.
 * The struct is zeroed first, then each argument is copied into the field
 * of the same name. The result is returned by value (no heap allocation). */
static dictInfo initDictInfo(int useDict, size_t dictContentSize, BYTE* dictContent, U32 dictID)
{
    dictInfo info;
    memset(&info, 0, sizeof info);
    info.dictID = dictID;
    info.dictContent = dictContent;
    info.dictContentSize = dictContentSize;
    info.useDict = useDict;
    return info;
}
/*-******************************************************* /*-*******************************************************
* Test Mode * Test Mode
@ -1240,7 +1312,10 @@ static int runTestMode(U32 seed, unsigned numFiles, unsigned const testDurationS
else else
DISPLAYUPDATE("\r%u ", fnum); DISPLAYUPDATE("\r%u ", fnum);
seed = generateFrame(seed, &fr, 0, 0, NULL, 0); {
dictInfo const info = initDictInfo(0, 0, NULL, 0);
generateFrame(seed, &fr, info);
}
{ size_t const r = testDecodeSimple(&fr); { size_t const r = testDecodeSimple(&fr);
if (ZSTD_isError(r)) { if (ZSTD_isError(r)) {
@ -1264,80 +1339,6 @@ static int runTestMode(U32 seed, unsigned numFiles, unsigned const testDurationS
return 0; return 0;
} }
/*_*******************************************************
* Dictionary Helper Functions
*********************************************************/
/* genRandomDict() :
 * Writes a dictionary into `fullDict`, a caller-provided buffer of `dictSize` bytes.
 * The first dictSize/4 bytes are left for the header/entropy tables; the remaining
 * bytes are filled with random content derived from `seed`. A few synthetic samples
 * are then generated and handed, together with that content and `dictID`, to
 * ZDICT_finalizeDictionary(), with `fullDict` passed as its output buffer.
 * @return : 0 if successful, otherwise 1 upon error (dictionary too small,
 *           allocation failure, or finalization failure). A message is printed
 *           through DISPLAY in every error case. */
static int genRandomDict(U32 dictID, U32 seed, size_t dictSize, BYTE* fullDict){
    /* use 3/4 of dictionary for content, save rest for header/entropy tables */
    const size_t headerSize = dictSize/4;
    const size_t dictContentSize = dictSize - headerSize;
    BYTE* const dictContent = fullDict + headerSize;
    if (dictContentSize < ZDICT_CONTENTSIZE_MIN || dictSize < ZDICT_DICTSIZE_MIN) {
        DISPLAY("Error: dictionary size is too small\n");
        return 1;
    }

    /* fill in dictionary content */
    RAND_buffer(&seed, (void*)dictContent, dictContentSize);

    /* allocate space for samples */
    {
        /* sample number i (1-based) is 16^(i-1) bytes long :
         * 1 + 16 + 256 + 4096 = 4369 bytes in total, which fits in samplesCapacity */
        unsigned const numSamples = 4;
        size_t const samplesCapacity = 5000;
        size_t dictWriteSize = 0;
        ZDICT_params_t zdictParams;
        BYTE* const samples = malloc(samplesCapacity*sizeof(BYTE));
        size_t* const sampleSizes = malloc(numSamples*sizeof(size_t));
        if (samples == NULL || sampleSizes == NULL) {
            DISPLAY("Error: could not generate samples for the dictionary.\n");
            /* only one of the two allocations may have failed :
             * release whichever succeeded (free(NULL) is a no-op) */
            free(samples);
            free(sampleSizes);
            return 1;
        }

        /* generate samples : sample i is a run of bytes of value i */
        {
            BYTE* curr = samples;
            size_t currSize = 1;
            unsigned i;
            for (i = 1; i <= numSamples; i++) {
                sampleSizes[i - 1] = currSize;
                memset(curr, (int)i, currSize);
                curr += currSize;
                currSize *= 16;
            }
        }

        /* set dictionary params */
        memset(&zdictParams, 0, sizeof(zdictParams));
        zdictParams.dictID = dictID;
        zdictParams.notificationLevel = 1;

        /* finalize dictionary with random samples */
        dictWriteSize = ZDICT_finalizeDictionary(fullDict, dictSize,
                                    dictContent, dictContentSize,
                                    samples, sampleSizes, numSamples,
                                    zdictParams);

        /* the samples are no longer needed, whatever the outcome */
        free(samples);
        free(sampleSizes);

        if (dictWriteSize != dictSize && ZDICT_isError(dictWriteSize)) {
            DISPLAY("Could not finalize dictionary: %s\n", ZDICT_getErrorName(dictWriteSize));
            return 1;
        }
    }
    return 0;
}
/* initDictInfo() :
 * Builds a dictInfo describing the dictionary held in `fullDict`.
 * All fields start zeroed; dictContent is set to point dictSize/4 bytes into
 * `fullDict`. When `useDict` is non-zero, dictID is taken from RAND(&seed);
 * `seed` is a by-value parameter, so the caller's seed is not affected.
 * When `useDict` is zero, dictID stays 0.
 * The result is returned by value (no heap allocation). */
static dictInfo initDictInfo(int useDict, size_t dictSize, BYTE* fullDict, U32 seed)
{
    dictInfo info;
    memset(&info, 0, sizeof info);
    info.fullDict = fullDict;
    info.dictSize = dictSize;
    info.dictContent = fullDict + dictSize/4;
    info.useDict = useDict;
    if (useDict) {
        info.dictID = RAND(&seed);
    }
    return info;
}
/*-******************************************************* /*-*******************************************************
* File I/O * File I/O
*********************************************************/ *********************************************************/
@ -1349,7 +1350,10 @@ static int generateFile(U32 seed, const char* const path,
DISPLAY("seed: %u\n", seed); DISPLAY("seed: %u\n", seed);
generateFrame(seed, &fr, 0, 0, NULL, 0); {
dictInfo const info = initDictInfo(0, 0, NULL, 0);
generateFrame(seed, &fr, info);
}
outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path); outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path);
if (origPath) { if (origPath) {
@ -1371,7 +1375,10 @@ static int generateCorpus(U32 seed, unsigned numFiles, const char* const path,
DISPLAYUPDATE("\r%u/%u ", fnum, numFiles); DISPLAYUPDATE("\r%u/%u ", fnum, numFiles);
seed = generateFrame(seed, &fr, 0, 0, NULL, 0); {
dictInfo const info = initDictInfo(0, 0, NULL, 0);
generateFrame(seed, &fr, info);
}
if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) { if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) {
DISPLAY("Error: path too long\n"); DISPLAY("Error: path too long\n");
@ -1437,7 +1444,8 @@ static int generateCorpusWithDict(U32 seed, unsigned numFiles, const char* const
{ {
size_t dictContentSize = dictSize-dictSize/4; size_t dictContentSize = dictSize-dictSize/4;
BYTE* const dictContent = fullDict+dictSize/4; BYTE* const dictContent = fullDict+dictSize/4;
seed = generateFrame(seed, &fr, 1, dictContentSize, dictContent, dictID); dictInfo const info = initDictInfo(1, dictContentSize, dictContent, dictID);
seed = generateFrame(seed, &fr, info);
} }
if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) { if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) {
DISPLAY("Error: path too long\n"); DISPLAY("Error: path too long\n");
@ -1545,7 +1553,7 @@ int main(int argc, char** argv)
int testMode = 0; int testMode = 0;
const char* path = NULL; const char* path = NULL;
const char* origPath = NULL; const char* origPath = NULL;
int genDict = 0; int useDict = 0;
unsigned dictSize = (10 << 10); /* 10 kB default */ unsigned dictSize = (10 << 10); /* 10 kB default */
int argNb; int argNb;
@ -1610,7 +1618,7 @@ int main(int argc, char** argv)
} else if (strcmp(argument, "train-dict") == 0) { } else if (strcmp(argument, "train-dict") == 0) {
argument += 11; argument += 11;
dictSize = readInt(&argument); dictSize = readInt(&argument);
genDict = 1; useDict = 1;
} else { } else {
advancedUsage(argv[0]); advancedUsage(argv[0]);
return 1; return 1;
@ -1642,9 +1650,9 @@ int main(int argc, char** argv)
return 1; return 1;
} }
if (numFiles == 0 && genDict == 0) { if (numFiles == 0 && useDict == 0) {
return generateFile(seed, path, origPath); return generateFile(seed, path, origPath);
} else if (genDict == 0){ } else if (useDict == 0){
return generateCorpus(seed, numFiles, path, origPath); return generateCorpus(seed, numFiles, path, origPath);
} else { } else {
/* should generate files with a dictionary */ /* should generate files with a dictionary */