From 77c137b3ae4d3c961952acf659b4fa515dffb1db Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 14 Sep 2017 15:12:57 -0700 Subject: [PATCH] minor comment refactor --- lib/dictBuilder/zdict.c | 6 +++--- programs/dibio.c | 48 ++++++++++++++++++++++------------------- 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index b76e7695..1bb8b068 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -375,7 +375,7 @@ static int isIncluded(const void* in, const void* container, size_t length) return u==length; } -/*! ZDICT_checkMerge +/*! ZDICT_tryMerge() : check if dictItem can be merged, do it if possible @return : id of destination elt, 0 if not merged */ @@ -440,8 +440,8 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const static void ZDICT_removeDictItem(dictItem* table, U32 id) { - /* convention : first element is nb of elts */ - U32 const max = table->pos; + /* convention : table[0].pos stores nb of elts */ + U32 const max = table[0].pos; U32 u; if (!id) return; /* protection, should never happen */ for (u=id; u> 5; } +/* DiB_shuffle() : + * shuffle a table of file names in a semi-random way + * It improves dictionary quality by reducing "locality" impact, so if sample set is very large, + * it will load random elements from it, instead of just the first ones. */ static void DiB_shuffle(const char** fileNamesTable, unsigned nbFiles) { - /* Initialize the pseudorandom number generator */ - U32 seed = 0xFD2FB528; - unsigned i; - for (i = nbFiles - 1; i > 0; --i) { - unsigned const j = DiB_rand(&seed) % (i + 1); - const char* tmp = fileNamesTable[j]; - fileNamesTable[j] = fileNamesTable[i]; - fileNamesTable[i] = tmp; - } + U32 seed = 0xFD2FB528; + unsigned i; + for (i = nbFiles - 1; i > 0; --i) { + unsigned const j = DiB_rand(&seed) % (i + 1); + const char* const tmp = fileNamesTable[j]; + fileNamesTable[j] = fileNamesTable[i]; + fileNamesTable[i] = tmp; + } } @@ -162,7 +167,7 @@ static size_t DiB_findMaxMem(unsigned long long requiredMem) requiredMem = (((requiredMem >> 23) + 1) << 23); requiredMem += step; - if (requiredMem > maxMemory) requiredMem = maxMemory; + if (requiredMem > g_maxMemory) requiredMem = g_maxMemory; while (!testmem) { testmem = malloc((size_t)requiredMem); @@ -203,7 +208,7 @@ static void DiB_saveDict(const char* dictFileName, static int g_tooLargeSamples = 0; -static U64 DiB_getTotalCappedFileSize(const char** fileNamesTable, unsigned nbFiles) +static U64 DiB_totalCappedFileSize(const char** fileNamesTable, unsigned nbFiles) { U64 total = 0; unsigned n; @@ -236,7 +241,7 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize, { void* const dictBuffer = malloc(maxDictSize); size_t* const fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t)); - unsigned long long const totalSizeToLoad = DiB_getTotalCappedFileSize(fileNamesTable, nbFiles); + unsigned long long const totalSizeToLoad = DiB_totalCappedFileSize(fileNamesTable, nbFiles); size_t const memMult = params ? MEMMULT : COVER_MEMMULT; size_t const maxMem = DiB_findMaxMem(totalSizeToLoad * memMult) / memMult; size_t benchedSize = (size_t) MIN ((unsigned long long)maxMem, totalSizeToLoad); @@ -246,8 +251,9 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize, /* Checks */ if (params) g_displayLevel = params->zParams.notificationLevel; else if (coverParams) g_displayLevel = coverParams->zParams.notificationLevel; - else EXM_THROW(13, "Neither dictionary algorith selected"); /* should not happen */ - if ((!fileSizes) || (!srcBuffer) || (!dictBuffer)) EXM_THROW(12, "not enough memory for DiB_trainFiles"); /* should not happen */ + else EXM_THROW(13, "Neither dictionary algorithm selected"); /* should not happen */ + if ((!fileSizes) || (!srcBuffer) || (!dictBuffer)) + EXM_THROW(12, "not enough memory for DiB_trainFiles"); /* should not happen */ if (g_tooLargeSamples) { DISPLAYLEVEL(2, "! Warning : some samples are very large \n"); DISPLAYLEVEL(2, "! Note that dictionary is only useful for small files or beginning of large files. \n"); @@ -270,8 +276,7 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize, DiB_shuffle(fileNamesTable, nbFiles); nbFiles = DiB_loadFiles(srcBuffer, &benchedSize, fileSizes, fileNamesTable, nbFiles); - { - size_t dictSize; + { size_t dictSize; if (params) { DiB_fillNoise((char*)srcBuffer + benchedSize, NOISELENGTH); /* guard band, for end of buffer condition */ dictSize = ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, maxDictSize, @@ -285,9 +290,8 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize, DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\n", coverParams->k, coverParams->d, coverParams->steps); } } else { - dictSize = - ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, srcBuffer, - fileSizes, nbFiles, *coverParams); + dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, srcBuffer, + fileSizes, nbFiles, *coverParams); } if (ZDICT_isError(dictSize)) { DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */