Refactoring and benchmark without dictionary
This commit is contained in:
parent
470c8d42f4
commit
71e767ac09
@ -1,43 +0,0 @@
|
|||||||
Benchmarking Dictionary Builder
|
|
||||||
|
|
||||||
### Permitted Argument:
|
|
||||||
Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in="
|
|
||||||
|
|
||||||
###Running Test:
|
|
||||||
make test
|
|
||||||
|
|
||||||
###Usage:
|
|
||||||
Benchmark given input files: make ARG= followed by permitted arguments
|
|
||||||
|
|
||||||
### Examples:
|
|
||||||
make ARG="in=../../lib/dictBuilder in=../../lib/compress"
|
|
||||||
|
|
||||||
###Benchmarking Result:
|
|
||||||
|
|
||||||
github:
|
|
||||||
| Algorithm | Speed(sec) | Compression Ratio |
|
|
||||||
| ------------- |:-------------:| ------------------:|
|
|
||||||
| random | 0.182254 | 8.786957 |
|
|
||||||
| cover | 34.821007 | 10.430999 |
|
|
||||||
| legacy | 1.125494 | 8.989482 |
|
|
||||||
|
|
||||||
hg-commands
|
|
||||||
| Algorithm | Speed(sec) | Compression Ratio |
|
|
||||||
| ------------- |:-------------:| ------------------:|
|
|
||||||
| random | 0.089231 | 3.489515 |
|
|
||||||
| cover | 32.342462 | 4.030274 |
|
|
||||||
| legacy | 1.066594 | 3.911896 |
|
|
||||||
|
|
||||||
hg-manifest
|
|
||||||
| Algorithm | Speed(sec) | Compression Ratio |
|
|
||||||
| ------------- |:-------------:| ------------------:|
|
|
||||||
| random | 1.095083 | 2.309485 |
|
|
||||||
| cover | 517.999132 | 2.575331 |
|
|
||||||
| legacy | 10.789509 | 2.506775 |
|
|
||||||
|
|
||||||
hg-changelog
|
|
||||||
| Algorithm | Speed(sec) | Compression Ratio |
|
|
||||||
| ------------- |:-------------:| ------------------:|
|
|
||||||
| random | 0.639630 | 2.096785 |
|
|
||||||
| cover | 121.398023 | 2.175706 |
|
|
||||||
| legacy | 3.050893 | 2.058273 |
|
|
@ -1,10 +0,0 @@
|
|||||||
/*! ZDICT_trainFromBuffer_unsafe_legacy() :
|
|
||||||
Strictly Internal use only !!
|
|
||||||
Same as ZDICT_trainFromBuffer_legacy(), but does not control `samplesBuffer`.
|
|
||||||
`samplesBuffer` must be followed by noisy guard band to avoid out-of-buffer reads.
|
|
||||||
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
|
||||||
or an error code.
|
|
||||||
*/
|
|
||||||
size_t ZDICT_trainFromBuffer_unsafe_legacy(void* dictBuffer, size_t dictBufferCapacity,
|
|
||||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
||||||
ZDICT_legacy_params_t parameters);
|
|
@ -2,7 +2,7 @@ ARG :=
|
|||||||
|
|
||||||
CC ?= gcc
|
CC ?= gcc
|
||||||
CFLAGS ?= -O3
|
CFLAGS ?= -O3
|
||||||
INCLUDES := -I ../randomDictBuilder -I ../../programs -I ../../lib/common -I ../../lib -I ../../lib/dictBuilder
|
INCLUDES := -I ../randomDictBuilder -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder
|
||||||
|
|
||||||
RANDOM_FILE := ../randomDictBuilder/random.c
|
RANDOM_FILE := ../randomDictBuilder/random.c
|
||||||
IO_FILE := ../randomDictBuilder/io.c
|
IO_FILE := ../randomDictBuilder/io.c
|
||||||
@ -34,11 +34,11 @@ io.o: $(IO_FILE)
|
|||||||
$(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE)
|
$(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE)
|
||||||
|
|
||||||
libzstd.a:
|
libzstd.a:
|
||||||
$(MAKE) -C ../../lib libzstd.a
|
$(MAKE) -C ../../../lib libzstd.a
|
||||||
mv ../../lib/libzstd.a .
|
mv ../../../lib/libzstd.a .
|
||||||
|
|
||||||
.PHONY: clean
|
.PHONY: clean
|
||||||
clean:
|
clean:
|
||||||
rm -f *.o benchmark libzstd.a
|
rm -f *.o benchmark libzstd.a
|
||||||
$(MAKE) -C ../../lib clean
|
$(MAKE) -C ../../../lib clean
|
||||||
echo "Cleaning is completed"
|
echo "Cleaning is completed"
|
@ -0,0 +1,47 @@
|
|||||||
|
Benchmarking Dictionary Builder
|
||||||
|
|
||||||
|
### Permitted Argument:
|
||||||
|
Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in="
|
||||||
|
|
||||||
|
### Running Test:
|
||||||
|
make test
|
||||||
|
|
||||||
|
### Usage:
|
||||||
|
Benchmark given input files: make ARG= followed by permitted arguments
|
||||||
|
|
||||||
|
### Examples:
|
||||||
|
make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
|
||||||
|
|
||||||
|
### Benchmarking Result:
|
||||||
|
|
||||||
|
github:
|
||||||
|
| Algorithm | Speed(sec) | Compression Ratio |
|
||||||
|
| ------------- |:-------------:| ------------------:|
|
||||||
|
| nodict | 0.000004 | 2.999642 |
|
||||||
|
| random | 0.180238 | 8.786957 |
|
||||||
|
| cover | 33.891987 | 10.430999 |
|
||||||
|
| legacy | 1.077569 | 8.989482 |
|
||||||
|
|
||||||
|
hg-commands
|
||||||
|
| Algorithm | Speed(sec) | Compression Ratio |
|
||||||
|
| ------------- |:-------------:| ------------------:|
|
||||||
|
| nodict | 0.000006 | 2.425291 |
|
||||||
|
| random | 0.088735 | 3.489515 |
|
||||||
|
| cover | 35.447300 | 4.030274 |
|
||||||
|
| legacy | 1.048509 | 3.911896 |
|
||||||
|
|
||||||
|
hg-manifest
|
||||||
|
| Algorithm | Speed(sec) | Compression Ratio |
|
||||||
|
| ------------- |:-------------:| ------------------:|
|
||||||
|
| nodict | 0.000005 | 1.866385 |
|
||||||
|
| random | 1.148231 | 2.309485 |
|
||||||
|
| cover | 509.685257 | 2.575331 |
|
||||||
|
| legacy | 10.705866 | 2.506775 |
|
||||||
|
|
||||||
|
hg-changelog
|
||||||
|
| Algorithm | Speed(sec) | Compression Ratio |
|
||||||
|
| ------------- |:-------------:| ------------------:|
|
||||||
|
| nodict | 0.000005 | 1.377613 |
|
||||||
|
| random | 0.706434 | 2.096785 |
|
||||||
|
| cover | 122.815783 | 2.175706 |
|
||||||
|
| legacy | 3.010318 | 2.058273 |
|
@ -44,12 +44,14 @@ static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
|
|||||||
exit(error); \
|
exit(error); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*-*************************************
|
/*-*************************************
|
||||||
* Constants
|
* Constants
|
||||||
***************************************/
|
***************************************/
|
||||||
static const unsigned g_defaultMaxDictSize = 110 KB;
|
static const unsigned g_defaultMaxDictSize = 110 KB;
|
||||||
#define MEMMULT 11
|
#define DEFAULT_CLEVEL 3
|
||||||
#define NOISELENGTH 32
|
#define DEFAULT_DISPLAYLEVEL 2
|
||||||
|
|
||||||
|
|
||||||
/*-*************************************
|
/*-*************************************
|
||||||
* Struct
|
* Struct
|
||||||
@ -60,57 +62,6 @@ typedef struct {
|
|||||||
} dictInfo;
|
} dictInfo;
|
||||||
|
|
||||||
|
|
||||||
/*-*************************************
|
|
||||||
* Commandline related functions
|
|
||||||
***************************************/
|
|
||||||
static unsigned readU32FromChar(const char** stringPtr){
|
|
||||||
const char errorMsg[] = "error: numeric value too large";
|
|
||||||
unsigned result = 0;
|
|
||||||
while ((**stringPtr >='0') && (**stringPtr <='9')) {
|
|
||||||
unsigned const max = (((unsigned)(-1)) / 10) - 1;
|
|
||||||
if (result > max) exit(1);
|
|
||||||
result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
|
|
||||||
}
|
|
||||||
if ((**stringPtr=='K') || (**stringPtr=='M')) {
|
|
||||||
unsigned const maxK = ((unsigned)(-1)) >> 10;
|
|
||||||
if (result > maxK) exit(1);
|
|
||||||
result <<= 10;
|
|
||||||
if (**stringPtr=='M') {
|
|
||||||
if (result > maxK) exit(1);
|
|
||||||
result <<= 10;
|
|
||||||
}
|
|
||||||
(*stringPtr)++; /* skip `K` or `M` */
|
|
||||||
if (**stringPtr=='i') (*stringPtr)++;
|
|
||||||
if (**stringPtr=='B') (*stringPtr)++;
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** longCommandWArg() :
|
|
||||||
* check if *stringPtr is the same as longCommand.
|
|
||||||
* If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
|
|
||||||
* @return 0 and doesn't modify *stringPtr otherwise.
|
|
||||||
*/
|
|
||||||
static unsigned longCommandWArg(const char** stringPtr, const char* longCommand){
|
|
||||||
size_t const comSize = strlen(longCommand);
|
|
||||||
int const result = !strncmp(*stringPtr, longCommand, comSize);
|
|
||||||
if (result) *stringPtr += comSize;
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void fillNoise(void* buffer, size_t length)
|
|
||||||
{
|
|
||||||
unsigned const prime1 = 2654435761U;
|
|
||||||
unsigned const prime2 = 2246822519U;
|
|
||||||
unsigned acc = prime1;
|
|
||||||
size_t p=0;;
|
|
||||||
|
|
||||||
for (p=0; p<length; p++) {
|
|
||||||
acc *= prime2;
|
|
||||||
((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*-*************************************
|
/*-*************************************
|
||||||
* Dictionary related operations
|
* Dictionary related operations
|
||||||
***************************************/
|
***************************************/
|
||||||
@ -122,9 +73,9 @@ dictInfo* createDictFromFiles(sampleInfo *info, unsigned maxDictSize,
|
|||||||
ZDICT_random_params_t *randomParams, ZDICT_cover_params_t *coverParams,
|
ZDICT_random_params_t *randomParams, ZDICT_cover_params_t *coverParams,
|
||||||
ZDICT_legacy_params_t *legacyParams) {
|
ZDICT_legacy_params_t *legacyParams) {
|
||||||
unsigned const displayLevel = randomParams ? randomParams->zParams.notificationLevel :
|
unsigned const displayLevel = randomParams ? randomParams->zParams.notificationLevel :
|
||||||
coverParams ? coverParams->zParams.notificationLevel :
|
coverParams ? coverParams->zParams.notificationLevel :
|
||||||
legacyParams ? legacyParams->zParams.notificationLevel :
|
legacyParams ? legacyParams->zParams.notificationLevel :
|
||||||
0; /* should never happen */
|
DEFAULT_DISPLAYLEVEL; /* no dict */
|
||||||
void* const dictBuffer = malloc(maxDictSize);
|
void* const dictBuffer = malloc(maxDictSize);
|
||||||
|
|
||||||
dictInfo* dInfo;
|
dictInfo* dInfo;
|
||||||
@ -140,21 +91,15 @@ dictInfo* createDictFromFiles(sampleInfo *info, unsigned maxDictSize,
|
|||||||
}else if(coverParams) {
|
}else if(coverParams) {
|
||||||
dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize, info->srcBuffer,
|
dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize, info->srcBuffer,
|
||||||
info->samplesSizes, info->nbSamples, coverParams);
|
info->samplesSizes, info->nbSamples, coverParams);
|
||||||
} else {
|
} else if(legacyParams) {
|
||||||
size_t totalSize= 0;
|
dictSize = ZDICT_trainFromBuffer_legacy(dictBuffer, maxDictSize, info->srcBuffer,
|
||||||
for (int i = 0; i < info->nbSamples; i++) {
|
|
||||||
totalSize += info->samplesSizes[i];
|
|
||||||
}
|
|
||||||
size_t const maxMem = findMaxMem(totalSize * MEMMULT) / MEMMULT;
|
|
||||||
size_t loadedSize = (size_t) MIN ((unsigned long long)maxMem, totalSize);
|
|
||||||
fillNoise((char*)(info->srcBuffer) + loadedSize, NOISELENGTH);
|
|
||||||
dictSize = ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, maxDictSize, info->srcBuffer,
|
|
||||||
info->samplesSizes, info->nbSamples, *legacyParams);
|
info->samplesSizes, info->nbSamples, *legacyParams);
|
||||||
|
} else {
|
||||||
|
dictSize = 0;
|
||||||
}
|
}
|
||||||
if (ZDICT_isError(dictSize)) {
|
if (ZDICT_isError(dictSize)) {
|
||||||
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
|
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
|
||||||
free(dictBuffer);
|
free(dictBuffer);
|
||||||
freeSampleInfo(info);
|
|
||||||
return dInfo;
|
return dInfo;
|
||||||
}
|
}
|
||||||
dInfo = (dictInfo *)malloc(sizeof(dictInfo));
|
dInfo = (dictInfo *)malloc(sizeof(dictInfo));
|
||||||
@ -173,6 +118,7 @@ double compressWithDict(sampleInfo *srcInfo, dictInfo* dInfo, int compressionLev
|
|||||||
/* Local variables */
|
/* Local variables */
|
||||||
size_t totalCompressedSize = 0;
|
size_t totalCompressedSize = 0;
|
||||||
size_t totalOriginalSize = 0;
|
size_t totalOriginalSize = 0;
|
||||||
|
unsigned hasDict = dInfo->dictSize > 0 ? 1 : 0;
|
||||||
double cRatio;
|
double cRatio;
|
||||||
size_t dstCapacity;
|
size_t dstCapacity;
|
||||||
int i;
|
int i;
|
||||||
@ -193,15 +139,6 @@ double compressWithDict(sampleInfo *srcInfo, dictInfo* dInfo, int compressionLev
|
|||||||
dst = malloc(dstCapacity);
|
dst = malloc(dstCapacity);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Create the cctx and cdict */
|
|
||||||
cctx = ZSTD_createCCtx();
|
|
||||||
cdict = ZSTD_createCDict(dInfo->dictBuffer, dInfo->dictSize, compressionLevel);
|
|
||||||
|
|
||||||
if(!cctx || !cdict || !dst) {
|
|
||||||
cRatio = -1;
|
|
||||||
goto _cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Calculate offset for each sample */
|
/* Calculate offset for each sample */
|
||||||
offsets = (size_t *)malloc((srcInfo->nbSamples + 1) * sizeof(size_t));
|
offsets = (size_t *)malloc((srcInfo->nbSamples + 1) * sizeof(size_t));
|
||||||
offsets[0] = 0;
|
offsets[0] = 0;
|
||||||
@ -209,13 +146,35 @@ double compressWithDict(sampleInfo *srcInfo, dictInfo* dInfo, int compressionLev
|
|||||||
offsets[i] = offsets[i - 1] + srcInfo->samplesSizes[i - 1];
|
offsets[i] = offsets[i - 1] + srcInfo->samplesSizes[i - 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Create the cctx */
|
||||||
|
cctx = ZSTD_createCCtx();
|
||||||
|
if(!cctx || !dst) {
|
||||||
|
cRatio = -1;
|
||||||
|
goto _nodictCleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create CDict if there's a dictionary stored on buffer */
|
||||||
|
if (hasDict) {
|
||||||
|
cdict = ZSTD_createCDict(dInfo->dictBuffer, dInfo->dictSize, compressionLevel);
|
||||||
|
if(!cdict) {
|
||||||
|
cRatio = -1;
|
||||||
|
goto _dictCleanup;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Compress each sample and sum their sizes*/
|
/* Compress each sample and sum their sizes*/
|
||||||
const BYTE *const samples = (const BYTE *)srcInfo->srcBuffer;
|
const BYTE *const samples = (const BYTE *)srcInfo->srcBuffer;
|
||||||
for (i = 0; i < srcInfo->nbSamples; i++) {
|
for (i = 0; i < srcInfo->nbSamples; i++) {
|
||||||
const size_t compressedSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity, samples + offsets[i], srcInfo->samplesSizes[i], cdict);
|
size_t compressedSize;
|
||||||
|
if(hasDict) {
|
||||||
|
compressedSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity, samples + offsets[i], srcInfo->samplesSizes[i], cdict);
|
||||||
|
} else {
|
||||||
|
compressedSize = ZSTD_compressCCtx(cctx, dst, dstCapacity,samples + offsets[i], srcInfo->samplesSizes[i], compressionLevel);
|
||||||
|
}
|
||||||
if (ZSTD_isError(compressedSize)) {
|
if (ZSTD_isError(compressedSize)) {
|
||||||
cRatio = -1;
|
cRatio = -1;
|
||||||
goto _cleanup;
|
if(hasDict) goto _dictCleanup;
|
||||||
|
else goto _nodictCleanup;
|
||||||
}
|
}
|
||||||
totalCompressedSize += compressedSize;
|
totalCompressedSize += compressedSize;
|
||||||
}
|
}
|
||||||
@ -230,15 +189,14 @@ double compressWithDict(sampleInfo *srcInfo, dictInfo* dInfo, int compressionLev
|
|||||||
DISPLAYLEVEL(2, "compressed size is %lu\n", totalCompressedSize);
|
DISPLAYLEVEL(2, "compressed size is %lu\n", totalCompressedSize);
|
||||||
cRatio = (double)totalOriginalSize/(double)totalCompressedSize;
|
cRatio = (double)totalOriginalSize/(double)totalCompressedSize;
|
||||||
|
|
||||||
_cleanup:
|
_dictCleanup:
|
||||||
if(dst) {
|
|
||||||
free(dst);
|
|
||||||
}
|
|
||||||
if(offsets) {
|
|
||||||
free(offsets);
|
|
||||||
}
|
|
||||||
ZSTD_freeCCtx(cctx);
|
|
||||||
ZSTD_freeCDict(cdict);
|
ZSTD_freeCDict(cdict);
|
||||||
|
|
||||||
|
_nodictCleanup:
|
||||||
|
free(dst);
|
||||||
|
free(offsets);
|
||||||
|
ZSTD_freeCCtx(cctx);
|
||||||
|
|
||||||
return cRatio;
|
return cRatio;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -257,102 +215,48 @@ void freeDictInfo(dictInfo* info) {
|
|||||||
/*-********************************************************
|
/*-********************************************************
|
||||||
* Benchmarking functions
|
* Benchmarking functions
|
||||||
**********************************************************/
|
**********************************************************/
|
||||||
/** benchmarkRandom() :
|
/** benchmarkDictBuilder() :
|
||||||
* Measure how long random dictionary builder takes and compression ratio with the random dictionary
|
* Measure how long a dictionary builder takes and compression ratio with the dictionary built
|
||||||
* @return 0 if benchmark successfully, 1 otherwise
|
* @return 0 if the benchmark succeeds, 1 otherwise
|
||||||
*/
|
*/
|
||||||
int benchmarkRandom(sampleInfo *srcInfo, unsigned maxDictSize, ZDICT_random_params_t *randomParam) {
|
int benchmarkDictBuilder(sampleInfo *srcInfo, unsigned maxDictSize, ZDICT_random_params_t *randomParam,
|
||||||
const int displayLevel = randomParam->zParams.notificationLevel;
|
ZDICT_cover_params_t *coverParam, ZDICT_legacy_params_t *legacyParam) {
|
||||||
|
/* Local variables */
|
||||||
|
const unsigned displayLevel = randomParam ? randomParam->zParams.notificationLevel :
|
||||||
|
coverParam ? coverParam->zParams.notificationLevel :
|
||||||
|
legacyParam ? legacyParam->zParams.notificationLevel :
|
||||||
|
DEFAULT_DISPLAYLEVEL; /* no dict */
|
||||||
|
const char* name = randomParam ? "RANDOM" :
|
||||||
|
coverParam ? "COVER" :
|
||||||
|
legacyParam ? "LEGACY" :
|
||||||
|
"NODICT"; /* no dict */
|
||||||
|
const unsigned cLevel = randomParam ? randomParam->zParams.compressionLevel :
|
||||||
|
coverParam ? coverParam->zParams.compressionLevel :
|
||||||
|
legacyParam ? legacyParam->zParams.compressionLevel :
|
||||||
|
DEFAULT_CLEVEL; /* no dict */
|
||||||
int result = 0;
|
int result = 0;
|
||||||
clock_t t;
|
|
||||||
t = clock();
|
/* Calculate speed */
|
||||||
dictInfo* dInfo = createDictFromFiles(srcInfo, maxDictSize, randomParam, NULL, NULL);
|
const UTIL_time_t begin = UTIL_getTime();
|
||||||
t = clock() - t;
|
dictInfo* dInfo = createDictFromFiles(srcInfo, maxDictSize, randomParam, coverParam, legacyParam);
|
||||||
double time_taken = ((double)t)/CLOCKS_PER_SEC;
|
const U64 timeMicro = UTIL_clockSpanMicro(begin);
|
||||||
|
const double timeSec = timeMicro / (double)SEC_TO_MICRO;
|
||||||
if (!dInfo) {
|
if (!dInfo) {
|
||||||
DISPLAYLEVEL(1, "RANDOM does not train successfully\n");
|
DISPLAYLEVEL(1, "%s does not train successfully\n", name);
|
||||||
result = 1;
|
result = 1;
|
||||||
goto _cleanup;
|
goto _cleanup;
|
||||||
}
|
}
|
||||||
DISPLAYLEVEL(2, "RANDOM took %f seconds to execute \n", time_taken);
|
DISPLAYLEVEL(2, "%s took %f seconds to execute \n", name, timeSec);
|
||||||
|
|
||||||
double cRatio = compressWithDict(srcInfo, dInfo, randomParam->zParams.compressionLevel, displayLevel);
|
/* Calculate compression ratio */
|
||||||
|
double cRatio = compressWithDict(srcInfo, dInfo, cLevel, displayLevel);
|
||||||
if (cRatio < 0) {
|
if (cRatio < 0) {
|
||||||
DISPLAYLEVEL(1, "Compressing with RANDOM dictionary does not work\n");
|
DISPLAYLEVEL(1, "Compressing with %s dictionary does not work\n", name);
|
||||||
result = 1;
|
|
||||||
goto _cleanup;
|
|
||||||
}
|
|
||||||
DISPLAYLEVEL(2, "Compression ratio with random dictionary is %f\n", cRatio);
|
|
||||||
|
|
||||||
|
|
||||||
_cleanup:
|
|
||||||
freeDictInfo(dInfo);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** benchmarkCover() :
|
|
||||||
* Measure how long random dictionary builder takes and compression ratio with the cover dictionary
|
|
||||||
* @return 0 if benchmark successfully, 1 otherwise
|
|
||||||
*/
|
|
||||||
int benchmarkCover(sampleInfo *srcInfo, unsigned maxDictSize,
|
|
||||||
ZDICT_cover_params_t *coverParam) {
|
|
||||||
const int displayLevel = coverParam->zParams.notificationLevel;
|
|
||||||
int result = 0;
|
|
||||||
clock_t t;
|
|
||||||
t = clock();
|
|
||||||
dictInfo* dInfo = createDictFromFiles(srcInfo, maxDictSize, NULL, coverParam, NULL);
|
|
||||||
t = clock() - t;
|
|
||||||
double time_taken = ((double)t)/CLOCKS_PER_SEC;
|
|
||||||
if (!dInfo) {
|
|
||||||
DISPLAYLEVEL(1, "COVER does not train successfully\n");
|
|
||||||
result = 1;
|
|
||||||
goto _cleanup;
|
|
||||||
}
|
|
||||||
DISPLAYLEVEL(2, "COVER took %f seconds to execute \n", time_taken);
|
|
||||||
|
|
||||||
double cRatio = compressWithDict(srcInfo, dInfo, coverParam->zParams.compressionLevel, displayLevel);
|
|
||||||
if (cRatio < 0) {
|
|
||||||
DISPLAYLEVEL(1, "Compressing with COVER dictionary does not work\n");
|
|
||||||
result = 1;
|
|
||||||
goto _cleanup;
|
|
||||||
}
|
|
||||||
DISPLAYLEVEL(2, "Compression ratio with cover dictionary is %f\n", cRatio);
|
|
||||||
|
|
||||||
_cleanup:
|
|
||||||
freeDictInfo(dInfo);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/** benchmarkLegacy() :
|
|
||||||
* Measure how long legacy dictionary builder takes and compression ratio with the legacy dictionary
|
|
||||||
* @return 0 if benchmark successfully, 1 otherwise
|
|
||||||
*/
|
|
||||||
int benchmarkLegacy(sampleInfo *srcInfo, unsigned maxDictSize, ZDICT_legacy_params_t *legacyParam) {
|
|
||||||
const int displayLevel = legacyParam->zParams.notificationLevel;
|
|
||||||
int result = 0;
|
|
||||||
clock_t t;
|
|
||||||
t = clock();
|
|
||||||
dictInfo* dInfo = createDictFromFiles(srcInfo, maxDictSize, NULL, NULL, legacyParam);
|
|
||||||
t = clock() - t;
|
|
||||||
double time_taken = ((double)t)/CLOCKS_PER_SEC;
|
|
||||||
if (!dInfo) {
|
|
||||||
DISPLAYLEVEL(1, "LEGACY does not train successfully\n");
|
|
||||||
result = 1;
|
result = 1;
|
||||||
goto _cleanup;
|
goto _cleanup;
|
||||||
|
|
||||||
}
|
}
|
||||||
DISPLAYLEVEL(2, "LEGACY took %f seconds to execute \n", time_taken);
|
DISPLAYLEVEL(2, "Compression ratio with %s dictionary is %f\n", name, cRatio);
|
||||||
|
|
||||||
double cRatio = compressWithDict(srcInfo, dInfo, legacyParam->zParams.compressionLevel, displayLevel);
|
|
||||||
if (cRatio < 0) {
|
|
||||||
DISPLAYLEVEL(1, "Compressing with LEGACY dictionary does not work\n");
|
|
||||||
result = 1;
|
|
||||||
goto _cleanup;
|
|
||||||
|
|
||||||
}
|
|
||||||
DISPLAYLEVEL(2, "Compression ratio with legacy dictionary is %f\n", cRatio);
|
|
||||||
|
|
||||||
_cleanup:
|
_cleanup:
|
||||||
freeDictInfo(dInfo);
|
freeDictInfo(dInfo);
|
||||||
@ -363,15 +267,16 @@ _cleanup:
|
|||||||
|
|
||||||
int main(int argCount, const char* argv[])
|
int main(int argCount, const char* argv[])
|
||||||
{
|
{
|
||||||
int displayLevel = 2;
|
const int displayLevel = DEFAULT_DISPLAYLEVEL;
|
||||||
const char* programName = argv[0];
|
const char* programName = argv[0];
|
||||||
int result = 0;
|
int result = 0;
|
||||||
|
|
||||||
/* Initialize arguments to default values */
|
/* Initialize arguments to default values */
|
||||||
unsigned k = 200;
|
const unsigned k = 200;
|
||||||
unsigned d = 6;
|
const unsigned d = 6;
|
||||||
unsigned cLevel = 3;
|
const unsigned cLevel = DEFAULT_CLEVEL;
|
||||||
unsigned dictID = 0;
|
const unsigned dictID = 0;
|
||||||
unsigned maxDictSize = g_defaultMaxDictSize;
|
const unsigned maxDictSize = g_defaultMaxDictSize;
|
||||||
|
|
||||||
/* Initialize table to store input files */
|
/* Initialize table to store input files */
|
||||||
const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
|
const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
|
||||||
@ -379,7 +284,7 @@ int main(int argCount, const char* argv[])
|
|||||||
|
|
||||||
char* fileNamesBuf = NULL;
|
char* fileNamesBuf = NULL;
|
||||||
unsigned fileNamesNb = filenameIdx;
|
unsigned fileNamesNb = filenameIdx;
|
||||||
int followLinks = 0;
|
const int followLinks = 0;
|
||||||
const char** extendedFileList = NULL;
|
const char** extendedFileList = NULL;
|
||||||
|
|
||||||
/* Parse arguments */
|
/* Parse arguments */
|
||||||
@ -394,7 +299,6 @@ int main(int argCount, const char* argv[])
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Get the list of all files recursively (because followLinks==0)*/
|
/* Get the list of all files recursively (because followLinks==0)*/
|
||||||
extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
|
extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
|
||||||
&fileNamesNb, followLinks);
|
&fileNamesNb, followLinks);
|
||||||
@ -406,6 +310,7 @@ int main(int argCount, const char* argv[])
|
|||||||
filenameIdx = fileNamesNb;
|
filenameIdx = fileNamesNb;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* get sampleInfo */
|
||||||
size_t blockSize = 0;
|
size_t blockSize = 0;
|
||||||
sampleInfo* srcInfo= getSampleInfo(filenameTable,
|
sampleInfo* srcInfo= getSampleInfo(filenameTable,
|
||||||
filenameIdx, blockSize, maxDictSize, displayLevel);
|
filenameIdx, blockSize, maxDictSize, displayLevel);
|
||||||
@ -416,38 +321,53 @@ int main(int argCount, const char* argv[])
|
|||||||
zParams.notificationLevel = displayLevel;
|
zParams.notificationLevel = displayLevel;
|
||||||
zParams.dictID = dictID;
|
zParams.dictID = dictID;
|
||||||
|
|
||||||
|
/* with no dict */
|
||||||
|
{
|
||||||
|
const int noDictResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL);
|
||||||
|
if(noDictResult) {
|
||||||
|
result = 1;
|
||||||
|
goto _cleanup;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* for random */
|
/* for random */
|
||||||
ZDICT_random_params_t randomParam;
|
{
|
||||||
randomParam.zParams = zParams;
|
ZDICT_random_params_t randomParam;
|
||||||
randomParam.k = k;
|
randomParam.zParams = zParams;
|
||||||
int randomResult = benchmarkRandom(srcInfo, maxDictSize, &randomParam);
|
randomParam.k = k;
|
||||||
if(randomResult) {
|
const int randomResult = benchmarkDictBuilder(srcInfo, maxDictSize, &randomParam, NULL, NULL);
|
||||||
result = 1;
|
if(randomResult) {
|
||||||
goto _cleanup;
|
result = 1;
|
||||||
|
goto _cleanup;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* for cover */
|
/* for cover */
|
||||||
ZDICT_cover_params_t coverParam;
|
{
|
||||||
memset(&coverParam, 0, sizeof(coverParam));
|
ZDICT_cover_params_t coverParam;
|
||||||
coverParam.zParams = zParams;
|
memset(&coverParam, 0, sizeof(coverParam));
|
||||||
coverParam.splitPoint = 1.0;
|
coverParam.zParams = zParams;
|
||||||
coverParam.d = d;
|
coverParam.splitPoint = 1.0;
|
||||||
coverParam.steps = 40;
|
coverParam.d = d;
|
||||||
coverParam.nbThreads = 1;
|
coverParam.steps = 40;
|
||||||
int coverOptResult = benchmarkCover(srcInfo, maxDictSize, &coverParam);
|
coverParam.nbThreads = 1;
|
||||||
if(coverOptResult) {
|
const int coverOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL);
|
||||||
result = 1;
|
if(coverOptResult) {
|
||||||
goto _cleanup;
|
result = 1;
|
||||||
|
goto _cleanup;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* for legacy */
|
/* for legacy */
|
||||||
ZDICT_legacy_params_t legacyParam;
|
{
|
||||||
legacyParam.zParams = zParams;
|
ZDICT_legacy_params_t legacyParam;
|
||||||
legacyParam.selectivityLevel = 9;
|
legacyParam.zParams = zParams;
|
||||||
int legacyResult = benchmarkLegacy(srcInfo, maxDictSize, &legacyParam);
|
legacyParam.selectivityLevel = 9;
|
||||||
if(legacyResult) {
|
const int legacyResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, &legacyParam);
|
||||||
result = 1;
|
if(legacyResult) {
|
||||||
goto _cleanup;
|
result = 1;
|
||||||
|
goto _cleanup;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Free allocated memory */
|
/* Free allocated memory */
|
@ -0,0 +1,6 @@
|
|||||||
|
/* ZDICT_trainFromBuffer_legacy() :
|
||||||
|
* issue : samplesBuffer needs to be followed by a noisy guard band.
|
||||||
|
* work around : duplicate the buffer, and add the noise */
|
||||||
|
size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
|
||||||
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
||||||
|
ZDICT_legacy_params_t params);
|
@ -1,2 +1,2 @@
|
|||||||
echo "Benchmark with in=../../lib/common"
|
echo "Benchmark with in=../../lib/common"
|
||||||
./benchmark in=../../lib/common
|
./benchmark in=../../../lib/common
|
@ -2,9 +2,9 @@ ARG :=
|
|||||||
|
|
||||||
CC ?= gcc
|
CC ?= gcc
|
||||||
CFLAGS ?= -O3
|
CFLAGS ?= -O3
|
||||||
INCLUDES := -I ../../programs -I ../../lib/common -I ../../lib -I ../../lib/dictBuilder
|
INCLUDES := -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder
|
||||||
|
|
||||||
TEST_INPUT := ../../lib
|
TEST_INPUT := ../../../lib
|
||||||
TEST_OUTPUT := randomDict
|
TEST_OUTPUT := randomDict
|
||||||
|
|
||||||
all: main run clean
|
all: main run clean
|
||||||
@ -30,8 +30,8 @@ io.o: io.c
|
|||||||
$(CC) $(CFLAGS) $(INCLUDES) -c io.c
|
$(CC) $(CFLAGS) $(INCLUDES) -c io.c
|
||||||
|
|
||||||
libzstd.a:
|
libzstd.a:
|
||||||
$(MAKE) -C ../../lib libzstd.a
|
$(MAKE) -C ../../../lib libzstd.a
|
||||||
mv ../../lib/libzstd.a .
|
mv ../../../lib/libzstd.a .
|
||||||
|
|
||||||
.PHONY: testrun
|
.PHONY: testrun
|
||||||
testrun: main
|
testrun: main
|
||||||
@ -48,5 +48,5 @@ testshell: test.sh
|
|||||||
.PHONY: clean
|
.PHONY: clean
|
||||||
clean:
|
clean:
|
||||||
rm -f *.o main libzstd.a
|
rm -f *.o main libzstd.a
|
||||||
$(MAKE) -C ../../lib clean
|
$(MAKE) -C ../../../lib clean
|
||||||
echo "Cleaning is completed"
|
echo "Cleaning is completed"
|
@ -16,5 +16,5 @@ To build a random dictionary with the provided arguments: make ARG= followed by
|
|||||||
|
|
||||||
|
|
||||||
### Examples:
|
### Examples:
|
||||||
make ARG="in=../../lib/dictBuilder out=dict100 dictID=520"
|
make ARG="in=../../../lib/dictBuilder out=dict100 dictID=520"
|
||||||
make ARG="in=../../lib/dictBuilder in=../../lib/compress"
|
make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
|
@ -53,6 +53,39 @@ static const size_t g_maxMemory = (sizeof(size_t) == 4) ?
|
|||||||
#define NOISELENGTH 32
|
#define NOISELENGTH 32
|
||||||
|
|
||||||
|
|
||||||
|
/*-*************************************
|
||||||
|
* Commandline related functions
|
||||||
|
***************************************/
|
||||||
|
/** readU32FromChar() :
 *  Parse an unsigned decimal number located at *stringPtr.
 *  The digits may be followed by a binary scale suffix : 'K' multiplies the
 *  value by 1024, 'M' by 1024*1024. The suffix may itself be followed by an
 *  optional 'i' and then an optional 'B' (so "64K", "64KB" and "64KiB" are
 *  all accepted).
 *  *stringPtr is advanced past every character that was consumed.
 * @return : the parsed value (0 when no digit is present).
 *  Note : if the value would not fit into an `unsigned`, the process is
 *         terminated with exit(1); no message is printed.
 */
unsigned readU32FromChar(const char** stringPtr){
    unsigned result = 0;

    /* accumulate decimal digits */
    while ((**stringPtr >= '0') && (**stringPtr <= '9')) {
        /* conservative bound : below it, result*10 + 9 cannot overflow */
        unsigned const max = (((unsigned)(-1)) / 10) - 1;
        if (result > max) exit(1);
        result *= 10;
        result += (unsigned)(**stringPtr - '0');
        (*stringPtr)++;
    }

    /* optional scale suffix */
    if ((**stringPtr == 'K') || (**stringPtr == 'M')) {
        /* largest value that can still be shifted left by 10 bits */
        unsigned const maxK = ((unsigned)(-1)) >> 10;
        if (result > maxK) exit(1);
        result <<= 10;
        if (**stringPtr == 'M') {
            /* 'M' is a second *1024 step : re-check before shifting again */
            if (result > maxK) exit(1);
            result <<= 10;
        }
        (*stringPtr)++;  /* skip `K` or `M` */
        if (**stringPtr == 'i') (*stringPtr)++;
        if (**stringPtr == 'B') (*stringPtr)++;
    }
    return result;
}
|
||||||
|
|
||||||
|
/** longCommandWArg() :
 *  Check whether the string at *stringPtr begins with longCommand.
 *  If yes, @return 1 and advance *stringPtr to the character which
 *  immediately follows longCommand.
 *  Otherwise, @return 0 and leave *stringPtr untouched.
 */
unsigned longCommandWArg(const char** stringPtr, const char* longCommand){
    size_t const comSize = strlen(longCommand);
    /* computed directly as unsigned so it matches the declared return type */
    unsigned const result = (strncmp(*stringPtr, longCommand, comSize) == 0);
    if (result) *stringPtr += comSize;
    return result;
}
|
||||||
|
|
||||||
|
|
||||||
/* ********************************************************
|
/* ********************************************************
|
||||||
* File related operations
|
* File related operations
|
@ -50,5 +50,11 @@ void freeSampleInfo(sampleInfo *info);
|
|||||||
void saveDict(const char* dictFileName, const void* buff, size_t buffSize);
|
void saveDict(const char* dictFileName, const void* buff, size_t buffSize);
|
||||||
|
|
||||||
|
|
||||||
|
unsigned readU32FromChar(const char** stringPtr);
|
||||||
|
|
||||||
size_t findMaxMem(unsigned long long requiredMem);
|
/** longCommandWArg() :
|
||||||
|
* check if *stringPtr is the same as longCommand.
|
||||||
|
* If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
|
||||||
|
* @return 0 and doesn't modify *stringPtr otherwise.
|
||||||
|
*/
|
||||||
|
unsigned longCommandWArg(const char** stringPtr, const char* longCommand);
|
@ -52,46 +52,6 @@ static const unsigned g_defaultMaxDictSize = 110 KB;
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*-*************************************
|
|
||||||
* Commandline related functions
|
|
||||||
***************************************/
|
|
||||||
static unsigned readU32FromChar(const char** stringPtr){
|
|
||||||
const char errorMsg[] = "error: numeric value too large";
|
|
||||||
unsigned result = 0;
|
|
||||||
while ((**stringPtr >='0') && (**stringPtr <='9')) {
|
|
||||||
unsigned const max = (((unsigned)(-1)) / 10) - 1;
|
|
||||||
if (result > max) exit(1);
|
|
||||||
result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
|
|
||||||
}
|
|
||||||
if ((**stringPtr=='K') || (**stringPtr=='M')) {
|
|
||||||
unsigned const maxK = ((unsigned)(-1)) >> 10;
|
|
||||||
if (result > maxK) exit(1);
|
|
||||||
result <<= 10;
|
|
||||||
if (**stringPtr=='M') {
|
|
||||||
if (result > maxK) exit(1);
|
|
||||||
result <<= 10;
|
|
||||||
}
|
|
||||||
(*stringPtr)++; /* skip `K` or `M` */
|
|
||||||
if (**stringPtr=='i') (*stringPtr)++;
|
|
||||||
if (**stringPtr=='B') (*stringPtr)++;
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** longCommandWArg() :
|
|
||||||
* check if *stringPtr is the same as longCommand.
|
|
||||||
* If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
|
|
||||||
* @return 0 and doesn't modify *stringPtr otherwise.
|
|
||||||
*/
|
|
||||||
static unsigned longCommandWArg(const char** stringPtr, const char* longCommand){
|
|
||||||
size_t const comSize = strlen(longCommand);
|
|
||||||
int const result = !strncmp(*stringPtr, longCommand, comSize);
|
|
||||||
if (result) *stringPtr += comSize;
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*-*************************************
|
/*-*************************************
|
||||||
* RANDOM
|
* RANDOM
|
||||||
***************************************/
|
***************************************/
|
@ -1,12 +1,12 @@
|
|||||||
echo "Building random dictionary with in=../../lib/common k=200 out=dict1"
|
echo "Building random dictionary with in=../../lib/common k=200 out=dict1"
|
||||||
./main in=../../lib/common k=200 out=dict1
|
./main in=../../../lib/common k=200 out=dict1
|
||||||
zstd -be3 -D dict1 -r ../../lib/common -q
|
zstd -be3 -D dict1 -r ../../../lib/common -q
|
||||||
echo "Building random dictionary with in=../../lib/common k=500 out=dict2 dictID=100 maxdict=140000"
|
echo "Building random dictionary with in=../../lib/common k=500 out=dict2 dictID=100 maxdict=140000"
|
||||||
./main in=../../lib/common k=500 out=dict2 dictID=100 maxdict=140000
|
./main in=../../../lib/common k=500 out=dict2 dictID=100 maxdict=140000
|
||||||
zstd -be3 -D dict2 -r ../../lib/common -q
|
zstd -be3 -D dict2 -r ../../../lib/common -q
|
||||||
echo "Building random dictionary with 2 sample sources"
|
echo "Building random dictionary with 2 sample sources"
|
||||||
./main in=../../lib/common in=../../lib/compress out=dict3
|
./main in=../../../lib/common in=../../../lib/compress out=dict3
|
||||||
zstd -be3 -D dict3 -r ../../lib/common -q
|
zstd -be3 -D dict3 -r ../../../lib/common -q
|
||||||
echo "Removing dict1 dict2 dict3"
|
echo "Removing dict1 dict2 dict3"
|
||||||
rm -f dict1 dict2 dict3
|
rm -f dict1 dict2 dict3
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user