benchmark can use dictionary

dev
Yann Collet 2015-12-18 01:26:48 +01:00
parent fdcad6d3e1
commit 31683c0b28
7 changed files with 134 additions and 81 deletions

View File

@ -2117,7 +2117,7 @@ size_t ZSTD_compressBegin(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, int comp
}
/** ZSTD_compressEnd
/*! ZSTD_compressEnd
* Write frame epilogue
* @return : nb of bytes written into dst (or an error code) */
size_t ZSTD_compressEnd(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize)
@ -2139,6 +2139,7 @@ size_t ZSTD_compressEnd(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize)
size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
void* dst, size_t maxDstSize,
const void* src, size_t srcSize,
const void* dict,size_t dictSize,
ZSTD_parameters params)
{
BYTE* const ostart = (BYTE*)dst;
@ -2151,9 +2152,15 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
op += oSize;
maxDstSize -= oSize;
/* dictionary */
if (dict)
{
oSize = ZSTD_compress_insertDictionary(ctx, dict, dictSize);
if (ZSTD_isError(oSize)) return oSize;
}
/* body (compression) */
ctx->base = (const BYTE*)src;
oSize = ZSTD_compress_generic (ctx, op, maxDstSize, src, srcSize);
oSize = ZSTD_compressContinue (ctx, op, maxDstSize, src, srcSize);
if(ZSTD_isError(oSize)) return oSize;
op += oSize;
maxDstSize -= oSize;
@ -2166,9 +2173,14 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
return (op - ostart);
}
/*! ZSTD_compress_usingDict
* Compression of `src` into `dst` using a compression level and an optional dictionary.
* Thin wrapper : every argument is forwarded unchanged to ZSTD_compress_advanced(),
* and the parameters come from ZSTD_getParams(compressionLevel, srcSize+dictSize),
* i.e. the size given to ZSTD_getParams() is the source size plus the dictionary size.
* @return : the value returned by ZSTD_compress_advanced() */
size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel)
{
return ZSTD_compress_advanced(ctx, dst, maxDstSize, src, srcSize, dict, dictSize, ZSTD_getParams(compressionLevel, srcSize+dictSize));
}
size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel)
{
return ZSTD_compress_advanced(ctx, dst, maxDstSize, src, srcSize, ZSTD_getParams(compressionLevel, srcSize));
return ZSTD_compress_advanced(ctx, dst, maxDstSize, src, srcSize, NULL, 0, ZSTD_getParams(compressionLevel, srcSize));
}
size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel)
@ -2181,40 +2193,3 @@ size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSi
return result;
}
/*! ZSTD_compress_usingDict
* Compresses `src` into `dst` in four steps :
* header (ZSTD_compressBegin_advanced), optional dictionary insertion,
* body (ZSTD_compressContinue), then frame closing (ZSTD_compressEnd).
* Parameters are obtained from ZSTD_getParams(compressionLevel, srcSize+dictSize).
* `dict` may be NULL, in which case the dictionary step is skipped.
* @return : nb of bytes written into dst (op - ostart),
*           or the value of the first step for which ZSTD_isError() is true */
size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
void* dst, size_t maxDstSize,
const void* src, size_t srcSize,
const void* dict, size_t dictSize,
int compressionLevel)
{
BYTE* const ostart = (BYTE*)dst;
BYTE* op = ostart;
size_t oSize;
/* Header */
oSize = ZSTD_compressBegin_advanced(ctx, dst, maxDstSize, ZSTD_getParams(compressionLevel, srcSize+dictSize));
if (ZSTD_isError(oSize)) return oSize;
/* advance the write pointer and shrink the remaining room by what was just written */
op += oSize;
maxDstSize -= oSize;
/* Dictionary (optional) : the result is only tested for error, op and maxDstSize are left untouched */
if (dict)
{
oSize = ZSTD_compress_insertDictionary(ctx, dict, dictSize);
if (ZSTD_isError(oSize)) return oSize;
}
/* body (compression) */
oSize = ZSTD_compressContinue(ctx, op, maxDstSize, src, srcSize);
if (ZSTD_isError(oSize)) return oSize;
op += oSize;
maxDstSize -= oSize;
/* Close frame */
oSize = ZSTD_compressEnd(ctx, op, maxDstSize);
if (ZSTD_isError(oSize)) return oSize;
op += oSize;
/* total = distance between the current write position and the start of dst */
return (op - ostart);
}

View File

@ -676,7 +676,10 @@ static size_t ZSTD_decompressBlock(
}
size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx,
void* dst, size_t maxDstSize,
const void* src, size_t srcSize,
const void* dict, size_t dictSize)
{
const BYTE* ip = (const BYTE*)src;
const BYTE* iend = ip + srcSize;
@ -686,9 +689,19 @@ size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const v
size_t remainingSize = srcSize;
blockProperties_t blockProperties;
/* init */
ctx->vBase = ctx->base = ctx->dictEnd = dst;
ZSTD_resetDCtx(ctx);
if (dict)
{
ZSTD_decompress_insertDictionary(ctx, dict, dictSize);
ctx->dictEnd = ctx->previousDstEnd;
ctx->vBase = (const char*)dst - ((const char*)(ctx->previousDstEnd) - (const char*)(ctx->base));
ctx->base = dst;
}
else
{
ctx->vBase = ctx->base = ctx->dictEnd = dst;
}
/* Frame Header */
{
@ -749,10 +762,16 @@ size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const v
return op-ostart;
}
/*! ZSTD_decompressDCtx
* Decompression using a caller-provided context and no dictionary :
* forwards to ZSTD_decompress_usingDict() with dict==NULL and dictSize==0.
* @return : the value returned by ZSTD_decompress_usingDict() */
size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
return ZSTD_decompress_usingDict(dctx, dst, maxDstSize, src, srcSize, NULL, 0);
}
size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
ZSTD_DCtx ctx;
return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
ZSTD_DCtx dctx;
return ZSTD_decompressDCtx(&dctx, dst, maxDstSize, src, srcSize);
}

View File

@ -80,7 +80,7 @@ typedef struct
/* *************************************
* Advanced function
* Advanced functions
***************************************/
/** ZSTD_getParams
* return ZSTD_parameters structure for a selected compression level and srcSize.
@ -91,21 +91,40 @@ ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSizeHint
* correct params value to remain within authorized range */
ZSTDLIB_API void ZSTD_validateParams(ZSTD_parameters* params);
/** ZSTD_compress_advanced
* Same as ZSTD_compressCCtx(), with fine-tune control of each compression parameter */
ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
void* dst, size_t maxDstSize,
const void* src, size_t srcSize,
ZSTD_parameters params);
/** ZSTD_compress_usingDict
* Same as ZSTD_compressCCtx(), using a Dictionary content as prefix */
* Same as ZSTD_compressCCtx(), using a Dictionary content as prefix
* Note : dict can be NULL, in which case, it's equivalent to ZSTD_compressCCtx() */
ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
void* dst, size_t maxDstSize,
const void* src, size_t srcSize,
const void* dict,size_t dictSize,
int compressionLevel);
/** ZSTD_compress_advanced
* Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter :
* `params` is provided directly by the caller instead of being derived from a compression level.
* Note : dict can be NULL; callers without a dictionary pass (NULL, 0).
* @return : nb of bytes written into dst, or an error code which can be tested using ZSTD_isError() */
ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
void* dst, size_t maxDstSize,
const void* src, size_t srcSize,
const void* dict,size_t dictSize,
ZSTD_parameters params);
/** Decompression context management */
typedef struct ZSTD_DCtx_s ZSTD_DCtx;
ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void);
ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
/** ZSTD_decompressDCtx
* Same as ZSTD_decompress, with pre-allocated DCtx structure
* NOTE(review) : declared without ZSTDLIB_API, unlike the other ZSTDLIB_API-prefixed
*                declarations of this header - confirm whether the omission is intended */
size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
/** ZSTD_decompress_usingDict
* Same as ZSTD_decompressDCtx, using a Dictionary content as prefix
* Note : dict can be NULL, in which case, it's equivalent to ZSTD_decompressDCtx()
* NOTE(review) : declared without ZSTDLIB_API, unlike the other ZSTDLIB_API-prefixed
*                declarations of this header - confirm whether the omission is intended */
size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx,
void* dst, size_t maxDstSize,
const void* src, size_t srcSize,
const void* dict, size_t dictSize);
/* **************************************
* Streaming functions (direct mode)
@ -118,7 +137,7 @@ ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t maxD
ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize);
/**
Streaming compression, bufferless mode
Streaming compression, direct mode (bufferless)
A ZSTD_CCtx object is required to track streaming operations.
Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage it.
@ -139,14 +158,11 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t maxDstSiz
Finish a frame with ZSTD_compressEnd(), which will write the epilogue.
Without it, the frame will be considered incomplete by decoders.
You can then re-use ZSTD_CCtx to compress new frames.
You can then reuse ZSTD_CCtx to compress new frames.
*/
typedef struct ZSTD_DCtx_s ZSTD_DCtx;
ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void);
ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
ZSTDLIB_API size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx);
ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize);
ZSTDLIB_API void ZSTD_decompress_insertDictionary(ZSTD_DCtx* ctx, const void* src, size_t srcSize);
@ -168,7 +184,8 @@ ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t ma
>0 : means there is not enough data in src. Provides the expected size to successfully decode header.
errorCode, which can be tested using ZSTD_isError() (For example, if it's not a ZSTD header)
Then, you can optionally insert a dictionary. This operation must mimic the compressor behavior, otherwise decompression will fail or be corrupted.
Then, you can optionally insert a dictionary.
This operation must mimic the compressor behavior, otherwise decompression will fail or be corrupted.
Then it's possible to start decompression.
Use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternately.

View File

@ -63,7 +63,7 @@
#endif
#include "mem.h"
#include "zstd.h"
#include "zstd_static.h"
#include "xxhash.h"
#include "datagen.h" /* RDG_genBuffer */
@ -215,7 +215,8 @@ typedef struct
static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
const char* displayName, int cLevel,
const size_t* fileSizes, U32 nbFiles)
const size_t* fileSizes, U32 nbFiles,
const void* dictBuffer, size_t dictBufferSize)
{
const size_t blockSize = (g_blockSize ? g_blockSize : srcSize) + (!srcSize); /* avoid div by 0 */
const U32 maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles;
@ -223,6 +224,8 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
const size_t maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */
void* const compressedBuffer = malloc(maxCompressedSize);
void* const resultBuffer = malloc(srcSize);
ZSTD_CCtx* ctx = ZSTD_createCCtx();
ZSTD_DCtx* dctx = ZSTD_createDCtx();
U64 crcOrig = XXH64(srcBuffer, srcSize, 0);
U32 nbBlocks = 0;
@ -230,7 +233,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
if (strlen(displayName)>17) displayName += strlen(displayName)-17; /* can only display 17 characters */
/* Memory allocation & restrictions */
if (!compressedBuffer || !resultBuffer || !blockTable)
if (!compressedBuffer || !resultBuffer || !blockTable || !ctx || !dctx)
EXM_THROW(31, "not enough memory");
/* Init blockTable data */
@ -289,7 +292,11 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
while (BMK_GetMilliSpan(milliTime) < TIMELOOP)
{
for (blockNb=0; blockNb<nbBlocks; blockNb++)
blockTable[blockNb].cSize = ZSTD_compress(blockTable[blockNb].cPtr, blockTable[blockNb].cRoom, blockTable[blockNb].srcPtr,blockTable[blockNb].srcSize, cLevel);
blockTable[blockNb].cSize = ZSTD_compress_usingDict(ctx,
blockTable[blockNb].cPtr, blockTable[blockNb].cRoom,
blockTable[blockNb].srcPtr,blockTable[blockNb].srcSize,
dictBuffer, dictBufferSize,
cLevel);
nbLoops++;
}
milliTime = BMK_GetMilliSpan(milliTime);
@ -312,8 +319,10 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
for ( ; BMK_GetMilliSpan(milliTime) < TIMELOOP; nbLoops++)
{
for (blockNb=0; blockNb<nbBlocks; blockNb++)
blockTable[blockNb].resSize = ZSTD_decompress(blockTable[blockNb].resPtr, blockTable[blockNb].srcSize,
blockTable[blockNb].cPtr, blockTable[blockNb].cSize);
blockTable[blockNb].resSize = ZSTD_decompress_usingDict(dctx,
blockTable[blockNb].resPtr, blockTable[blockNb].srcSize,
blockTable[blockNb].cPtr, blockTable[blockNb].cSize,
dictBuffer, dictBufferSize);
}
milliTime = BMK_GetMilliSpan(milliTime);
@ -342,11 +351,15 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
if (crcOrig == crcCheck)
DISPLAY("%2i-%-17.17s :%10i ->%10i (%5.3f),%6.1f MB/s ,%6.1f MB/s \n", cLevel, displayName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000., (double)srcSize / fastestD / 1000.);
else
DISPLAY("X \n");
}
/* clean up */
free(compressedBuffer);
free(resultBuffer);
ZSTD_freeCCtx(ctx);
ZSTD_freeDCtx(dctx);
return 0;
}
@ -372,16 +385,23 @@ static size_t BMK_findMaxMem(U64 requiredMem)
static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize,
const char* displayName, int cLevel,
const size_t* fileSizes, unsigned nbFiles)
const size_t* fileSizes, unsigned nbFiles,
const void* dictBuffer, size_t dictBufferSize)
{
if (cLevel < 0)
{
int l;
for (l=1; l <= -cLevel; l++)
BMK_benchMem(srcBuffer, benchedSize, displayName, l, fileSizes, nbFiles);
BMK_benchMem(srcBuffer, benchedSize,
displayName, l,
fileSizes, nbFiles,
dictBuffer, dictBufferSize);
return;
}
BMK_benchMem(srcBuffer, benchedSize, displayName, cLevel, fileSizes, nbFiles);
BMK_benchMem(srcBuffer, benchedSize,
displayName, cLevel,
fileSizes, nbFiles,
dictBuffer, dictBufferSize);
}
static U64 BMK_getTotalFileSize(const char** fileNamesTable, unsigned nbFiles)
@ -417,22 +437,37 @@ static void BMK_loadFiles(void* buffer, size_t bufferSize,
}
}
static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles, int cLevel)
static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles,
const char* dictFileName, int cLevel)
{
void* srcBuffer;
size_t benchedSize;
size_t* fileSizes;
void* dictBuffer = NULL;
size_t dictBufferSize = 0;
size_t* fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t));
U64 totalSizeToLoad = BMK_getTotalFileSize(fileNamesTable, nbFiles);
char mfName[20] = {0};
const char* displayName = NULL;
if (!fileSizes) EXM_THROW(12, "not enough memory for fileSizes");
/* Load dictionary */
if (dictFileName != NULL)
{
U64 dictFileSize = BMK_getFileSize(dictFileName);
if (dictFileSize > 64 MB) EXM_THROW(10, "dictionary file %s too large", dictFileName);
dictBufferSize = (size_t)dictFileSize;
dictBuffer = malloc(dictBufferSize);
if (dictBuffer==NULL) EXM_THROW(11, "not enough memory for dictionary (%u bytes)", (U32)dictBufferSize);
BMK_loadFiles(dictBuffer, dictBufferSize, fileSizes, &dictFileName, 1);
}
/* Memory allocation & restrictions */
benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3;
if ((U64)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad;
if (benchedSize < totalSizeToLoad)
DISPLAY("Not enough memory; testing %u MB only...\n", (U32)(benchedSize >> 20));
srcBuffer = malloc(benchedSize);
fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t));
if (!srcBuffer) EXM_THROW(12, "not enough memory");
/* Load input buffer */
@ -443,10 +478,14 @@ static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles, in
if (nbFiles > 1) displayName = mfName;
else displayName = fileNamesTable[0];
BMK_benchCLevel(srcBuffer, benchedSize, displayName, cLevel, fileSizes, nbFiles);
BMK_benchCLevel(srcBuffer, benchedSize,
displayName, cLevel,
fileSizes, nbFiles,
dictBuffer, dictBufferSize);
/* clean up */
free(srcBuffer);
free(dictBuffer);
free(fileSizes);
}
@ -465,21 +504,22 @@ static void BMK_syntheticTest(int cLevel, double compressibility)
/* Bench */
snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100));
BMK_benchCLevel(srcBuffer, benchedSize, name, cLevel, &benchedSize, 1);
BMK_benchCLevel(srcBuffer, benchedSize, name, cLevel, &benchedSize, 1, NULL, 0);
/* clean up */
free(srcBuffer);
}
int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, int cLevel)
int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles,
const char* dictFileName, int cLevel)
{
double compressibility = (double)g_compressibilityDefault / 100;
if (nbFiles == 0)
BMK_syntheticTest(cLevel, compressibility);
else
BMK_benchFileTable(fileNamesTable, nbFiles, cLevel);
BMK_benchFileTable(fileNamesTable, nbFiles, dictFileName, cLevel);
return 0;
}

View File

@ -26,7 +26,8 @@
/* Main function */
int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, int cLevel);
int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles,
const char* dictFileName, int cLevel);
/* Set Parameters */
void BMK_SetNbIterations(int nbLoops);

View File

@ -342,6 +342,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
blockTable[blockNb].cSize = ZSTD_compress_advanced(ctx,
blockTable[blockNb].cPtr, blockTable[blockNb].cRoom,
blockTable[blockNb].srcPtr, blockTable[blockNb].srcSize,
NULL, 0,
params);
nbLoops++;
}

View File

@ -140,7 +140,7 @@ static int usage_advanced(const char* programName)
DISPLAY( " -V : display Version number and exit\n");
DISPLAY( " -v : verbose mode\n");
DISPLAY( " -q : suppress warnings; specify twice to suppress errors too\n");
DISPLAY( " -m : multiple input filenames mode");
DISPLAY( " -m : multiple input filenames mode \n");
DISPLAY( " -c : force write to standard output, even if it is the console\n");
DISPLAY( " -D file: use file content as Dictionary \n");
#ifndef ZSTD_NOBENCH
@ -354,7 +354,7 @@ int main(int argCount, const char** argv)
if (bench)
{
#ifndef ZSTD_NOBENCH
BMK_benchFiles(argv+fileNameStart, nbFiles, cLevel*rangeBench);
BMK_benchFiles(argv+fileNameStart, nbFiles, dictFileName, cLevel*rangeBench);
#endif
goto _end;
}