From 3d93f2fce75bb33374dece67cf16e2cffac846cb Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 27 Dec 2016 07:19:36 +0100 Subject: [PATCH 01/73] first zstdmt sketch --- lib/compress/zstdmt_compress.c | 310 +++++++++++++++++++++++++++++++++ lib/compress/zstdmt_compress.h | 12 ++ programs/Makefile | 2 +- programs/bench.c | 14 +- 4 files changed, 336 insertions(+), 2 deletions(-) create mode 100644 lib/compress/zstdmt_compress.c create mode 100644 lib/compress/zstdmt_compress.h diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c new file mode 100644 index 00000000..13cc1948 --- /dev/null +++ b/lib/compress/zstdmt_compress.c @@ -0,0 +1,310 @@ +#include /* malloc */ +#include +#include "zstd_internal.h" /* MIN, ERROR */ +#include "zstdmt_compress.h" + +#if 0 +# include + static unsigned g_debugLevel = 4; +# define DEBUGLOG(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __VA_ARGS__); fprintf(stderr, " \n"); } +#else +# define DEBUGLOG(l, ...) /* disabled */ +#endif + +#define ZSTDMT_NBTHREADS_MAX 128 +#define ZSTDMT_NBSTACKEDFRAMES_MAX (2*ZSTDMT_NBTHREADS_MAX) + +typedef struct frameToWrite_s { + const void* start; + size_t frameSize; + unsigned frameID; + unsigned isLastFrame; +} frameToWrite_t; + +typedef struct ZSTDMT_dstBuffer_s { + ZSTD_outBuffer out; + unsigned frameIDToWrite; + pthread_mutex_t frameTable_mutex; + pthread_mutex_t allFramesWritten_mutex; + frameToWrite_t stackedFrame[ZSTDMT_NBSTACKEDFRAMES_MAX]; + unsigned nbStackedFrames; +} ZSTDMT_dstBufferManager; + +static ZSTDMT_dstBufferManager ZSTDMT_createDstBufferManager(void* dst, size_t dstCapacity) +{ + ZSTDMT_dstBufferManager dbm; + dbm.out.dst = dst; + dbm.out.size = dstCapacity; + dbm.out.pos = 0; + dbm.frameIDToWrite = 0; + pthread_mutex_init(&dbm.frameTable_mutex, NULL); + pthread_mutex_init(&dbm.allFramesWritten_mutex, NULL); + pthread_mutex_lock(&dbm.allFramesWritten_mutex); + dbm.nbStackedFrames = 0; + return dbm; +} + +/* note : can fail if nbStackedFrames > 
ZSTDMT_NBSTACKEDFRAMES_MAX. + * note2 : can only be called from a section with frameTable_mutex already locked */ +static void ZSTDMT_stackFrameToWrite(ZSTDMT_dstBufferManager* dstBufferManager, frameToWrite_t frame) { + dstBufferManager->stackedFrame[dstBufferManager->nbStackedFrames++] = frame; +} + + +typedef struct buffer_s { + void* start; + size_t bufferSize; +} buffer_t; + +static buffer_t ZSTDMT_getDstBuffer(const ZSTDMT_dstBufferManager* dstBufferManager) +{ + ZSTD_outBuffer const out = dstBufferManager->out; + buffer_t buf; + buf.start = (char*)(out.dst) + out.pos; + buf.bufferSize = out.size - out.pos; + return buf; +} + +/* condition : stackNumber < dstBufferManager->nbStackedFrames. + * note : there can only be one write at a time, due to frameID condition */ +static size_t ZSTDMT_writeFrame(ZSTDMT_dstBufferManager* dstBufferManager, unsigned stackNumber) +{ + ZSTD_outBuffer const out = dstBufferManager->out; + size_t const frameSize = dstBufferManager->stackedFrame[stackNumber].frameSize; + const void* const frameStart = dstBufferManager->stackedFrame[stackNumber].start; + if (out.pos + frameSize > out.size) + return ERROR(dstSize_tooSmall); + DEBUGLOG(3, "writing frame %u (%u bytes) ", dstBufferManager->stackedFrame[stackNumber].frameID, (U32)frameSize); + memcpy((char*)out.dst + out.pos, frameStart, frameSize); + dstBufferManager->out.pos += frameSize; + dstBufferManager->frameIDToWrite = dstBufferManager->stackedFrame[stackNumber].frameID + 1; + return 0; +} + +static size_t ZSTDMT_tryWriteFrame(ZSTDMT_dstBufferManager* dstBufferManager, + const void* src, size_t srcSize, + unsigned frameID, unsigned isLastFrame) +{ + unsigned lastFrameWritten = 0; + + /* check if correct frame ordering; stack otherwise */ + DEBUGLOG(5, "considering writing frame %u ", frameID); + pthread_mutex_lock(&dstBufferManager->frameTable_mutex); + if (frameID != dstBufferManager->frameIDToWrite) { + DEBUGLOG(4, "writing frameID %u : not possible, waiting for %u ", frameID, 
dstBufferManager->frameIDToWrite); + frameToWrite_t frame = { src, srcSize, frameID, isLastFrame }; + ZSTDMT_stackFrameToWrite(dstBufferManager, frame); + pthread_mutex_unlock(&dstBufferManager->frameTable_mutex); + return 0; + } + pthread_mutex_unlock(&dstBufferManager->frameTable_mutex); + + /* write frame + * note : only one write possible due to frameID condition */ + DEBUGLOG(3, "writing frame %u (%u bytes) ", frameID, (U32)srcSize); + ZSTD_outBuffer const out = dstBufferManager->out; + if (out.pos + srcSize > out.size) + return ERROR(dstSize_tooSmall); + if (frameID) /* frameID==0 compress directly in dst buffer */ + memcpy((char*)out.dst + out.pos, src, srcSize); + dstBufferManager->out.pos += srcSize; + dstBufferManager->frameIDToWrite = frameID+1; + lastFrameWritten = isLastFrame; + + /* check if more frames are stacked */ + pthread_mutex_lock(&dstBufferManager->frameTable_mutex); + unsigned frameWritten = dstBufferManager->nbStackedFrames>0; + while (frameWritten) { + unsigned u; + frameID++; + frameWritten = 0; + for (u=0; unbStackedFrames; u++) { + if (dstBufferManager->stackedFrame[u].frameID == frameID) { + pthread_mutex_unlock(&dstBufferManager->frameTable_mutex); + { size_t const writeError = ZSTDMT_writeFrame(dstBufferManager, u); + if (ZSTD_isError(writeError)) return writeError; } + lastFrameWritten = dstBufferManager->stackedFrame[u].isLastFrame; + /* remove frame from stack */ + pthread_mutex_lock(&dstBufferManager->frameTable_mutex); + dstBufferManager->stackedFrame[u] = dstBufferManager->stackedFrame[dstBufferManager->nbStackedFrames-1]; + dstBufferManager->nbStackedFrames -= 1; + frameWritten = dstBufferManager->nbStackedFrames>0; + break; + } } } + pthread_mutex_unlock(&dstBufferManager->frameTable_mutex); + + /* end reached : last frame written */ + if (lastFrameWritten) pthread_mutex_unlock(&dstBufferManager->allFramesWritten_mutex); + return 0; +} + + + +typedef struct ZSTDMT_jobDescription_s { + const void* src; /* NULL means : kill 
thread */ + size_t srcSize; + int compressionLevel; + ZSTDMT_dstBufferManager* dstManager; + unsigned frameNumber; + unsigned isLastFrame; +} ZSTDMT_jobDescription; + +typedef struct ZSTDMT_jobAgency_s { + pthread_mutex_t jobAnnounce_mutex; + pthread_mutex_t jobApply_mutex; + ZSTDMT_jobDescription jobAnnounce; +} ZSTDMT_jobAgency; + +/* ZSTDMT_postjob() : + * This function is blocking as long as previous posted job is not taken. + * It could be made non-blocking, with a storage queue. + * But blocking has benefits : on top of memory savings, + * the caller will be able to measure delay, allowing dynamic speed throttle (via compression level). + */ +static void ZSTDMT_postjob(ZSTDMT_jobAgency* jobAgency, ZSTDMT_jobDescription job) +{ + DEBUGLOG(5, "starting job posting "); + pthread_mutex_lock(&jobAgency->jobApply_mutex); /* wait for a thread to take previous job */ + DEBUGLOG(5, "job posting mutex acquired "); + jobAgency->jobAnnounce = job; /* post job */ + pthread_mutex_unlock(&jobAgency->jobAnnounce_mutex); /* announce */ + DEBUGLOG(5, "job available now "); +} + +static ZSTDMT_jobDescription ZSTDMT_getjob(ZSTDMT_jobAgency* jobAgency) +{ + pthread_mutex_lock(&jobAgency->jobAnnounce_mutex); /* should check return code */ + ZSTDMT_jobDescription const job = jobAgency->jobAnnounce; + pthread_mutex_unlock(&jobAgency->jobApply_mutex); + return job; +} + + + +#define ZSTDMT_NBBUFFERSPOOLED_MAX ZSTDMT_NBTHREADS_MAX +typedef struct ZSTDMT_bufferPool_s { + buffer_t bTable[ZSTDMT_NBBUFFERSPOOLED_MAX]; + unsigned nbBuffers; +} ZSTDMT_bufferPool; + +static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize) +{ + if (pool->nbBuffers) { /* try to use an existing buffer */ + pool->nbBuffers--; + buffer_t const buf = pool->bTable[pool->nbBuffers]; + size_t const availBufferSize = buf.bufferSize; + if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize)) /* large enough, but not too much */ + return buf; + free(buf.start); /* size conditions not respected : 
create a new buffer */ + } + /* create new buffer */ + buffer_t buf; + buf.bufferSize = bSize; + buf.start = calloc(1, bSize); + return buf; +} + +/* effectively store buffer for later re-use, up to pool capacity */ +static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf) +{ + if (pool->nbBuffers >= ZSTDMT_NBBUFFERSPOOLED_MAX) { + free(buf.start); + return; + } + pool->bTable[pool->nbBuffers++] = buf; /* store for later re-use */ +} + + + +struct ZSTDMT_CCtx_s { + pthread_t pthread[ZSTDMT_NBTHREADS_MAX]; + unsigned nbThreads; + ZSTDMT_jobAgency jobAgency; + ZSTDMT_bufferPool bufferPool; +}; + +static void* ZSTDMT_compressionThread(void* arg) +{ + if (arg==NULL) return NULL; /* error : should not be possible */ + ZSTDMT_CCtx* const cctx = (ZSTDMT_CCtx*) arg; + ZSTDMT_jobAgency* const jobAgency = &cctx->jobAgency; + ZSTDMT_bufferPool* const pool = &cctx->bufferPool; + for (;;) { + ZSTDMT_jobDescription const job = ZSTDMT_getjob(jobAgency); + if (job.src == NULL) { + DEBUGLOG(4, "thread exit ") + return NULL; + } + ZSTDMT_dstBufferManager* dstBufferManager = job.dstManager; + size_t const dstBufferCapacity = ZSTD_compressBound(job.srcSize); + DEBUGLOG(4, "requesting a dstBuffer for frame %u", job.frameNumber); + buffer_t const dstBuffer = job.frameNumber ? 
ZSTDMT_getBuffer(pool, dstBufferCapacity) : ZSTDMT_getDstBuffer(dstBufferManager); /* lack params */ + DEBUGLOG(4, "start compressing frame %u", job.frameNumber); + size_t const cSize = ZSTD_compress(dstBuffer.start, dstBuffer.bufferSize, job.src, job.srcSize, job.compressionLevel); + if (ZSTD_isError(cSize)) return (void*)(cSize); /* error */ + size_t const writeError = ZSTDMT_tryWriteFrame(dstBufferManager, dstBuffer.start, cSize, job.frameNumber, job.isLastFrame); /* pas clair */ + if (ZSTD_isError(writeError)) return (void*)writeError; + if (job.frameNumber) ZSTDMT_releaseBuffer(pool, dstBuffer); + } +} + +ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads) +{ + if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL; + ZSTDMT_CCtx* const cctx = (ZSTDMT_CCtx*) calloc(1, sizeof(ZSTDMT_CCtx)); + if (!cctx) return NULL; + pthread_mutex_init(&cctx->jobAgency.jobAnnounce_mutex, NULL); /* check return value ? */ + pthread_mutex_init(&cctx->jobAgency.jobApply_mutex, NULL); + pthread_mutex_lock(&cctx->jobAgency.jobAnnounce_mutex); /* no job at beginning */ + /* start all workers */ + cctx->nbThreads = nbThreads; + DEBUGLOG(2, "nbThreads : %u \n", nbThreads); + unsigned t; + for (t = 0; t < nbThreads; t++) { + pthread_create(&cctx->pthread[t], NULL, ZSTDMT_compressionThread, cctx); /* check return value ? */ + } + return cctx; +} + +size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* cctx) +{ + /* free threads */ + /* free mutex (if necessary) */ + /* free bufferPool */ + free(cctx); /* incompleted ! 
*/ + return 0; +} + +size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) +{ + ZSTDMT_jobAgency* jobAgency = &cctx->jobAgency; + ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize, 0); + size_t const frameSizeTarget = (size_t)1 << (params.cParams.windowLog + 2); + unsigned const nbFrames = (unsigned)(srcSize / frameSizeTarget) + (srcSize < frameSizeTarget) /* min 1 */; + size_t const avgFrameSize = (srcSize + (nbFrames-1)) / nbFrames; + size_t remainingSrcSize = srcSize; + const char* const srcStart = (const char*)src; + size_t frameStartPos = 0; + ZSTDMT_dstBufferManager dbm = ZSTDMT_createDstBufferManager(dst, dstCapacity); + + DEBUGLOG(2, "windowLog : %u => frameSizeTarget : %u ", params.cParams.windowLog, (U32)frameSizeTarget); + DEBUGLOG(2, "nbFrames : %u (size : %u bytes) ", nbFrames, (U32)avgFrameSize); + + { unsigned u; + for (u=0; u /* size_t */ + +typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx; + +ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads); +size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* cctx); + +size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); diff --git a/programs/Makefile b/programs/Makefile index 8ec9fc69..156bf898 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -32,7 +32,7 @@ FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) ZSTDCOMMON_FILES := $(ZSTDDIR)/common/*.c -ZSTDCOMP_FILES := $(ZSTDDIR)/compress/zstd_compress.c $(ZSTDDIR)/compress/fse_compress.c $(ZSTDDIR)/compress/huf_compress.c +ZSTDCOMP_FILES := $(ZSTDDIR)/compress/*.c ZSTDDECOMP_FILES := $(ZSTDDIR)/decompress/huf_decompress.c ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) ZDICT_FILES := $(ZSTDDIR)/dictBuilder/*.c diff --git a/programs/bench.c b/programs/bench.c index 9a4732a3..4059072f 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -115,6 +115,7 @@ void 
BMK_SetBlockSize(size_t blockSize) void BMK_setDecodeOnly(unsigned decodeFlag) { g_decodeOnly = (decodeFlag>0); } + /* ******************************************************** * Bench functions **********************************************************/ @@ -132,6 +133,8 @@ typedef struct { #define MIN(a,b) ((a)<(b) ? (a) : (b)) #define MAX(a,b) ((a)>(b) ? (a) : (b)) +#include "compress/zstdmt_compress.h" + static int BMK_benchMem(const void* srcBuffer, size_t srcSize, const char* displayName, int cLevel, const size_t* fileSizes, U32 nbFiles, @@ -153,6 +156,8 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, U32 nbBlocks; UTIL_time_t ticksPerSecond; + ZSTDMT_CCtx* const mtcctx = ZSTDMT_createCCtx(1); + /* checks */ if (!compressedBuffer || !resultBuffer || !blockTable || !ctx || !dctx) EXM_THROW(31, "allocation error : not enough memory"); @@ -264,6 +269,11 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, blockTable[blockNb].cPtr, blockTable[blockNb].cRoom, blockTable[blockNb].srcPtr,blockTable[blockNb].srcSize, cdict); + } else if (1) { + rSize = ZSTDMT_compressCCtx(mtcctx, + blockTable[blockNb].cPtr, blockTable[blockNb].cRoom, + blockTable[blockNb].srcPtr,blockTable[blockNb].srcSize, + cLevel); } else { rSize = ZSTD_compress_advanced (ctx, blockTable[blockNb].cPtr, blockTable[blockNb].cRoom, @@ -292,8 +302,10 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, memcpy(compressedBuffer, srcBuffer, loadedCompressedSize); } - (void)fastestD; (void)crcOrig; /* unused when decompression disabled */ #if 1 + dCompleted=1; + (void)totalDTime; (void)fastestD; (void)crcOrig; /* unused when decompression disabled */ +#else /* Decompression */ if (!dCompleted) memset(resultBuffer, 0xD6, srcSize); /* warm result buffer */ From ce9e1452fd0ab175d0f2c8b37639c05978b08fdd Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 28 Dec 2016 15:31:19 +0100 Subject: [PATCH 02/73] protect buffer pool with a mutex --- 
lib/compress/zstdmt_compress.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 13cc1948..c698dce0 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -1,5 +1,5 @@ #include /* malloc */ -#include +#include /* posix only, to be replaced by a more portable version */ #include "zstd_internal.h" /* MIN, ERROR */ #include "zstdmt_compress.h" @@ -39,7 +39,7 @@ static ZSTDMT_dstBufferManager ZSTDMT_createDstBufferManager(void* dst, size_t d dbm.frameIDToWrite = 0; pthread_mutex_init(&dbm.frameTable_mutex, NULL); pthread_mutex_init(&dbm.allFramesWritten_mutex, NULL); - pthread_mutex_lock(&dbm.allFramesWritten_mutex); + pthread_mutex_lock(&dbm.allFramesWritten_mutex); /* maybe could be merged into init ? */ dbm.nbStackedFrames = 0; return dbm; } @@ -92,7 +92,7 @@ static size_t ZSTDMT_tryWriteFrame(ZSTDMT_dstBufferManager* dstBufferManager, pthread_mutex_lock(&dstBufferManager->frameTable_mutex); if (frameID != dstBufferManager->frameIDToWrite) { DEBUGLOG(4, "writing frameID %u : not possible, waiting for %u ", frameID, dstBufferManager->frameIDToWrite); - frameToWrite_t frame = { src, srcSize, frameID, isLastFrame }; + frameToWrite_t const frame = { src, srcSize, frameID, isLastFrame }; ZSTDMT_stackFrameToWrite(dstBufferManager, frame); pthread_mutex_unlock(&dstBufferManager->frameTable_mutex); return 0; @@ -121,9 +121,11 @@ static size_t ZSTDMT_tryWriteFrame(ZSTDMT_dstBufferManager* dstBufferManager, for (u=0; unbStackedFrames; u++) { if (dstBufferManager->stackedFrame[u].frameID == frameID) { pthread_mutex_unlock(&dstBufferManager->frameTable_mutex); + DEBUGLOG(4, "catch up frame %u ", frameID); { size_t const writeError = ZSTDMT_writeFrame(dstBufferManager, u); if (ZSTD_isError(writeError)) return writeError; } lastFrameWritten = dstBufferManager->stackedFrame[u].isLastFrame; + dstBufferManager->frameIDToWrite = frameID+1; /* remove 
frame from stack */ pthread_mutex_lock(&dstBufferManager->frameTable_mutex); dstBufferManager->stackedFrame[u] = dstBufferManager->stackedFrame[dstBufferManager->nbStackedFrames-1]; @@ -183,20 +185,24 @@ static ZSTDMT_jobDescription ZSTDMT_getjob(ZSTDMT_jobAgency* jobAgency) #define ZSTDMT_NBBUFFERSPOOLED_MAX ZSTDMT_NBTHREADS_MAX typedef struct ZSTDMT_bufferPool_s { + pthread_mutex_t bufferPool_mutex; buffer_t bTable[ZSTDMT_NBBUFFERSPOOLED_MAX]; unsigned nbBuffers; } ZSTDMT_bufferPool; static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize) { + pthread_mutex_lock(&pool->bufferPool_mutex); if (pool->nbBuffers) { /* try to use an existing buffer */ pool->nbBuffers--; buffer_t const buf = pool->bTable[pool->nbBuffers]; + pthread_mutex_unlock(&pool->bufferPool_mutex); size_t const availBufferSize = buf.bufferSize; if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize)) /* large enough, but not too much */ return buf; free(buf.start); /* size conditions not respected : create a new buffer */ } + pthread_mutex_unlock(&pool->bufferPool_mutex); /* create new buffer */ buffer_t buf; buf.bufferSize = bSize; @@ -207,11 +213,14 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize) /* effectively store buffer for later re-use, up to pool capacity */ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf) { + pthread_mutex_lock(&pool->bufferPool_mutex); if (pool->nbBuffers >= ZSTDMT_NBBUFFERSPOOLED_MAX) { + pthread_mutex_unlock(&pool->bufferPool_mutex); free(buf.start); return; } pool->bTable[pool->nbBuffers++] = buf; /* store for later re-use */ + pthread_mutex_unlock(&pool->bufferPool_mutex); } @@ -253,9 +262,12 @@ ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads) if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL; ZSTDMT_CCtx* const cctx = (ZSTDMT_CCtx*) calloc(1, sizeof(ZSTDMT_CCtx)); if (!cctx) return NULL; + /* init jobAgency */ pthread_mutex_init(&cctx->jobAgency.jobAnnounce_mutex, NULL); /* 
check return value ? */ pthread_mutex_init(&cctx->jobAgency.jobApply_mutex, NULL); pthread_mutex_lock(&cctx->jobAgency.jobAnnounce_mutex); /* no job at beginning */ + /* init bufferPool */ + pthread_mutex_init(&cctx->bufferPool.bufferPool_mutex, NULL); /* start all workers */ cctx->nbThreads = nbThreads; DEBUGLOG(2, "nbThreads : %u \n", nbThreads); From ab7a579180bb26b51358bfc9acddc561732d16f4 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 28 Dec 2016 16:11:09 +0100 Subject: [PATCH 03/73] added -T command , to set nb of threads --- programs/bench.c | 7 +++++-- programs/bench.h | 7 ++++--- programs/zstdcli.c | 8 +++++++- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index 4059072f..6009ebc7 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -96,6 +96,7 @@ static U32 g_nbSeconds = NBSECONDS; static size_t g_blockSize = 0; static int g_additionalParam = 0; static U32 g_decodeOnly = 0; +static U32 g_nbThreads = 1; void BMK_setNotificationLevel(unsigned level) { g_displayLevel=level; } @@ -113,7 +114,9 @@ void BMK_SetBlockSize(size_t blockSize) DISPLAYLEVEL(2, "using blocks of size %u KB \n", (U32)(blockSize>>10)); } -void BMK_setDecodeOnly(unsigned decodeFlag) { g_decodeOnly = (decodeFlag>0); } +void BMK_setDecodeOnlyMode(unsigned decodeFlag) { g_decodeOnly = (decodeFlag>0); } + +void BMK_SetNbThreads(unsigned nbThreads) { g_nbThreads = nbThreads; } /* ******************************************************** @@ -156,7 +159,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, U32 nbBlocks; UTIL_time_t ticksPerSecond; - ZSTDMT_CCtx* const mtcctx = ZSTDMT_createCCtx(1); + ZSTDMT_CCtx* const mtcctx = ZSTDMT_createCCtx(g_nbThreads); /* checks */ if (!compressedBuffer || !resultBuffer || !blockTable || !ctx || !dctx) diff --git a/programs/bench.h b/programs/bench.h index 314f3465..87850bcc 100644 --- a/programs/bench.h +++ b/programs/bench.h @@ -15,14 +15,15 @@ #define ZSTD_STATIC_LINKING_ONLY /* 
ZSTD_compressionParameters */ #include "zstd.h" /* ZSTD_compressionParameters */ -int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles,const char* dictFileName, +int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles,const char* dictFileName, int cLevel, int cLevelLast, ZSTD_compressionParameters* compressionParams); /* Set Parameters */ void BMK_SetNbSeconds(unsigned nbLoops); void BMK_SetBlockSize(size_t blockSize); -void BMK_setAdditionalParam(int additionalParam); +void BMK_SetNbThreads(unsigned nbThreads); void BMK_setNotificationLevel(unsigned level); -void BMK_setDecodeOnly(unsigned decodeFlag); +void BMK_setAdditionalParam(int additionalParam); +void BMK_setDecodeOnlyMode(unsigned decodeFlag); #endif /* BENCH_H_121279284357 */ diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 978ffcfe..03ad1ac7 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -352,7 +352,7 @@ int main(int argCount, const char* argv[]) /* Decoding */ case 'd': #ifndef ZSTD_NOBENCH - if (operation==zom_bench) { BMK_setDecodeOnly(1); argument++; break; } /* benchmark decode (hidden option) */ + if (operation==zom_bench) { BMK_setDecodeOnlyMode(1); argument++; break; } /* benchmark decode (hidden option) */ #endif operation=zom_decompress; argument++; break; @@ -430,6 +430,12 @@ int main(int argCount, const char* argv[]) dictSelect = readU32FromChar(&argument); break; + /* nb of threads (hidden option) */ + case 'T': + argument++; + BMK_SetNbThreads(readU32FromChar(&argument)); + break; + /* Pause at the end (-p) or set an additional param (-p#) (hidden option) */ case 'p': argument++; #ifndef ZSTD_NOBENCH From 6c0ed9483aab0b0d95e593723f2ef0bdd55160e7 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 28 Dec 2016 17:08:28 +0100 Subject: [PATCH 04/73] compression threads use ZSTD_compressCCtx() --- lib/compress/zstdmt_compress.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/compress/zstdmt_compress.c 
b/lib/compress/zstdmt_compress.c index c698dce0..0f14dbf3 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -235,13 +235,16 @@ struct ZSTDMT_CCtx_s { static void* ZSTDMT_compressionThread(void* arg) { if (arg==NULL) return NULL; /* error : should not be possible */ - ZSTDMT_CCtx* const cctx = (ZSTDMT_CCtx*) arg; - ZSTDMT_jobAgency* const jobAgency = &cctx->jobAgency; - ZSTDMT_bufferPool* const pool = &cctx->bufferPool; + ZSTDMT_CCtx* const mtctx = (ZSTDMT_CCtx*) arg; + ZSTDMT_jobAgency* const jobAgency = &mtctx->jobAgency; + ZSTDMT_bufferPool* const pool = &mtctx->bufferPool; + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + if (cctx==NULL) return NULL; /* allocation failure : thread not started */ for (;;) { ZSTDMT_jobDescription const job = ZSTDMT_getjob(jobAgency); if (job.src == NULL) { - DEBUGLOG(4, "thread exit ") + DEBUGLOG(4, "thread exit "); + ZSTD_freeCCtx(cctx); return NULL; } ZSTDMT_dstBufferManager* dstBufferManager = job.dstManager; @@ -249,7 +252,8 @@ static void* ZSTDMT_compressionThread(void* arg) DEBUGLOG(4, "requesting a dstBuffer for frame %u", job.frameNumber); buffer_t const dstBuffer = job.frameNumber ? 
ZSTDMT_getBuffer(pool, dstBufferCapacity) : ZSTDMT_getDstBuffer(dstBufferManager); /* lack params */ DEBUGLOG(4, "start compressing frame %u", job.frameNumber); - size_t const cSize = ZSTD_compress(dstBuffer.start, dstBuffer.bufferSize, job.src, job.srcSize, job.compressionLevel); + //size_t const cSize = ZSTD_compress(dstBuffer.start, dstBuffer.bufferSize, job.src, job.srcSize, job.compressionLevel); + size_t const cSize = ZSTD_compressCCtx(cctx, dstBuffer.start, dstBuffer.bufferSize, job.src, job.srcSize, job.compressionLevel); if (ZSTD_isError(cSize)) return (void*)(cSize); /* error */ size_t const writeError = ZSTDMT_tryWriteFrame(dstBufferManager, dstBuffer.start, cSize, job.frameNumber, job.isLastFrame); /* pas clair */ if (ZSTD_isError(writeError)) return (void*)writeError; From e70912c72bcafdf4f8b43a25017eb579a85b97f6 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 29 Dec 2016 01:24:01 +0100 Subject: [PATCH 05/73] Changed : input divided into roughly equal parts. Debug : can measure time waiting for mutexes to unlock. --- lib/compress/zstdmt_compress.c | 60 ++++++++++++++++++++++++++-------- programs/bench.c | 4 +-- 2 files changed, 48 insertions(+), 16 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 0f14dbf3..c86be870 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -5,12 +5,41 @@ #if 0 # include - static unsigned g_debugLevel = 4; +# include +# include + static unsigned g_debugLevel = 2; # define DEBUGLOG(l, ...) 
if (l<=g_debugLevel) { fprintf(stderr, __VA_ARGS__); fprintf(stderr, " \n"); } + +static unsigned long long GetCurrentClockTimeMicroseconds() +{ + static clock_t _ticksPerSecond = 0; + if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK); + + struct tms junk; clock_t newTicks = (clock_t) times(&junk); + return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond); +} + +#define MUTEX_WAIT_TIME_DLEVEL 5 +#define PTHREAD_MUTEX_LOCK(mutex) \ +if (g_debugLevel>=MUTEX_WAIT_TIME_DLEVEL) { \ + unsigned long long beforeTime = GetCurrentClockTimeMicroseconds(); \ + pthread_mutex_lock(mutex); \ + unsigned long long afterTime = GetCurrentClockTimeMicroseconds(); \ + unsigned long long elapsedTime = (afterTime-beforeTime); \ + if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \ + DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread %li took %llu microseconds to acquire mutex %s \n", \ + (long int) pthread_self(), elapsedTime, #mutex); \ + } \ +} else pthread_mutex_lock(mutex); + #else + # define DEBUGLOG(l, ...) /* disabled */ +# define PTHREAD_MUTEX_LOCK(m) pthread_mutex_lock(m) + #endif + #define ZSTDMT_NBTHREADS_MAX 128 #define ZSTDMT_NBSTACKEDFRAMES_MAX (2*ZSTDMT_NBTHREADS_MAX) @@ -38,8 +67,9 @@ static ZSTDMT_dstBufferManager ZSTDMT_createDstBufferManager(void* dst, size_t d dbm.out.pos = 0; dbm.frameIDToWrite = 0; pthread_mutex_init(&dbm.frameTable_mutex, NULL); - pthread_mutex_init(&dbm.allFramesWritten_mutex, NULL); - pthread_mutex_lock(&dbm.allFramesWritten_mutex); /* maybe could be merged into init ? */ + pthread_mutex_t* const allFramesWritten_mutex = &dbm.allFramesWritten_mutex; + pthread_mutex_init(allFramesWritten_mutex, NULL); + PTHREAD_MUTEX_LOCK(allFramesWritten_mutex); /* maybe could be merged into init ? 
*/ dbm.nbStackedFrames = 0; return dbm; } @@ -89,7 +119,7 @@ static size_t ZSTDMT_tryWriteFrame(ZSTDMT_dstBufferManager* dstBufferManager, /* check if correct frame ordering; stack otherwise */ DEBUGLOG(5, "considering writing frame %u ", frameID); - pthread_mutex_lock(&dstBufferManager->frameTable_mutex); + PTHREAD_MUTEX_LOCK(&dstBufferManager->frameTable_mutex); if (frameID != dstBufferManager->frameIDToWrite) { DEBUGLOG(4, "writing frameID %u : not possible, waiting for %u ", frameID, dstBufferManager->frameIDToWrite); frameToWrite_t const frame = { src, srcSize, frameID, isLastFrame }; @@ -112,7 +142,7 @@ static size_t ZSTDMT_tryWriteFrame(ZSTDMT_dstBufferManager* dstBufferManager, lastFrameWritten = isLastFrame; /* check if more frames are stacked */ - pthread_mutex_lock(&dstBufferManager->frameTable_mutex); + PTHREAD_MUTEX_LOCK(&dstBufferManager->frameTable_mutex); unsigned frameWritten = dstBufferManager->nbStackedFrames>0; while (frameWritten) { unsigned u; @@ -127,7 +157,7 @@ static size_t ZSTDMT_tryWriteFrame(ZSTDMT_dstBufferManager* dstBufferManager, lastFrameWritten = dstBufferManager->stackedFrame[u].isLastFrame; dstBufferManager->frameIDToWrite = frameID+1; /* remove frame from stack */ - pthread_mutex_lock(&dstBufferManager->frameTable_mutex); + PTHREAD_MUTEX_LOCK(&dstBufferManager->frameTable_mutex); dstBufferManager->stackedFrame[u] = dstBufferManager->stackedFrame[dstBufferManager->nbStackedFrames-1]; dstBufferManager->nbStackedFrames -= 1; frameWritten = dstBufferManager->nbStackedFrames>0; @@ -166,7 +196,7 @@ typedef struct ZSTDMT_jobAgency_s { static void ZSTDMT_postjob(ZSTDMT_jobAgency* jobAgency, ZSTDMT_jobDescription job) { DEBUGLOG(5, "starting job posting "); - pthread_mutex_lock(&jobAgency->jobApply_mutex); /* wait for a thread to take previous job */ + PTHREAD_MUTEX_LOCK(&jobAgency->jobApply_mutex); /* wait for a thread to take previous job */ DEBUGLOG(5, "job posting mutex acquired "); jobAgency->jobAnnounce = job; /* post job */ 
pthread_mutex_unlock(&jobAgency->jobAnnounce_mutex); /* announce */ @@ -175,7 +205,7 @@ static void ZSTDMT_postjob(ZSTDMT_jobAgency* jobAgency, ZSTDMT_jobDescription jo static ZSTDMT_jobDescription ZSTDMT_getjob(ZSTDMT_jobAgency* jobAgency) { - pthread_mutex_lock(&jobAgency->jobAnnounce_mutex); /* should check return code */ + PTHREAD_MUTEX_LOCK(&jobAgency->jobAnnounce_mutex); /* should check return code */ ZSTDMT_jobDescription const job = jobAgency->jobAnnounce; pthread_mutex_unlock(&jobAgency->jobApply_mutex); return job; @@ -192,7 +222,7 @@ typedef struct ZSTDMT_bufferPool_s { static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize) { - pthread_mutex_lock(&pool->bufferPool_mutex); + PTHREAD_MUTEX_LOCK(&pool->bufferPool_mutex); if (pool->nbBuffers) { /* try to use an existing buffer */ pool->nbBuffers--; buffer_t const buf = pool->bTable[pool->nbBuffers]; @@ -213,7 +243,7 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize) /* effectively store buffer for later re-use, up to pool capacity */ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf) { - pthread_mutex_lock(&pool->bufferPool_mutex); + PTHREAD_MUTEX_LOCK(&pool->bufferPool_mutex); if (pool->nbBuffers >= ZSTDMT_NBBUFFERSPOOLED_MAX) { pthread_mutex_unlock(&pool->bufferPool_mutex); free(buf.start); @@ -240,6 +270,7 @@ static void* ZSTDMT_compressionThread(void* arg) ZSTDMT_bufferPool* const pool = &mtctx->bufferPool; ZSTD_CCtx* const cctx = ZSTD_createCCtx(); if (cctx==NULL) return NULL; /* allocation failure : thread not started */ + DEBUGLOG(3, "thread %li created ", (long int)pthread_self()); for (;;) { ZSTDMT_jobDescription const job = ZSTDMT_getjob(jobAgency); if (job.src == NULL) { @@ -254,7 +285,7 @@ static void* ZSTDMT_compressionThread(void* arg) DEBUGLOG(4, "start compressing frame %u", job.frameNumber); //size_t const cSize = ZSTD_compress(dstBuffer.start, dstBuffer.bufferSize, job.src, job.srcSize, job.compressionLevel); size_t const cSize = 
ZSTD_compressCCtx(cctx, dstBuffer.start, dstBuffer.bufferSize, job.src, job.srcSize, job.compressionLevel); - if (ZSTD_isError(cSize)) return (void*)(cSize); /* error */ + if (ZSTD_isError(cSize)) return (void*)(cSize); /* error - find a better way */ size_t const writeError = ZSTDMT_tryWriteFrame(dstBufferManager, dstBuffer.start, cSize, job.frameNumber, job.isLastFrame); /* pas clair */ if (ZSTD_isError(writeError)) return (void*)writeError; if (job.frameNumber) ZSTDMT_releaseBuffer(pool, dstBuffer); @@ -269,7 +300,7 @@ ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads) /* init jobAgency */ pthread_mutex_init(&cctx->jobAgency.jobAnnounce_mutex, NULL); /* check return value ? */ pthread_mutex_init(&cctx->jobAgency.jobApply_mutex, NULL); - pthread_mutex_lock(&cctx->jobAgency.jobAnnounce_mutex); /* no job at beginning */ + PTHREAD_MUTEX_LOCK(&cctx->jobAgency.jobAnnounce_mutex); /* no job at beginning */ /* init bufferPool */ pthread_mutex_init(&cctx->bufferPool.bufferPool_mutex, NULL); /* start all workers */ @@ -299,7 +330,8 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* cctx, ZSTDMT_jobAgency* jobAgency = &cctx->jobAgency; ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize, 0); size_t const frameSizeTarget = (size_t)1 << (params.cParams.windowLog + 2); - unsigned const nbFrames = (unsigned)(srcSize / frameSizeTarget) + (srcSize < frameSizeTarget) /* min 1 */; + unsigned const nbFramesMax = (unsigned)(srcSize / frameSizeTarget) + (srcSize < frameSizeTarget) /* min 1 */; + unsigned const nbFrames = MIN(nbFramesMax, cctx->nbThreads); size_t const avgFrameSize = (srcSize + (nbFrames-1)) / nbFrames; size_t remainingSrcSize = srcSize; const char* const srcStart = (const char*)src; @@ -320,7 +352,7 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* cctx, remainingSrcSize -= frameSize; } } - pthread_mutex_lock(&dbm.allFramesWritten_mutex); + PTHREAD_MUTEX_LOCK(&dbm.allFramesWritten_mutex); DEBUGLOG(4, "compressed size : %u ", (U32)dbm.out.pos); return 
dbm.out.pos; } diff --git a/programs/bench.c b/programs/bench.c index 6009ebc7..b5cc77ee 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -159,8 +159,6 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, U32 nbBlocks; UTIL_time_t ticksPerSecond; - ZSTDMT_CCtx* const mtcctx = ZSTDMT_createCCtx(g_nbThreads); - /* checks */ if (!compressedBuffer || !resultBuffer || !blockTable || !ctx || !dctx) EXM_THROW(31, "allocation error : not enough memory"); @@ -228,6 +226,8 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, const char* const marks[NB_MARKS] = { " |", " /", " =", "\\" }; U32 markNb = 0; + ZSTDMT_CCtx* const mtcctx = ZSTDMT_createCCtx(g_nbThreads); + UTIL_getTime(&coolTime); DISPLAYLEVEL(2, "\r%79s\r", ""); while (!cCompleted || !dCompleted) { From e777a5be6bbf1981b91009feddf3daf9f8051d2c Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Thu, 29 Dec 2016 23:39:44 -0800 Subject: [PATCH 06/73] Add a thread pool for ZSTDMT and COVER --- lib/common/pool.c | 190 ++++++++++++++++++++++++++++++++++++++++++++++ lib/common/pool.h | 45 +++++++++++ 2 files changed, 235 insertions(+) create mode 100644 lib/common/pool.c create mode 100644 lib/common/pool.h diff --git a/lib/common/pool.c b/lib/common/pool.c new file mode 100644 index 00000000..bea48f31 --- /dev/null +++ b/lib/common/pool.c @@ -0,0 +1,190 @@ +/** + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. 
+ */ + +#include "pool.h" +#include /* size_t */ +#include /* malloc, calloc, free */ + +#ifdef ZSTD_PTHREAD + +#include + +/* A job is a function and an opaque argument */ +typedef struct POOL_job_s { + POOL_function function; + void *opaque; +} POOL_job; + +struct POOL_ctx_s { + /* Keep track of the threads */ + pthread_t *threads; + size_t numThreads; + + /* The queue is a circular buffer */ + POOL_job *queue; + size_t queueHead; + size_t queueTail; + size_t queueSize; + /* The mutex protects the queue */ + pthread_mutex_t queueMutex; + /* Condition variable for pushers to wait on when the queue is full */ + pthread_cond_t queuePushCond; + /* Condition variables for poppers to wait on when the queue is empty */ + pthread_cond_t queuePopCond; + /* Indicates if the queue is shutting down */ + int shutdown; +}; + +/* POOL_thread() : + Work thread for the thread pool. + Waits for jobs and executes them. + @returns : NULL on failure else non-null. +*/ +static void *POOL_thread(void *opaque) { + POOL_ctx *ctx = (POOL_ctx *)opaque; + if (!ctx) { return NULL; } + for (;;) { + /* Lock the mutex and wait for a non-empty queue or until shutdown */ + if (pthread_mutex_lock(&ctx->queueMutex)) { return NULL; } + while (ctx->queueHead == ctx->queueTail && !ctx->shutdown) { + if (pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex)) { return NULL; } + } + /* empty => shutting down: so stop */ + if (ctx->queueHead == ctx->queueTail) { + if (pthread_mutex_unlock(&ctx->queueMutex)) { return NULL; } + return opaque; + } + { + /* Pop a job off the queue */ + POOL_job job = ctx->queue[ctx->queueHead]; + ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize; + /* Unlock the mutex, signal a pusher, and run the job */ + if (pthread_mutex_unlock(&ctx->queueMutex)) { return NULL; } + if (pthread_cond_signal(&ctx->queuePushCond)) { return NULL; } + job.function(job.opaque); + } + } + /* Unreachable */ +} + +POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) { + int err = 0; + 
POOL_ctx *ctx; + /* Check the parameters */ + if (!numThreads || !queueSize) { return NULL; } + /* Allocate the context and zero initialize */ + ctx = (POOL_ctx *)calloc(1, sizeof(POOL_ctx)); + if (!ctx) { return NULL; } + /* Initialize the job queue. + * It needs one extra space since one space is wasted to differentiate empty + * and full queues. + */ + ctx->queueSize = queueSize + 1; + ctx->queue = (POOL_job *)malloc(ctx->queueSize * sizeof(POOL_job)); + ctx->queueHead = 0; + ctx->queueTail = 0; + err |= pthread_mutex_init(&ctx->queueMutex, NULL); + err |= pthread_cond_init(&ctx->queuePushCond, NULL); + err |= pthread_cond_init(&ctx->queuePopCond, NULL); + ctx->shutdown = 0; + /* Allocate space for the thread handles */ + ctx->threads = (pthread_t *)malloc(numThreads * sizeof(pthread_t)); + ctx->numThreads = 0; + /* Check for errors */ + if (!ctx->threads || !ctx->queue || err) { POOL_free(ctx); return NULL; } + /* Initialize the threads */ + { size_t i; + for (i = 0; i < numThreads; ++i) { + if (pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) { + ctx->numThreads = i; + POOL_free(ctx); + return NULL; + } + } + ctx->numThreads = numThreads; + } + return ctx; +} + +/*! POOL_join() : + Shutdown the queue, wake any sleeping threads, and join all of the threads. 
+*/ +static void POOL_join(POOL_ctx *ctx) { + /* Shut down the queue */ + pthread_mutex_lock(&ctx->queueMutex); + ctx->shutdown = 1; + pthread_mutex_unlock(&ctx->queueMutex); + /* Wake up sleeping threads */ + pthread_cond_broadcast(&ctx->queuePushCond); + pthread_cond_broadcast(&ctx->queuePopCond); + /* Join all of the threads */ + { size_t i; + for (i = 0; i < ctx->numThreads; ++i) { + pthread_join(ctx->threads[i], NULL); + } + } +} + +void POOL_free(POOL_ctx *ctx) { + if (!ctx) { return; } + POOL_join(ctx); + pthread_mutex_destroy(&ctx->queueMutex); + pthread_cond_destroy(&ctx->queuePushCond); + pthread_cond_destroy(&ctx->queuePopCond); + if (ctx->queue) free(ctx->queue); + if (ctx->threads) free(ctx->threads); + free(ctx); +} + +void POOL_add(void *ctxVoid, POOL_function function, void *opaque) { + POOL_ctx *ctx = (POOL_ctx *)ctxVoid; + if (!ctx) { return; } + + pthread_mutex_lock(&ctx->queueMutex); + { + POOL_job job = {function, opaque}; + /* Wait until there is space in the queue for the new job */ + size_t newTail = (ctx->queueTail + 1) % ctx->queueSize; + while (ctx->queueHead == newTail && !ctx->shutdown) { + pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex); + newTail = (ctx->queueTail + 1) % ctx->queueSize; + } + /* The queue is still going => there is space */ + if (!ctx->shutdown) { + ctx->queue[ctx->queueTail] = job; + ctx->queueTail = newTail; + } + } + pthread_mutex_unlock(&ctx->queueMutex); + pthread_cond_signal(&ctx->queuePopCond); +} + +#else + +/* We don't need any data, but if it is empty malloc() might return NULL. 
*/ +struct POOL_ctx_s { + int data; +}; + +POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) { + (void)numThreads; + (void)queueSize; + return (POOL_ctx *)malloc(sizeof(POOL_ctx)); +} + +void POOL_free(POOL_ctx *ctx) { + if (ctx) free(ctx); +} + +void POOL_add(void *ctx, POOL_function function, void *opaque) { + (void)ctx; + function(opaque); +} + +#endif diff --git a/lib/common/pool.h b/lib/common/pool.h new file mode 100644 index 00000000..f4afc1ee --- /dev/null +++ b/lib/common/pool.h @@ -0,0 +1,45 @@ +/** + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ +#ifndef POOL_H +#define POOL_H + +#include /* size_t */ + +typedef struct POOL_ctx_s POOL_ctx; + +/*! POOL_create() : + Create a thread pool with at most `numThreads` threads. + `numThreads` must be at least 1. + The maximum number of queued jobs before blocking is `queueSize`. + `queueSize` must be at least 1. + @return : The POOL_ctx pointer on success else NULL. +*/ +POOL_ctx *POOL_create(size_t numThreads, size_t queueSize); + +/*! POOL_free() : + Free a thread pool returned by POOL_create(). +*/ +void POOL_free(POOL_ctx *ctx); + +/*! POOL_function : + The function type that can be added to a thread pool. +*/ +typedef void (*POOL_function)(void *); +/*! POOL_add_function : + The function type for a generic thread pool add function. +*/ +typedef void (*POOL_add_function)(void *, POOL_function, void *); + +/*! POOL_add() : + Add the job `function(opaque)` to the thread pool. + Possibly blocks until there is room in the queue. 
+*/ +void POOL_add(void *ctx, POOL_function function, void *opaque); + +#endif From 9c499648e3097fb8e92e74a242fc83d6e6d09776 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Thu, 29 Dec 2016 23:41:03 -0800 Subject: [PATCH 07/73] Add thread pool tests --- .travis.yml | 2 +- tests/.gitignore | 1 + tests/Makefile | 8 +++++- tests/pool.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 tests/pool.c diff --git a/.travis.yml b/.travis.yml index 6bf99f1b..36537cbe 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,7 +12,7 @@ matrix: os: linux sudo: false - - env: Ubu=12.04cont Cmd="make zlibwrapper && make clean && make -C tests test-symbols && make clean && make -C tests test-zstd-nolegacy && make clean && make cmaketest && make clean && make -C contrib/pzstd googletest pzstd tests check && make -C contrib/pzstd clean" + - env: Ubu=12.04cont Cmd="make zlibwrapper && make clean && make -C tests test-pool && make -C tests test-symbols && make clean && make -C tests test-zstd-nolegacy && make clean && make cmaketest && make clean && make -C contrib/pzstd googletest pzstd tests check && make -C contrib/pzstd clean" os: linux sudo: false language: cpp diff --git a/tests/.gitignore b/tests/.gitignore index e932ad91..5041404d 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -15,6 +15,7 @@ paramgrill32 roundTripCrash longmatch symbols +pool invalidDictionaries # Tmp test directory diff --git a/tests/Makefile b/tests/Makefile index c080fe34..739944de 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -158,6 +158,9 @@ else $(CC) $(FLAGS) $^ -o $@$(EXT) -Wl,-rpath=$(ZSTDDIR) $(ZSTDDIR)/libzstd.so endif +pool : pool.c $(ZSTDDIR)/common/pool.c + $(CC) $(FLAGS) -pthread -DZSTD_PTHREAD $^ -o $@$(EXT) + namespaceTest: if $(CC) namespaceTest.c ../lib/common/xxhash.c -o $@ ; then echo compilation should fail; exit 1 ; fi $(RM) $@ @@ -176,7 +179,7 @@ clean: fuzzer-dll$(EXT) zstreamtest-dll$(EXT) 
zbufftest-dll$(EXT)\ zstreamtest$(EXT) zstreamtest32$(EXT) \ datagen$(EXT) paramgrill$(EXT) roundTripCrash$(EXT) longmatch$(EXT) \ - symbols$(EXT) invalidDictionaries$(EXT) + symbols$(EXT) invalidDictionaries$(EXT) pool$(EXT) @echo Cleaning completed @@ -288,4 +291,7 @@ test-invalidDictionaries: invalidDictionaries test-symbols: symbols $(QEMU_SYS) ./symbols +test-pool: pool + $(QEMU_SYS) ./pool + endif diff --git a/tests/pool.c b/tests/pool.c new file mode 100644 index 00000000..ce38075d --- /dev/null +++ b/tests/pool.c @@ -0,0 +1,70 @@ +#include "pool.h" +#include +#include +#include + +#define ASSERT_TRUE(p) \ + do { \ + if (!(p)) { \ + return 1; \ + } \ + } while (0) +#define ASSERT_FALSE(p) ASSERT_TRUE(!(p)) +#define ASSERT_EQ(lhs, rhs) ASSERT_TRUE((lhs) == (rhs)) + +struct data { + pthread_mutex_t mutex; + unsigned data[1024]; + size_t i; +}; + +void fn(void *opaque) { + struct data *data = (struct data *)opaque; + pthread_mutex_lock(&data->mutex); + data->data[data->i] = data->i; + ++data->i; + pthread_mutex_unlock(&data->mutex); +} + +int testOrder(size_t numThreads, size_t queueLog) { + struct data data; + POOL_ctx *ctx = POOL_create(numThreads, queueLog); + ASSERT_TRUE(ctx); + data.i = 0; + ASSERT_FALSE(pthread_mutex_init(&data.mutex, NULL)); + { + size_t i; + for (i = 0; i < 1024; ++i) { + POOL_add(ctx, &fn, &data); + } + } + POOL_free(ctx); + ASSERT_EQ(1024, data.i); + { + size_t i; + for (i = 0; i < data.i; ++i) { + ASSERT_EQ(i, data.data[i]); + } + } + ASSERT_FALSE(pthread_mutex_destroy(&data.mutex)); + return 0; +} + +int main(int argc, const char **argv) { + size_t numThreads; + for (numThreads = 1; numThreads <= 8; ++numThreads) { + size_t queueLog; + for (queueLog = 1; queueLog <= 8; ++queueLog) { + if (testOrder(numThreads, queueLog)) { + printf("FAIL: testOrder\n"); + return 1; + } + } + } + printf("PASS: testOrder\n"); + (POOL_create(0, 1) || POOL_create(1, 0)) ? 
printf("FAIL: testInvalid\n") + : printf("PASS: testInvalid\n"); + (void)argc; + (void)argv; + return 0; +} From c6a6417458751512380f4b284e15f2930dc8026d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 31 Dec 2016 03:31:26 +0100 Subject: [PATCH 08/73] bench correctly measures time for multi-threaded compression (posix only) --- lib/common/pool.c | 14 +++++--------- lib/compress/zstdmt_compress.c | 6 +++--- programs/bench.c | 31 ++++++++++++++++++++++++------- 3 files changed, 32 insertions(+), 19 deletions(-) diff --git a/lib/common/pool.c b/lib/common/pool.c index bea48f31..e3888194 100644 --- a/lib/common/pool.c +++ b/lib/common/pool.c @@ -60,9 +60,8 @@ static void *POOL_thread(void *opaque) { if (pthread_mutex_unlock(&ctx->queueMutex)) { return NULL; } return opaque; } - { - /* Pop a job off the queue */ - POOL_job job = ctx->queue[ctx->queueHead]; + /* Pop a job off the queue */ + { POOL_job job = ctx->queue[ctx->queueHead]; ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize; /* Unlock the mutex, signal a pusher, and run the job */ if (pthread_mutex_unlock(&ctx->queueMutex)) { return NULL; } @@ -105,8 +104,7 @@ POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) { ctx->numThreads = i; POOL_free(ctx); return NULL; - } - } + } } ctx->numThreads = numThreads; } return ctx; @@ -127,8 +125,7 @@ static void POOL_join(POOL_ctx *ctx) { { size_t i; for (i = 0; i < ctx->numThreads; ++i) { pthread_join(ctx->threads[i], NULL); - } - } + } } } void POOL_free(POOL_ctx *ctx) { @@ -147,8 +144,7 @@ void POOL_add(void *ctxVoid, POOL_function function, void *opaque) { if (!ctx) { return; } pthread_mutex_lock(&ctx->queueMutex); - { - POOL_job job = {function, opaque}; + { POOL_job const job = {function, opaque}; /* Wait until there is space in the queue for the new job */ size_t newTail = (ctx->queueTail + 1) % ctx->queueSize; while (ctx->queueHead == newTail && !ctx->shutdown) { diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 
c86be870..a6a49728 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -322,16 +322,16 @@ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* cctx) return 0; } -size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* cctx, +size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel) { - ZSTDMT_jobAgency* jobAgency = &cctx->jobAgency; + ZSTDMT_jobAgency* jobAgency = &mtctx->jobAgency; ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize, 0); size_t const frameSizeTarget = (size_t)1 << (params.cParams.windowLog + 2); unsigned const nbFramesMax = (unsigned)(srcSize / frameSizeTarget) + (srcSize < frameSizeTarget) /* min 1 */; - unsigned const nbFrames = MIN(nbFramesMax, cctx->nbThreads); + unsigned const nbFrames = MIN(nbFramesMax, mtctx->nbThreads); size_t const avgFrameSize = (srcSize + (nbFrames-1)) / nbFrames; size_t remainingSrcSize = srcSize; const char* const srcStart = (const char*)src; diff --git a/programs/bench.c b/programs/bench.c index b5cc77ee..c718e219 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -24,7 +24,7 @@ #include "util.h" /* UTIL_getFileSize, UTIL_sleep */ #include /* malloc, free */ #include /* memset */ -#include /* fprintf, fopen, ftello64 */ +#include /* fprintf, fopen */ #include /* clock_t, clock, CLOCKS_PER_SEC */ #include "mem.h" @@ -88,6 +88,23 @@ static clock_t g_time = 0; exit(error); \ } +/* ************************************* +* Time +***************************************/ +/* for posix only - proper detection macros to setup */ +#include +#include + +typedef unsigned long long clock_us_t; +static clock_us_t BMK_clockMicroSec() +{ + static clock_t _ticksPerSecond = 0; + if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK); + + struct tms junk; clock_t newTicks = (clock_t) times(&junk); (void)junk; + return ((((clock_us_t)newTicks)*(1000000))/_ticksPerSecond); +} + /* ************************************* * Benchmark 
Parameters @@ -231,7 +248,6 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, UTIL_getTime(&coolTime); DISPLAYLEVEL(2, "\r%79s\r", ""); while (!cCompleted || !dCompleted) { - UTIL_time_t clockStart; /* overheat protection */ if (UTIL_clockSpanMicro(coolTime, ticksPerSecond) > ACTIVEPERIOD_MICROSEC) { @@ -241,13 +257,14 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, } if (!g_decodeOnly) { + clock_us_t clockStart; /* Compression */ DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize); if (!cCompleted) memset(compressedBuffer, 0xE5, maxCompressedSize); /* warm up and erase result buffer */ UTIL_sleepMilli(1); /* give processor time to other processes */ UTIL_waitForNextTick(ticksPerSecond); - UTIL_getTime(&clockStart); + clockStart = BMK_clockMicroSec(); if (!cCompleted) { /* still some time to do compression tests */ ZSTD_parameters zparams = ZSTD_getParams(cLevel, avgSize, dictBufferSize); @@ -286,11 +303,11 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, blockTable[blockNb].cSize = rSize; } nbLoops++; - } while (UTIL_clockSpanMicro(clockStart, ticksPerSecond) < clockLoop); + } while (BMK_clockMicroSec() - clockStart < clockLoop); ZSTD_freeCDict(cdict); - { U64 const clockSpan = UTIL_clockSpanMicro(clockStart, ticksPerSecond); - if (clockSpan < fastestC*nbLoops) fastestC = clockSpan / nbLoops; - totalCTime += clockSpan; + { clock_us_t const clockSpanMicro = BMK_clockMicroSec() - clockStart; + if (clockSpanMicro < fastestC*nbLoops) fastestC = clockSpanMicro / nbLoops; + totalCTime += clockSpanMicro; cCompleted = (totalCTime >= maxTime); } } From 3b29dbd9e885c84842d58bcf2b9ca2843e8e483c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 31 Dec 2016 06:04:25 +0100 Subject: [PATCH 09/73] new zstdmt version using generic treadpool --- lib/common/pool.c | 6 +- lib/compress/zstdmt_compress.c | 361 ++++++++++++--------------------- 2 files changed, 137 insertions(+), 230 deletions(-) 
diff --git a/lib/common/pool.c b/lib/common/pool.c index e3888194..4ec1dfff 100644 --- a/lib/common/pool.c +++ b/lib/common/pool.c @@ -46,8 +46,8 @@ struct POOL_ctx_s { Waits for jobs and executes them. @returns : NULL on failure else non-null. */ -static void *POOL_thread(void *opaque) { - POOL_ctx *ctx = (POOL_ctx *)opaque; +static void* POOL_thread(void* opaque) { + POOL_ctx* const ctx = (POOL_ctx*)opaque; if (!ctx) { return NULL; } for (;;) { /* Lock the mutex and wait for a non-empty queue or until shutdown */ @@ -61,7 +61,7 @@ static void *POOL_thread(void *opaque) { return opaque; } /* Pop a job off the queue */ - { POOL_job job = ctx->queue[ctx->queueHead]; + { POOL_job const job = ctx->queue[ctx->queueHead]; ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize; /* Unlock the mutex, signal a pusher, and run the job */ if (pthread_mutex_unlock(&ctx->queueMutex)) { return NULL; } diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index a6a49728..1b925914 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -1,5 +1,6 @@ #include /* malloc */ -#include /* posix only, to be replaced by a more portable version */ +#include /* threadpool */ +#include /* mutex */ #include "zstd_internal.h" /* MIN, ERROR */ #include "zstdmt_compress.h" @@ -43,176 +44,11 @@ if (g_debugLevel>=MUTEX_WAIT_TIME_DLEVEL) { \ #define ZSTDMT_NBTHREADS_MAX 128 #define ZSTDMT_NBSTACKEDFRAMES_MAX (2*ZSTDMT_NBTHREADS_MAX) -typedef struct frameToWrite_s { - const void* start; - size_t frameSize; - unsigned frameID; - unsigned isLastFrame; -} frameToWrite_t; - -typedef struct ZSTDMT_dstBuffer_s { - ZSTD_outBuffer out; - unsigned frameIDToWrite; - pthread_mutex_t frameTable_mutex; - pthread_mutex_t allFramesWritten_mutex; - frameToWrite_t stackedFrame[ZSTDMT_NBSTACKEDFRAMES_MAX]; - unsigned nbStackedFrames; -} ZSTDMT_dstBufferManager; - -static ZSTDMT_dstBufferManager ZSTDMT_createDstBufferManager(void* dst, size_t dstCapacity) -{ - 
ZSTDMT_dstBufferManager dbm; - dbm.out.dst = dst; - dbm.out.size = dstCapacity; - dbm.out.pos = 0; - dbm.frameIDToWrite = 0; - pthread_mutex_init(&dbm.frameTable_mutex, NULL); - pthread_mutex_t* const allFramesWritten_mutex = &dbm.allFramesWritten_mutex; - pthread_mutex_init(allFramesWritten_mutex, NULL); - PTHREAD_MUTEX_LOCK(allFramesWritten_mutex); /* maybe could be merged into init ? */ - dbm.nbStackedFrames = 0; - return dbm; -} - -/* note : can fail if nbStackedFrames > ZSTDMT_NBSTACKEDFRAMES_MAX. - * note2 : can only be called from a section with frameTable_mutex already locked */ -static void ZSTDMT_stackFrameToWrite(ZSTDMT_dstBufferManager* dstBufferManager, frameToWrite_t frame) { - dstBufferManager->stackedFrame[dstBufferManager->nbStackedFrames++] = frame; -} - - typedef struct buffer_s { void* start; - size_t bufferSize; + size_t size; } buffer_t; -static buffer_t ZSTDMT_getDstBuffer(const ZSTDMT_dstBufferManager* dstBufferManager) -{ - ZSTD_outBuffer const out = dstBufferManager->out; - buffer_t buf; - buf.start = (char*)(out.dst) + out.pos; - buf.bufferSize = out.size - out.pos; - return buf; -} - -/* condition : stackNumber < dstBufferManager->nbStackedFrames. 
- * note : there can only be one write at a time, due to frameID condition */ -static size_t ZSTDMT_writeFrame(ZSTDMT_dstBufferManager* dstBufferManager, unsigned stackNumber) -{ - ZSTD_outBuffer const out = dstBufferManager->out; - size_t const frameSize = dstBufferManager->stackedFrame[stackNumber].frameSize; - const void* const frameStart = dstBufferManager->stackedFrame[stackNumber].start; - if (out.pos + frameSize > out.size) - return ERROR(dstSize_tooSmall); - DEBUGLOG(3, "writing frame %u (%u bytes) ", dstBufferManager->stackedFrame[stackNumber].frameID, (U32)frameSize); - memcpy((char*)out.dst + out.pos, frameStart, frameSize); - dstBufferManager->out.pos += frameSize; - dstBufferManager->frameIDToWrite = dstBufferManager->stackedFrame[stackNumber].frameID + 1; - return 0; -} - -static size_t ZSTDMT_tryWriteFrame(ZSTDMT_dstBufferManager* dstBufferManager, - const void* src, size_t srcSize, - unsigned frameID, unsigned isLastFrame) -{ - unsigned lastFrameWritten = 0; - - /* check if correct frame ordering; stack otherwise */ - DEBUGLOG(5, "considering writing frame %u ", frameID); - PTHREAD_MUTEX_LOCK(&dstBufferManager->frameTable_mutex); - if (frameID != dstBufferManager->frameIDToWrite) { - DEBUGLOG(4, "writing frameID %u : not possible, waiting for %u ", frameID, dstBufferManager->frameIDToWrite); - frameToWrite_t const frame = { src, srcSize, frameID, isLastFrame }; - ZSTDMT_stackFrameToWrite(dstBufferManager, frame); - pthread_mutex_unlock(&dstBufferManager->frameTable_mutex); - return 0; - } - pthread_mutex_unlock(&dstBufferManager->frameTable_mutex); - - /* write frame - * note : only one write possible due to frameID condition */ - DEBUGLOG(3, "writing frame %u (%u bytes) ", frameID, (U32)srcSize); - ZSTD_outBuffer const out = dstBufferManager->out; - if (out.pos + srcSize > out.size) - return ERROR(dstSize_tooSmall); - if (frameID) /* frameID==0 compress directly in dst buffer */ - memcpy((char*)out.dst + out.pos, src, srcSize); - 
dstBufferManager->out.pos += srcSize; - dstBufferManager->frameIDToWrite = frameID+1; - lastFrameWritten = isLastFrame; - - /* check if more frames are stacked */ - PTHREAD_MUTEX_LOCK(&dstBufferManager->frameTable_mutex); - unsigned frameWritten = dstBufferManager->nbStackedFrames>0; - while (frameWritten) { - unsigned u; - frameID++; - frameWritten = 0; - for (u=0; unbStackedFrames; u++) { - if (dstBufferManager->stackedFrame[u].frameID == frameID) { - pthread_mutex_unlock(&dstBufferManager->frameTable_mutex); - DEBUGLOG(4, "catch up frame %u ", frameID); - { size_t const writeError = ZSTDMT_writeFrame(dstBufferManager, u); - if (ZSTD_isError(writeError)) return writeError; } - lastFrameWritten = dstBufferManager->stackedFrame[u].isLastFrame; - dstBufferManager->frameIDToWrite = frameID+1; - /* remove frame from stack */ - PTHREAD_MUTEX_LOCK(&dstBufferManager->frameTable_mutex); - dstBufferManager->stackedFrame[u] = dstBufferManager->stackedFrame[dstBufferManager->nbStackedFrames-1]; - dstBufferManager->nbStackedFrames -= 1; - frameWritten = dstBufferManager->nbStackedFrames>0; - break; - } } } - pthread_mutex_unlock(&dstBufferManager->frameTable_mutex); - - /* end reached : last frame written */ - if (lastFrameWritten) pthread_mutex_unlock(&dstBufferManager->allFramesWritten_mutex); - return 0; -} - - - -typedef struct ZSTDMT_jobDescription_s { - const void* src; /* NULL means : kill thread */ - size_t srcSize; - int compressionLevel; - ZSTDMT_dstBufferManager* dstManager; - unsigned frameNumber; - unsigned isLastFrame; -} ZSTDMT_jobDescription; - -typedef struct ZSTDMT_jobAgency_s { - pthread_mutex_t jobAnnounce_mutex; - pthread_mutex_t jobApply_mutex; - ZSTDMT_jobDescription jobAnnounce; -} ZSTDMT_jobAgency; - -/* ZSTDMT_postjob() : - * This function is blocking as long as previous posted job is not taken. - * It could be made non-blocking, with a storage queue. 
- * But blocking has benefits : on top of memory savings, - * the caller will be able to measure delay, allowing dynamic speed throttle (via compression level). - */ -static void ZSTDMT_postjob(ZSTDMT_jobAgency* jobAgency, ZSTDMT_jobDescription job) -{ - DEBUGLOG(5, "starting job posting "); - PTHREAD_MUTEX_LOCK(&jobAgency->jobApply_mutex); /* wait for a thread to take previous job */ - DEBUGLOG(5, "job posting mutex acquired "); - jobAgency->jobAnnounce = job; /* post job */ - pthread_mutex_unlock(&jobAgency->jobAnnounce_mutex); /* announce */ - DEBUGLOG(5, "job available now "); -} - -static ZSTDMT_jobDescription ZSTDMT_getjob(ZSTDMT_jobAgency* jobAgency) -{ - PTHREAD_MUTEX_LOCK(&jobAgency->jobAnnounce_mutex); /* should check return code */ - ZSTDMT_jobDescription const job = jobAgency->jobAnnounce; - pthread_mutex_unlock(&jobAgency->jobApply_mutex); - return job; -} - - - #define ZSTDMT_NBBUFFERSPOOLED_MAX ZSTDMT_NBTHREADS_MAX typedef struct ZSTDMT_bufferPool_s { pthread_mutex_t bufferPool_mutex; @@ -227,7 +63,7 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize) pool->nbBuffers--; buffer_t const buf = pool->bTable[pool->nbBuffers]; pthread_mutex_unlock(&pool->bufferPool_mutex); - size_t const availBufferSize = buf.bufferSize; + size_t const availBufferSize = buf.size; if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize)) /* large enough, but not too much */ return buf; free(buf.start); /* size conditions not respected : create a new buffer */ @@ -235,7 +71,7 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize) pthread_mutex_unlock(&pool->bufferPool_mutex); /* create new buffer */ buffer_t buf; - buf.bufferSize = bSize; + buf.size = bSize; buf.start = calloc(1, bSize); return buf; } @@ -255,79 +91,119 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf) -struct ZSTDMT_CCtx_s { - pthread_t pthread[ZSTDMT_NBTHREADS_MAX]; - unsigned nbThreads; - ZSTDMT_jobAgency jobAgency; - 
ZSTDMT_bufferPool bufferPool; -}; +typedef struct { + ZSTD_CCtx* cctx; + const void* srcStart; + size_t srcSize; + buffer_t dstBuff; + int compressionLevel; + unsigned frameID; + size_t cSize; + unsigned jobCompleted; + pthread_mutex_t* jobCompleted_mutex; +} ZSTDMT_jobDescription; -static void* ZSTDMT_compressionThread(void* arg) +/* ZSTDMT_compressFrame() : POOL_function type */ +void ZSTDMT_compressFrame(void* jobDescription) { - if (arg==NULL) return NULL; /* error : should not be possible */ - ZSTDMT_CCtx* const mtctx = (ZSTDMT_CCtx*) arg; - ZSTDMT_jobAgency* const jobAgency = &mtctx->jobAgency; - ZSTDMT_bufferPool* const pool = &mtctx->bufferPool; - ZSTD_CCtx* const cctx = ZSTD_createCCtx(); - if (cctx==NULL) return NULL; /* allocation failure : thread not started */ - DEBUGLOG(3, "thread %li created ", (long int)pthread_self()); - for (;;) { - ZSTDMT_jobDescription const job = ZSTDMT_getjob(jobAgency); - if (job.src == NULL) { - DEBUGLOG(4, "thread exit "); - ZSTD_freeCCtx(cctx); - return NULL; - } - ZSTDMT_dstBufferManager* dstBufferManager = job.dstManager; - size_t const dstBufferCapacity = ZSTD_compressBound(job.srcSize); - DEBUGLOG(4, "requesting a dstBuffer for frame %u", job.frameNumber); - buffer_t const dstBuffer = job.frameNumber ? 
ZSTDMT_getBuffer(pool, dstBufferCapacity) : ZSTDMT_getDstBuffer(dstBufferManager); /* lack params */ - DEBUGLOG(4, "start compressing frame %u", job.frameNumber); - //size_t const cSize = ZSTD_compress(dstBuffer.start, dstBuffer.bufferSize, job.src, job.srcSize, job.compressionLevel); - size_t const cSize = ZSTD_compressCCtx(cctx, dstBuffer.start, dstBuffer.bufferSize, job.src, job.srcSize, job.compressionLevel); - if (ZSTD_isError(cSize)) return (void*)(cSize); /* error - find a better way */ - size_t const writeError = ZSTDMT_tryWriteFrame(dstBufferManager, dstBuffer.start, cSize, job.frameNumber, job.isLastFrame); /* pas clair */ - if (ZSTD_isError(writeError)) return (void*)writeError; - if (job.frameNumber) ZSTDMT_releaseBuffer(pool, dstBuffer); - } + DEBUGLOG(5, "Entering ZSTDMT_compressFrame() "); + ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription; + DEBUGLOG(5, "compressing %u bytes with ZSTD_compressCCtx : ", (unsigned)job->srcSize); + job->cSize = ZSTD_compressCCtx(job->cctx, job->dstBuff.start, job->dstBuff.size, job->srcStart, job->srcSize, job->compressionLevel); + DEBUGLOG(5, "compressed to %u bytes ", (unsigned)job->cSize); + job->jobCompleted = 1; + DEBUGLOG(5, "unlocking mutex jobCompleted_mutex"); + pthread_mutex_unlock(job->jobCompleted_mutex); + DEBUGLOG(5, "ZSTDMT_compressFrame completed"); } + +/* note : calls to CCtxPool only from main thread */ + +typedef struct { + unsigned totalCCtx; + unsigned availCCtx; + ZSTD_CCtx* cctx[1]; /* variable size */ +} ZSTDMT_CCtxPool; + +static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads) +{ + ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) calloc(1, sizeof(ZSTDMT_CCtxPool) + nbThreads*sizeof(ZSTD_CCtx*)); + if (!cctxPool) return NULL; + { unsigned u; + for (u=0; ucctx[u] = ZSTD_createCCtx(); /* check for NULL result ! 
*/ + } + cctxPool->totalCCtx = cctxPool->availCCtx = nbThreads; + return cctxPool; +} + +static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* pool) +{ + if (pool->availCCtx) { + pool->availCCtx--; + return pool->cctx[pool->availCCtx]; + } + /* should not be possible, since totalCCtx==nbThreads */ + return ZSTD_createCCtx(); +} + +static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx) +{ + if (pool->availCCtx < pool->totalCCtx) + pool->cctx[pool->availCCtx++] = cctx; + else + /* should not be possible, since totalCCtx==nbThreads */ + ZSTD_freeCCtx(cctx); +} + +static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool) +{ + unsigned u; + for (u=0; utotalCCtx; u++) + ZSTD_freeCCtx(pool->cctx[u]); + free(pool); +} + + +struct ZSTDMT_CCtx_s { + POOL_ctx* factory; + ZSTDMT_bufferPool buffPool; + ZSTDMT_CCtxPool* cctxPool; + unsigned nbThreads; + pthread_mutex_t jobCompleted_mutex; + ZSTDMT_jobDescription jobs[1]; /* variable size */ +}; + ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads) { if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL; - ZSTDMT_CCtx* const cctx = (ZSTDMT_CCtx*) calloc(1, sizeof(ZSTDMT_CCtx)); + ZSTDMT_CCtx* const cctx = (ZSTDMT_CCtx*) calloc(1, sizeof(ZSTDMT_CCtx) + nbThreads*sizeof(ZSTDMT_jobDescription)); if (!cctx) return NULL; - /* init jobAgency */ - pthread_mutex_init(&cctx->jobAgency.jobAnnounce_mutex, NULL); /* check return value ? */ - pthread_mutex_init(&cctx->jobAgency.jobApply_mutex, NULL); - PTHREAD_MUTEX_LOCK(&cctx->jobAgency.jobAnnounce_mutex); /* no job at beginning */ - /* init bufferPool */ - pthread_mutex_init(&cctx->bufferPool.bufferPool_mutex, NULL); - /* start all workers */ cctx->nbThreads = nbThreads; - DEBUGLOG(2, "nbThreads : %u \n", nbThreads); - unsigned t; - for (t = 0; t < nbThreads; t++) { - pthread_create(&cctx->pthread[t], NULL, ZSTDMT_compressionThread, cctx); /* check return value ? 
*/ - } + cctx->factory = POOL_create(nbThreads, 1); + pthread_mutex_init(&cctx->buffPool.bufferPool_mutex, NULL); + cctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads); + pthread_mutex_init(&cctx->jobCompleted_mutex, NULL); return cctx; } -size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* cctx) +size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) /* incompleted ! */ { - /* free threads */ - /* free mutex (if necessary) */ + POOL_free(mtctx->factory); + /* free mutexes (if necessary) */ /* free bufferPool */ - free(cctx); /* incompleted ! */ + ZSTDMT_freeCCtxPool(mtctx->cctxPool); + free(mtctx); return 0; } + size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel) { - ZSTDMT_jobAgency* jobAgency = &mtctx->jobAgency; ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize, 0); size_t const frameSizeTarget = (size_t)1 << (params.cParams.windowLog + 2); unsigned const nbFramesMax = (unsigned)(srcSize / frameSizeTarget) + (srcSize < frameSizeTarget) /* min 1 */; @@ -336,7 +212,7 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, size_t remainingSrcSize = srcSize; const char* const srcStart = (const char*)src; size_t frameStartPos = 0; - ZSTDMT_dstBufferManager dbm = ZSTDMT_createDstBufferManager(dst, dstCapacity); + DEBUGLOG(2, "windowLog : %u => frameSizeTarget : %u ", params.cParams.windowLog, (U32)frameSizeTarget); DEBUGLOG(2, "nbFrames : %u (size : %u bytes) ", nbFrames, (U32)avgFrameSize); @@ -344,15 +220,46 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, { unsigned u; for (u=0; ubuffPool, dstBufferCapacity) : (buffer_t){ dst, dstCapacity }; + ZSTD_CCtx* cctx = ZSTDMT_getCCtx(mtctx->cctxPool); + + mtctx->jobs[u].srcStart = srcStart + frameStartPos; + mtctx->jobs[u].srcSize = frameSize; + mtctx->jobs[u].compressionLevel = compressionLevel; + mtctx->jobs[u].dstBuff = dstBuffer; + mtctx->jobs[u].cctx = cctx; + mtctx->jobs[u].frameID = u; + mtctx->jobs[u].jobCompleted = 0; + 
mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex; + DEBUGLOG(3, "posting job %u (%u bytes)", u, (U32)frameSize); - ZSTDMT_jobDescription const job = { srcStart+frameStartPos, frameSize, compressionLevel, - &dbm, u, u==(nbFrames-1) }; - ZSTDMT_postjob(jobAgency, job); + POOL_add(mtctx->factory, ZSTDMT_compressFrame, &mtctx->jobs[u]); + frameStartPos += frameSize; remainingSrcSize -= frameSize; } } + /* note : since nbFrames <= nbThreads, all jobs should be running immediately in parallel */ + + { unsigned frameID; + size_t dstPos = 0; + for (frameID=0; frameIDjobs[frameID].jobCompleted==0) { + DEBUGLOG(4, "waiting for signal jobCompleted_mutex") + pthread_mutex_lock(&mtctx->jobCompleted_mutex); + } + { size_t const cSize = mtctx->jobs[frameID].cSize; + if (ZSTD_isError(cSize)) return cSize; + if (dstPos + cSize > dstCapacity) return ERROR(dstSize_tooSmall); + if (frameID) memcpy((char*)dst + dstPos, mtctx->jobs[frameID].dstBuff.start, mtctx->jobs[frameID].cSize); + dstPos += cSize ; + } + ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[frameID].cctx); + ZSTDMT_releaseBuffer(&mtctx->buffPool, mtctx->jobs[frameID].dstBuff); + } + DEBUGLOG(4, "compressed size : %u ", (U32)dstPos); + return dstPos; + } - PTHREAD_MUTEX_LOCK(&dbm.allFramesWritten_mutex); - DEBUGLOG(4, "compressed size : %u ", (U32)dbm.out.pos); - return dbm.out.pos; } From c8efc1c8749a44af022f619b1a5f5672f66af24f Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 31 Dec 2016 14:45:33 +0100 Subject: [PATCH 10/73] simplified Buffer Pool --- lib/compress/zstdmt_compress.c | 64 ++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 1b925914..97de6e64 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -42,51 +42,65 @@ if (g_debugLevel>=MUTEX_WAIT_TIME_DLEVEL) { \ #define ZSTDMT_NBTHREADS_MAX 128 -#define ZSTDMT_NBSTACKEDFRAMES_MAX 
(2*ZSTDMT_NBTHREADS_MAX) + +/* === Buffer Pool === */ typedef struct buffer_s { void* start; size_t size; } buffer_t; -#define ZSTDMT_NBBUFFERSPOOLED_MAX ZSTDMT_NBTHREADS_MAX typedef struct ZSTDMT_bufferPool_s { - pthread_mutex_t bufferPool_mutex; - buffer_t bTable[ZSTDMT_NBBUFFERSPOOLED_MAX]; + unsigned totalBuffers;; unsigned nbBuffers; + buffer_t bTable[1]; /* variable size */ } ZSTDMT_bufferPool; +static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads) +{ + unsigned const maxNbBuffers = 2*nbThreads + 2; + ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)calloc(1, sizeof(ZSTDMT_bufferPool) + maxNbBuffers * sizeof(buffer_t)); + if (bufPool==NULL) return NULL; + bufPool->totalBuffers = maxNbBuffers; + return bufPool; +} + +static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool) +{ + unsigned u; + if (!bufPool) return; /* compatibility with free on NULL */ + for (u=0; utotalBuffers; u++) + free(bufPool->bTable[u].start); + free(bufPool); +} + +/* note : invocation only from main thread ! 
*/ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize) { - PTHREAD_MUTEX_LOCK(&pool->bufferPool_mutex); if (pool->nbBuffers) { /* try to use an existing buffer */ pool->nbBuffers--; buffer_t const buf = pool->bTable[pool->nbBuffers]; - pthread_mutex_unlock(&pool->bufferPool_mutex); size_t const availBufferSize = buf.size; if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize)) /* large enough, but not too much */ return buf; free(buf.start); /* size conditions not respected : create a new buffer */ } - pthread_mutex_unlock(&pool->bufferPool_mutex); /* create new buffer */ buffer_t buf; buf.size = bSize; - buf.start = calloc(1, bSize); + buf.start = malloc(bSize); return buf; } /* effectively store buffer for later re-use, up to pool capacity */ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf) { - PTHREAD_MUTEX_LOCK(&pool->bufferPool_mutex); - if (pool->nbBuffers >= ZSTDMT_NBBUFFERSPOOLED_MAX) { - pthread_mutex_unlock(&pool->bufferPool_mutex); - free(buf.start); + if (pool->nbBuffers < pool->totalBuffers) { + pool->bTable[pool->nbBuffers++] = buf; /* store for later re-use */ return; } - pool->bTable[pool->nbBuffers++] = buf; /* store for later re-use */ - pthread_mutex_unlock(&pool->bufferPool_mutex); + /* Reached bufferPool capacity (should not happen) */ + free(buf.start); } @@ -118,7 +132,7 @@ void ZSTDMT_compressFrame(void* jobDescription) } -/* note : calls to CCtxPool only from main thread */ +/* === CCtx Pool === */ typedef struct { unsigned totalCCtx; @@ -126,6 +140,8 @@ typedef struct { ZSTD_CCtx* cctx[1]; /* variable size */ } ZSTDMT_CCtxPool; +/* note : CCtxPool invocation only from main thread */ + static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads) { ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) calloc(1, sizeof(ZSTDMT_CCtxPool) + nbThreads*sizeof(ZSTD_CCtx*)); @@ -168,7 +184,7 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool) struct ZSTDMT_CCtx_s { POOL_ctx* factory; - 
ZSTDMT_bufferPool buffPool; + ZSTDMT_bufferPool* buffPool; ZSTDMT_CCtxPool* cctxPool; unsigned nbThreads; pthread_mutex_t jobCompleted_mutex; @@ -182,7 +198,7 @@ ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads) if (!cctx) return NULL; cctx->nbThreads = nbThreads; cctx->factory = POOL_create(nbThreads, 1); - pthread_mutex_init(&cctx->buffPool.bufferPool_mutex, NULL); + cctx->buffPool = ZSTDMT_createBufferPool(nbThreads); cctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads); pthread_mutex_init(&cctx->jobCompleted_mutex, NULL); return cctx; @@ -191,9 +207,9 @@ ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads) size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) /* incompleted ! */ { POOL_free(mtctx->factory); - /* free mutexes (if necessary) */ - /* free bufferPool */ + ZSTDMT_freeBufferPool(mtctx->buffPool); ZSTDMT_freeCCtxPool(mtctx->cctxPool); + pthread_mutex_destroy(&mtctx->jobCompleted_mutex); free(mtctx); return 0; } @@ -221,7 +237,7 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, for (u=0; ubuffPool, dstBufferCapacity) : (buffer_t){ dst, dstCapacity }; + buffer_t const dstBuffer = u ? 
ZSTDMT_getBuffer(mtctx->buffPool, dstBufferCapacity) : (buffer_t){ dst, dstCapacity }; ZSTD_CCtx* cctx = ZSTDMT_getCCtx(mtctx->cctxPool); mtctx->jobs[u].srcStart = srcStart + frameStartPos; @@ -252,13 +268,15 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, { size_t const cSize = mtctx->jobs[frameID].cSize; if (ZSTD_isError(cSize)) return cSize; if (dstPos + cSize > dstCapacity) return ERROR(dstSize_tooSmall); - if (frameID) memcpy((char*)dst + dstPos, mtctx->jobs[frameID].dstBuff.start, mtctx->jobs[frameID].cSize); + if (frameID) { + memcpy((char*)dst + dstPos, mtctx->jobs[frameID].dstBuff.start, cSize); + ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[frameID].dstBuff); + } dstPos += cSize ; } ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[frameID].cctx); - ZSTDMT_releaseBuffer(&mtctx->buffPool, mtctx->jobs[frameID].dstBuff); } - DEBUGLOG(4, "compressed size : %u ", (U32)dstPos); + DEBUGLOG(3, "compressed size : %u ", (U32)dstPos); return dstPos; } From 3b9d4343564233db19992950bbdb4a301d97f8f4 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 31 Dec 2016 16:32:19 +0100 Subject: [PATCH 11/73] extended ZSTDMT code support for non-MT systems and WIN32 (preliminary) --- lib/common/pool.c | 7 +-- lib/common/threading.c | 73 +++++++++++++++++++++++++++++++ lib/common/threading.h | 79 ++++++++++++++++++++++++++++++++++ lib/compress/zstdmt_compress.c | 2 +- 4 files changed, 157 insertions(+), 4 deletions(-) create mode 100644 lib/common/threading.c create mode 100644 lib/common/threading.h diff --git a/lib/common/pool.c b/lib/common/pool.c index 4ec1dfff..97ca7dda 100644 --- a/lib/common/pool.c +++ b/lib/common/pool.c @@ -13,7 +13,7 @@ #ifdef ZSTD_PTHREAD -#include +#include /* A job is a function and an opaque argument */ typedef struct POOL_job_s { @@ -161,7 +161,8 @@ void POOL_add(void *ctxVoid, POOL_function function, void *opaque) { pthread_cond_signal(&ctx->queuePopCond); } -#else +#else /* ZSTD_PTHREAD not defined */ +/* No multi-threading support */ /* 
We don't need any data, but if it is empty malloc() might return NULL. */ struct POOL_ctx_s { @@ -183,4 +184,4 @@ void POOL_add(void *ctx, POOL_function function, void *opaque) { function(opaque); } -#endif +#endif /* ZSTD_PTHREAD */ diff --git a/lib/common/threading.c b/lib/common/threading.c new file mode 100644 index 00000000..1725650c --- /dev/null +++ b/lib/common/threading.c @@ -0,0 +1,73 @@ + +/** + * Copyright (c) 2016 Tino Reichardt + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + */ + +/** + * This file will hold wrapper for systems, which do not support Pthreads + */ + +#ifdef _WIN32 + +/** + * Windows minimalist Pthread Wrapper, based on : + * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html + */ + + +/* === Dependencies === */ +#include +#include +#include "threading.h" + + +/* === Implementation === */ + +static unsigned __stdcall worker(void *arg) +{ + pthread_t* const thread = (pthread_t*) arg; + thread->arg = thread->start_routine(thread->arg); + return 0; +} + +int pthread_create(pthread_t* thread, const void* unused, + void* (*start_routine) (void*), void* arg) +{ + (void)unused; + thread->arg = arg; + thread->start_routine = start_routine; + thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL); + + if (!thread->handle) + return errno; + else + return 0; +} + +int _pthread_join(pthread_t * thread, void **value_ptr) +{ + DWORD result; + + if (!thread->handle) return 0; + + result = WaitForSingleObject(thread->handle, INFINITE); + switch (result) { + case WAIT_OBJECT_0: + if (value_ptr) *value_ptr = thread->arg; + return 0; + case WAIT_ABANDONED: + return EINVAL; + default: + return GetLastError(); + } +} + +#endif 
diff --git a/lib/common/threading.h b/lib/common/threading.h new file mode 100644 index 00000000..a8126eb7 --- /dev/null +++ b/lib/common/threading.h @@ -0,0 +1,79 @@ + +/** + * Copyright (c) 2016 Tino Reichardt + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + */ + +#ifndef THREADING_H_938743 +#define THREADING_H_938743 + +#if defined (__cplusplus) +extern "C" { +#endif + +#if defined(ZSTD_PTHREAD) && defined(_WIN32) + +/** + * Windows minimalist Pthread Wrapper, based on : + * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html + */ + +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif +#include + +/* mutex */ +#define pthread_mutex_t CRITICAL_SECTION +#define pthread_mutex_init(a,b) InitializeCriticalSection((a)) +#define pthread_mutex_destroy(a) DeleteCriticalSection((a)) +#define pthread_mutex_lock EnterCriticalSection +#define pthread_mutex_unlock LeaveCriticalSection + +/* pthread_create() and pthread_join() */ +typedef struct { + HANDLE handle; + void* (*start_routine)(void*); + void*varg; +} pthread_t; + +int pthread_create(pthread_t* thread, const void* unused, + void* (*start_routine) (void*), void* arg); + +#define pthread_join(a, b) _pthread_join(&(a), (b)) +int _pthread_join(pthread_t* thread, void** value_ptr); + +/** + * add here more wrappers as required + */ + + +#elif defined(ZSTD_PTHREAD) /* posix assumed ; need a better detection mathod */ +/* === POSIX Systems === */ +# include + +#else /* ZSTD_PTHREAD not defined */ +/* No multithreading support */ + +typedef int pthread_mutex_t; +#define pthread_mutex_init(a,b) +#define pthread_mutex_destroy(a) +#define pthread_mutex_lock(a) +#define pthread_mutex_unlock(a) + +/* do 
not use pthread_t */ + +#endif /* ZSTD_PTHREAD */ + +#if defined (__cplusplus) +} +#endif + +#endif /* THREADING_H_938743 */ diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 97de6e64..770f5975 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -1,6 +1,6 @@ #include /* malloc */ #include /* threadpool */ -#include /* mutex */ +#include "threading.h" /* mutex */ #include "zstd_internal.h" /* MIN, ERROR */ #include "zstdmt_compress.h" From d13243353465b386e333651e1ba63f5f4b30cdce Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Sat, 31 Dec 2016 19:10:13 -0500 Subject: [PATCH 12/73] Switch thread pool test to threading.h --- .travis.yml | 2 +- tests/Makefile | 8 +++++--- tests/pool.c | 6 +++--- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index 36537cbe..6bf99f1b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,7 +12,7 @@ matrix: os: linux sudo: false - - env: Ubu=12.04cont Cmd="make zlibwrapper && make clean && make -C tests test-pool && make -C tests test-symbols && make clean && make -C tests test-zstd-nolegacy && make clean && make cmaketest && make clean && make -C contrib/pzstd googletest pzstd tests check && make -C contrib/pzstd clean" + - env: Ubu=12.04cont Cmd="make zlibwrapper && make clean && make -C tests test-symbols && make clean && make -C tests test-zstd-nolegacy && make clean && make cmaketest && make clean && make -C contrib/pzstd googletest pzstd tests check && make -C contrib/pzstd clean" os: linux sudo: false language: cpp diff --git a/tests/Makefile b/tests/Makefile index 739944de..6312584a 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -48,8 +48,10 @@ ZDICT_FILES := $(ZSTDDIR)/dictBuilder/*.c # Define *.exe as extension for Windows systems ifneq (,$(filter Windows%,$(OS))) EXT =.exe +PTHREAD = -DZSTD_PTHREAD else EXT = +PTHREAD = -pthread -DZSTD_PTHREAD endif VOID = /dev/null @@ -158,8 +160,8 @@ else $(CC) $(FLAGS) $^ -o $@$(EXT) 
-Wl,-rpath=$(ZSTDDIR) $(ZSTDDIR)/libzstd.so endif -pool : pool.c $(ZSTDDIR)/common/pool.c - $(CC) $(FLAGS) -pthread -DZSTD_PTHREAD $^ -o $@$(EXT) +pool : pool.c $(ZSTDDIR)/common/pool.c $(ZSTDDIR)/common/threading.c + $(CC) $(FLAGS) $(PTHREAD) $^ -o $@$(EXT) namespaceTest: if $(CC) namespaceTest.c ../lib/common/xxhash.c -o $@ ; then echo compilation should fail; exit 1 ; fi @@ -225,7 +227,7 @@ zstd-playTests: datagen file $(ZSTD) ZSTD="$(QEMU_SYS) $(ZSTD)" ./playTests.sh $(ZSTDRTTEST) -test: test-zstd test-fullbench test-fuzzer test-zstream test-longmatch test-invalidDictionaries +test: test-zstd test-fullbench test-fuzzer test-zstream test-longmatch test-invalidDictionaries test-pool test32: test-zstd32 test-fullbench32 test-fuzzer32 test-zstream32 diff --git a/tests/pool.c b/tests/pool.c index ce38075d..27414642 100644 --- a/tests/pool.c +++ b/tests/pool.c @@ -1,5 +1,5 @@ #include "pool.h" -#include +#include "threading.h" #include #include @@ -31,7 +31,7 @@ int testOrder(size_t numThreads, size_t queueLog) { POOL_ctx *ctx = POOL_create(numThreads, queueLog); ASSERT_TRUE(ctx); data.i = 0; - ASSERT_FALSE(pthread_mutex_init(&data.mutex, NULL)); + pthread_mutex_init(&data.mutex, NULL); { size_t i; for (i = 0; i < 1024; ++i) { @@ -46,7 +46,7 @@ int testOrder(size_t numThreads, size_t queueLog) { ASSERT_EQ(i, data.data[i]); } } - ASSERT_FALSE(pthread_mutex_destroy(&data.mutex)); + pthread_mutex_destroy(&data.mutex); return 0; } From 4204e03e77113ccc22ebf805a8a8da60066b83e8 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Sat, 31 Dec 2016 19:10:29 -0500 Subject: [PATCH 13/73] Add threading.h condition variables --- lib/common/threading.c | 2 +- lib/common/threading.h | 31 ++++++++++++++++++++++++++++--- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/lib/common/threading.c b/lib/common/threading.c index 1725650c..abad2c15 100644 --- a/lib/common/threading.c +++ b/lib/common/threading.c @@ -15,7 +15,7 @@ * This file will hold wrapper for systems, which 
do not support Pthreads */ -#ifdef _WIN32 +#if defined(ZSTD_PTHREAD) && defined(_WIN32) /** * Windows minimalist Pthread Wrapper, based on : diff --git a/lib/common/threading.h b/lib/common/threading.h index a8126eb7..d5dc8f75 100644 --- a/lib/common/threading.h +++ b/lib/common/threading.h @@ -24,24 +24,42 @@ extern "C" { * Windows minimalist Pthread Wrapper, based on : * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html */ +#ifdef WINVER +# undef WINVER +#endif +#define WINVER 0x0600 + +#ifdef _WIN32_WINNT +# undef _WIN32_WINNT +#endif +#define _WIN32_WINNT 0x0600 #ifndef WIN32_LEAN_AND_MEAN # define WIN32_LEAN_AND_MEAN #endif + #include /* mutex */ #define pthread_mutex_t CRITICAL_SECTION #define pthread_mutex_init(a,b) InitializeCriticalSection((a)) #define pthread_mutex_destroy(a) DeleteCriticalSection((a)) -#define pthread_mutex_lock EnterCriticalSection -#define pthread_mutex_unlock LeaveCriticalSection +#define pthread_mutex_lock(a) EnterCriticalSection((a)) +#define pthread_mutex_unlock(a) LeaveCriticalSection((a)) + +/* condition variable */ +#define pthread_cond_t CONDITION_VARIABLE +#define pthread_cond_init(a, b) InitializeConditionVariable((a)) +#define pthread_cond_destroy(a) /* No delete */ +#define pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE) +#define pthread_cond_signal(a) WakeConditionVariable((a)) +#define pthread_cond_broadcast(a) WakeAllConditionVariable((a)) /* pthread_create() and pthread_join() */ typedef struct { HANDLE handle; void* (*start_routine)(void*); - void*varg; + void* arg; } pthread_t; int pthread_create(pthread_t* thread, const void* unused, @@ -68,6 +86,13 @@ typedef int pthread_mutex_t; #define pthread_mutex_lock(a) #define pthread_mutex_unlock(a) +typedef int pthread_cond_t; +#define pthread_cond_init(a,b) +#define pthread_cond_destroy(a) +#define pthread_cond_wait(a,b) +#define pthread_cond_signal(a) +#define pthread_cond_broadcast(a) + /* do not use pthread_t */ #endif /* ZSTD_PTHREAD */ From 
bb13387d7d04253713ed50c2e77c96df00a26d75 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Sat, 31 Dec 2016 19:10:47 -0500 Subject: [PATCH 14/73] Fix pool for threading.h --- lib/common/pool.c | 19 +++++++++---------- lib/common/pool.h | 1 + 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/common/pool.c b/lib/common/pool.c index 97ca7dda..e24691f7 100644 --- a/lib/common/pool.c +++ b/lib/common/pool.c @@ -51,21 +51,21 @@ static void* POOL_thread(void* opaque) { if (!ctx) { return NULL; } for (;;) { /* Lock the mutex and wait for a non-empty queue or until shutdown */ - if (pthread_mutex_lock(&ctx->queueMutex)) { return NULL; } + pthread_mutex_lock(&ctx->queueMutex); while (ctx->queueHead == ctx->queueTail && !ctx->shutdown) { - if (pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex)) { return NULL; } + pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex); } /* empty => shutting down: so stop */ if (ctx->queueHead == ctx->queueTail) { - if (pthread_mutex_unlock(&ctx->queueMutex)) { return NULL; } + pthread_mutex_unlock(&ctx->queueMutex); return opaque; } /* Pop a job off the queue */ { POOL_job const job = ctx->queue[ctx->queueHead]; ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize; /* Unlock the mutex, signal a pusher, and run the job */ - if (pthread_mutex_unlock(&ctx->queueMutex)) { return NULL; } - if (pthread_cond_signal(&ctx->queuePushCond)) { return NULL; } + pthread_mutex_unlock(&ctx->queueMutex); + pthread_cond_signal(&ctx->queuePushCond); job.function(job.opaque); } } @@ -73,7 +73,6 @@ static void* POOL_thread(void* opaque) { } POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) { - int err = 0; POOL_ctx *ctx; /* Check the parameters */ if (!numThreads || !queueSize) { return NULL; } @@ -88,15 +87,15 @@ POOL_ctx *POOL_create(size_t numThreads, size_t queueSize) { ctx->queue = (POOL_job *)malloc(ctx->queueSize * sizeof(POOL_job)); ctx->queueHead = 0; ctx->queueTail = 0; - err |= pthread_mutex_init(&ctx->queueMutex, 
NULL); - err |= pthread_cond_init(&ctx->queuePushCond, NULL); - err |= pthread_cond_init(&ctx->queuePopCond, NULL); + pthread_mutex_init(&ctx->queueMutex, NULL); + pthread_cond_init(&ctx->queuePushCond, NULL); + pthread_cond_init(&ctx->queuePopCond, NULL); ctx->shutdown = 0; /* Allocate space for the thread handles */ ctx->threads = (pthread_t *)malloc(numThreads * sizeof(pthread_t)); ctx->numThreads = 0; /* Check for errors */ - if (!ctx->threads || !ctx->queue || err) { POOL_free(ctx); return NULL; } + if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; } /* Initialize the threads */ { size_t i; for (i = 0; i < numThreads; ++i) { diff --git a/lib/common/pool.h b/lib/common/pool.h index f4afc1ee..c26f543f 100644 --- a/lib/common/pool.h +++ b/lib/common/pool.h @@ -39,6 +39,7 @@ typedef void (*POOL_add_function)(void *, POOL_function, void *); /*! POOL_add() : Add the job `function(opaque)` to the thread pool. Possibly blocks until there is room in the queue. + Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed. 
*/ void POOL_add(void *ctx, POOL_function function, void *opaque); From 5ca0fd204548da3a0bdf524f5323045f154d4324 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Sat, 31 Dec 2016 22:39:32 -0500 Subject: [PATCH 15/73] Shorten thread pool tests --- tests/pool.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/pool.c b/tests/pool.c index 27414642..adc5947d 100644 --- a/tests/pool.c +++ b/tests/pool.c @@ -14,7 +14,7 @@ struct data { pthread_mutex_t mutex; - unsigned data[1024]; + unsigned data[16]; size_t i; }; @@ -26,20 +26,20 @@ void fn(void *opaque) { pthread_mutex_unlock(&data->mutex); } -int testOrder(size_t numThreads, size_t queueLog) { +int testOrder(size_t numThreads, size_t queueSize) { struct data data; - POOL_ctx *ctx = POOL_create(numThreads, queueLog); + POOL_ctx *ctx = POOL_create(numThreads, queueSize); ASSERT_TRUE(ctx); data.i = 0; pthread_mutex_init(&data.mutex, NULL); { size_t i; - for (i = 0; i < 1024; ++i) { + for (i = 0; i < 16; ++i) { POOL_add(ctx, &fn, &data); } } POOL_free(ctx); - ASSERT_EQ(1024, data.i); + ASSERT_EQ(16, data.i); { size_t i; for (i = 0; i < data.i; ++i) { @@ -52,19 +52,19 @@ int testOrder(size_t numThreads, size_t queueLog) { int main(int argc, const char **argv) { size_t numThreads; - for (numThreads = 1; numThreads <= 8; ++numThreads) { - size_t queueLog; - for (queueLog = 1; queueLog <= 8; ++queueLog) { - if (testOrder(numThreads, queueLog)) { + for (numThreads = 1; numThreads <= 4; ++numThreads) { + size_t queueSize; + for (queueSize = 1; queueSize <= 2; ++queueSize) { + if (testOrder(numThreads, queueSize)) { printf("FAIL: testOrder\n"); return 1; } } } printf("PASS: testOrder\n"); - (POOL_create(0, 1) || POOL_create(1, 0)) ? printf("FAIL: testInvalid\n") - : printf("PASS: testInvalid\n"); (void)argc; (void)argv; + return (POOL_create(0, 1) || POOL_create(1, 0)) ? 
printf("FAIL: testInvalid\n"), 1 + : printf("PASS: testInvalid\n"), 0; return 0; } From 2ec635a16236e53014ce9ee69a01cdbf8ca77836 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 1 Jan 2017 17:31:33 +0100 Subject: [PATCH 16/73] use pthread_cond to send signals between threads --- lib/compress/zstdmt_compress.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 770f5975..b9cc81f6 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -115,6 +115,7 @@ typedef struct { size_t cSize; unsigned jobCompleted; pthread_mutex_t* jobCompleted_mutex; + pthread_cond_t* jobCompleted_cond; } ZSTDMT_jobDescription; /* ZSTDMT_compressFrame() : POOL_function type */ @@ -126,7 +127,9 @@ void ZSTDMT_compressFrame(void* jobDescription) job->cSize = ZSTD_compressCCtx(job->cctx, job->dstBuff.start, job->dstBuff.size, job->srcStart, job->srcSize, job->compressionLevel); DEBUGLOG(5, "compressed to %u bytes ", (unsigned)job->cSize); job->jobCompleted = 1; - DEBUGLOG(5, "unlocking mutex jobCompleted_mutex"); + DEBUGLOG(5, "sending jobCompleted signal"); + pthread_mutex_lock(job->jobCompleted_mutex); + pthread_cond_signal(job->jobCompleted_cond); pthread_mutex_unlock(job->jobCompleted_mutex); DEBUGLOG(5, "ZSTDMT_compressFrame completed"); } @@ -188,6 +191,7 @@ struct ZSTDMT_CCtx_s { ZSTDMT_CCtxPool* cctxPool; unsigned nbThreads; pthread_mutex_t jobCompleted_mutex; + pthread_cond_t jobCompleted_cond; ZSTDMT_jobDescription jobs[1]; /* variable size */ }; @@ -201,6 +205,7 @@ ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads) cctx->buffPool = ZSTDMT_createBufferPool(nbThreads); cctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads); pthread_mutex_init(&cctx->jobCompleted_mutex, NULL); + pthread_cond_init(&cctx->jobCompleted_cond, NULL); return cctx; } @@ -248,6 +253,7 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, mtctx->jobs[u].frameID = u; 
mtctx->jobs[u].jobCompleted = 0; mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex; + mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond; DEBUGLOG(3, "posting job %u (%u bytes)", u, (U32)frameSize); POOL_add(mtctx->factory, ZSTDMT_compressFrame, &mtctx->jobs[u]); @@ -261,10 +267,14 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, size_t dstPos = 0; for (frameID=0; frameIDjobCompleted_mutex); while (mtctx->jobs[frameID].jobCompleted==0) { - DEBUGLOG(4, "waiting for signal jobCompleted_mutex") - pthread_mutex_lock(&mtctx->jobCompleted_mutex); + DEBUGLOG(4, "waiting for jobCompleted signal for frame %u", frameID); + pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex); } + pthread_mutex_unlock(&mtctx->jobCompleted_mutex); + { size_t const cSize = mtctx->jobs[frameID].cSize; if (ZSTD_isError(cSize)) return cSize; if (dstPos + cSize > dstCapacity) return ERROR(dstSize_tooSmall); From 0ec6a95ba126978567fafc02f839064c60b74b9d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 2 Jan 2017 00:49:42 +0100 Subject: [PATCH 17/73] minor fixes --- lib/compress/zstdmt_compress.c | 5 +++-- lib/zstd.h | 6 +++--- programs/bench.c | 11 +++++------ 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index b9cc81f6..294ce86d 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -123,12 +123,12 @@ void ZSTDMT_compressFrame(void* jobDescription) { DEBUGLOG(5, "Entering ZSTDMT_compressFrame() "); ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription; - DEBUGLOG(5, "compressing %u bytes with ZSTD_compressCCtx : ", (unsigned)job->srcSize); + DEBUGLOG(5, "compressing %u bytes from frame %u with ZSTD_compressCCtx : ", (unsigned)job->srcSize, job->jobCompleted); job->cSize = ZSTD_compressCCtx(job->cctx, job->dstBuff.start, job->dstBuff.size, job->srcStart, job->srcSize, job->compressionLevel); DEBUGLOG(5, "compressed to %u 
bytes ", (unsigned)job->cSize); - job->jobCompleted = 1; DEBUGLOG(5, "sending jobCompleted signal"); pthread_mutex_lock(job->jobCompleted_mutex); + job->jobCompleted = 1; pthread_cond_signal(job->jobCompleted_cond); pthread_mutex_unlock(job->jobCompleted_mutex); DEBUGLOG(5, "ZSTDMT_compressFrame completed"); @@ -215,6 +215,7 @@ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) /* incompleted ! */ ZSTDMT_freeBufferPool(mtctx->buffPool); ZSTDMT_freeCCtxPool(mtctx->cctxPool); pthread_mutex_destroy(&mtctx->jobCompleted_mutex); + pthread_cond_destroy(&mtctx->jobCompleted_cond); free(mtctx); return 0; } diff --git a/lib/zstd.h b/lib/zstd.h index 333feff7..55cc466d 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -369,9 +369,9 @@ typedef struct { } ZSTD_compressionParameters; typedef struct { - unsigned contentSizeFlag; /**< 1: content size will be in frame header (if known). */ - unsigned checksumFlag; /**< 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */ - unsigned noDictIDFlag; /**< 1: no dict ID will be saved into frame header (if dictionary compression) */ + unsigned contentSizeFlag; /**< 1: content size will be in frame header (when known) */ + unsigned checksumFlag; /**< 1: generate a 32-bits checksum at end of frame, for error detection */ + unsigned noDictIDFlag; /**< 1: no dictID will be saved into frame header (if dictionary compression) */ } ZSTD_frameParameters; typedef struct { diff --git a/programs/bench.c b/programs/bench.c index c718e219..e846e9ef 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -91,12 +91,12 @@ static clock_t g_time = 0; /* ************************************* * Time ***************************************/ -/* for posix only - proper detection macros to setup */ +/* for posix only - needs proper detection macros to setup */ #include #include typedef unsigned long long clock_us_t; -static clock_us_t BMK_clockMicroSec() +static clock_us_t BMK_clockMicroSec(void) { static clock_t 
_ticksPerSecond = 0; if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK); @@ -235,7 +235,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, /* Bench */ { U64 fastestC = (U64)(-1LL), fastestD = (U64)(-1LL); U64 const crcOrig = g_decodeOnly ? 0 : XXH64(srcBuffer, srcSize, 0); - UTIL_time_t coolTime; + clock_us_t coolTime = BMK_clockMicroSec(); U64 const maxTime = (g_nbSeconds * TIMELOOP_MICROSEC) + 1; U64 totalCTime=0, totalDTime=0; U32 cCompleted=g_decodeOnly, dCompleted=0; @@ -245,15 +245,14 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, ZSTDMT_CCtx* const mtcctx = ZSTDMT_createCCtx(g_nbThreads); - UTIL_getTime(&coolTime); DISPLAYLEVEL(2, "\r%79s\r", ""); while (!cCompleted || !dCompleted) { /* overheat protection */ - if (UTIL_clockSpanMicro(coolTime, ticksPerSecond) > ACTIVEPERIOD_MICROSEC) { + if (BMK_clockMicroSec() - coolTime > ACTIVEPERIOD_MICROSEC) { DISPLAYLEVEL(2, "\rcooling down ... \r"); UTIL_sleep(COOLPERIOD_SEC); - UTIL_getTime(&coolTime); + coolTime = BMK_clockMicroSec(); } if (!g_decodeOnly) { From f1cb55192c3b61768678a748a60e9b83f98133f3 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 2 Jan 2017 01:11:55 +0100 Subject: [PATCH 18/73] fixed linux warnings --- lib/common/threading.h | 4 ++-- lib/compress/zstdmt_compress.c | 18 +++++++++--------- programs/bench.c | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/lib/common/threading.h b/lib/common/threading.h index d5dc8f75..4572d71d 100644 --- a/lib/common/threading.h +++ b/lib/common/threading.h @@ -80,13 +80,13 @@ int _pthread_join(pthread_t* thread, void** value_ptr); #else /* ZSTD_PTHREAD not defined */ /* No multithreading support */ -typedef int pthread_mutex_t; +#define pthread_mutex_t int /* #define rather than typedef, as sometimes pthread support is implicit, resulting in duplicated symbols */ #define pthread_mutex_init(a,b) #define pthread_mutex_destroy(a) #define pthread_mutex_lock(a) #define 
pthread_mutex_unlock(a) -typedef int pthread_cond_t; +#define pthread_cond_t int #define pthread_cond_init(a,b) #define pthread_cond_destroy(a) #define pthread_cond_wait(a,b) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 294ce86d..dd495c98 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -78,18 +78,18 @@ static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool) static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize) { if (pool->nbBuffers) { /* try to use an existing buffer */ - pool->nbBuffers--; - buffer_t const buf = pool->bTable[pool->nbBuffers]; + buffer_t const buf = pool->bTable[--(pool->nbBuffers)]; size_t const availBufferSize = buf.size; if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize)) /* large enough, but not too much */ return buf; free(buf.start); /* size conditions not respected : create a new buffer */ } /* create new buffer */ - buffer_t buf; - buf.size = bSize; - buf.start = malloc(bSize); - return buf; + { buffer_t buf; + buf.size = bSize; + buf.start = malloc(bSize); + return buf; + } } /* effectively store buffer for later re-use, up to pool capacity */ @@ -121,9 +121,8 @@ typedef struct { /* ZSTDMT_compressFrame() : POOL_function type */ void ZSTDMT_compressFrame(void* jobDescription) { - DEBUGLOG(5, "Entering ZSTDMT_compressFrame() "); ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription; - DEBUGLOG(5, "compressing %u bytes from frame %u with ZSTD_compressCCtx : ", (unsigned)job->srcSize, job->jobCompleted); + DEBUGLOG(5, "thread : compressing %u bytes from frame %u with ZSTD_compressCCtx : ", (unsigned)job->srcSize, job->jobCompleted); job->cSize = ZSTD_compressCCtx(job->cctx, job->dstBuff.start, job->dstBuff.size, job->srcStart, job->srcSize, job->compressionLevel); DEBUGLOG(5, "compressed to %u bytes ", (unsigned)job->cSize); DEBUGLOG(5, "sending jobCompleted signal"); @@ -197,8 +196,9 @@ struct ZSTDMT_CCtx_s { 
ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads) { + ZSTDMT_CCtx* cctx; if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL; - ZSTDMT_CCtx* const cctx = (ZSTDMT_CCtx*) calloc(1, sizeof(ZSTDMT_CCtx) + nbThreads*sizeof(ZSTDMT_jobDescription)); + cctx = (ZSTDMT_CCtx*) calloc(1, sizeof(ZSTDMT_CCtx) + nbThreads*sizeof(ZSTDMT_jobDescription)); if (!cctx) return NULL; cctx->nbThreads = nbThreads; cctx->factory = POOL_create(nbThreads, 1); diff --git a/programs/bench.c b/programs/bench.c index e846e9ef..a3c013a8 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -101,8 +101,8 @@ static clock_us_t BMK_clockMicroSec(void) static clock_t _ticksPerSecond = 0; if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK); - struct tms junk; clock_t newTicks = (clock_t) times(&junk); (void)junk; - return ((((clock_us_t)newTicks)*(1000000))/_ticksPerSecond); + { struct tms junk; clock_t newTicks = (clock_t) times(&junk); (void)junk; + return ((((clock_us_t)newTicks)*(1000000))/_ticksPerSecond); } } From cdb2763f4a325c629794bde2816aeba1fe09f06a Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 2 Jan 2017 01:43:56 +0100 Subject: [PATCH 19/73] new Makefile target zstdmt --- programs/Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/programs/Makefile b/programs/Makefile index 6bd0014a..4e3510e4 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -130,6 +130,10 @@ gzstd: clean_decomp_o && $(MAKE) zstd; \ fi +zstdmt: CPPFLAGS += -DZSTD_PTHREAD +zstdmt: LDFLAGS += -lpthread +zstdmt: zstd + generate_res: windres/generate_res.bat @@ -164,7 +168,7 @@ ifneq (,$(filter $(shell uname),OpenBSD FreeBSD NetBSD DragonFly SunOS)) MANDIR ?= $(PREFIX)/man/man1 else MANDIR ?= $(PREFIX)/share/man/man1 -endif +endif INSTALL_PROGRAM ?= $(INSTALL) -m 755 INSTALL_SCRIPT ?= $(INSTALL) -m 755 From 747452677d6eca37578ee77307c652f4135909d7 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 2 Jan 2017 02:05:45 +0100 Subject: [PATCH 20/73] 
fixed cmake tests --- build/cmake/lib/CMakeLists.txt | 37 ++++++++--------------------- build/cmake/programs/CMakeLists.txt | 34 ++++++-------------------- programs/zstdcli.c | 12 +++++----- 3 files changed, 23 insertions(+), 60 deletions(-) diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt index 41fe2733..dce39aba 100644 --- a/build/cmake/lib/CMakeLists.txt +++ b/build/cmake/lib/CMakeLists.txt @@ -1,30 +1,10 @@ # ################################################################ -# zstd - Makefile -# Copyright (C) Yann Collet 2014-2016 -# All rights reserved. -# -# BSD license -# -# Redistribution and use in source and binary forms, with or without modification, -# are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, this -# list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# * Copyright (c) 2014-present, Yann Collet, Facebook, Inc. +# * All rights reserved. 
+# * +# * This source code is licensed under the BSD-style license found in the +# * LICENSE file in the root directory of this source tree. An additional grant +# * of patent rights can be found in the PATENTS file in the same directory. # # You can contact the author at : # - zstd homepage : http://www.zstd.net/ @@ -58,13 +38,16 @@ MESSAGE("ZSTD VERSION ${LIBVER_MAJOR}.${LIBVER_MINOR}.${LIBVER_RELEASE}") SET(Sources ${LIBRARY_DIR}/common/entropy_common.c + ${LIBRARY_DIR}/common/fse_decompress.c + ${LIBRARY_DIR}/common/threading.c + ${LIBRARY_DIR}/common/pool.c ${LIBRARY_DIR}/common/zstd_common.c ${LIBRARY_DIR}/common/error_private.c ${LIBRARY_DIR}/common/xxhash.c - ${LIBRARY_DIR}/common/fse_decompress.c ${LIBRARY_DIR}/compress/fse_compress.c ${LIBRARY_DIR}/compress/huf_compress.c ${LIBRARY_DIR}/compress/zstd_compress.c + ${LIBRARY_DIR}/compress/zstdmt_compress.c ${LIBRARY_DIR}/decompress/huf_decompress.c ${LIBRARY_DIR}/decompress/zstd_decompress.c ${LIBRARY_DIR}/dictBuilder/divsufsort.c diff --git a/build/cmake/programs/CMakeLists.txt b/build/cmake/programs/CMakeLists.txt index c2931b09..9b3c3acc 100644 --- a/build/cmake/programs/CMakeLists.txt +++ b/build/cmake/programs/CMakeLists.txt @@ -1,30 +1,10 @@ # ################################################################ -# zstd - Makefile -# Copyright (C) Yann Collet 2014-2016 -# All rights reserved. -# -# BSD license -# -# Redistribution and use in source and binary forms, with or without modification, -# are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, this -# list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# * Copyright (c) 2015-present, Yann Collet, Facebook, Inc. +# * All rights reserved. +# * +# * This source code is licensed under the BSD-style license found in the +# * LICENSE file in the root directory of this source tree. An additional grant +# * of patent rights can be found in the PATENTS file in the same directory. # # You can contact the author at : # - zstd homepage : http://www.zstd.net/ @@ -40,7 +20,7 @@ SET(ROOT_DIR ../../..) 
# Define programs directory, where sources and header files are located SET(LIBRARY_DIR ${ROOT_DIR}/lib) SET(PROGRAMS_DIR ${ROOT_DIR}/programs) -INCLUDE_DIRECTORIES(${PROGRAMS_DIR} ${LIBRARY_DIR} ${LIBRARY_DIR}/common ${LIBRARY_DIR}/dictBuilder) +INCLUDE_DIRECTORIES(${PROGRAMS_DIR} ${LIBRARY_DIR} ${LIBRARY_DIR}/common ${LIBRARY_DIR}/compression ${LIBRARY_DIR}/dictBuilder) IF (ZSTD_LEGACY_SUPPORT) SET(PROGRAMS_LEGACY_DIR ${PROGRAMS_DIR}/legacy) diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 03ad1ac7..0474c96c 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -422,6 +422,12 @@ int main(int argCount, const char* argv[]) BMK_SetBlockSize(bSize); } break; + + /* nb of threads (hidden option) */ + case 'T': + argument++; + BMK_SetNbThreads(readU32FromChar(&argument)); + break; #endif /* ZSTD_NOBENCH */ /* Dictionary Selection level */ @@ -430,12 +436,6 @@ int main(int argCount, const char* argv[]) dictSelect = readU32FromChar(&argument); break; - /* nb of threads (hidden option) */ - case 'T': - argument++; - BMK_SetNbThreads(readU32FromChar(&argument)); - break; - /* Pause at the end (-p) or set an additional param (-p#) (hidden option) */ case 'p': argument++; #ifndef ZSTD_NOBENCH From 6334b04d6123cd426c1bcc69e0167331c9f098d4 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 2 Jan 2017 03:22:18 +0100 Subject: [PATCH 21/73] compile object files, for faster recompilation --- lib/Makefile | 9 +++++---- programs/Makefile | 31 ++++++++++++++----------------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/lib/Makefile b/lib/Makefile index efd3b87f..34363b7b 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -36,6 +36,8 @@ CPPFLAGS += -I./legacy -DZSTD_LEGACY_SUPPORT=1 ZSTD_FILES+= $(wildcard legacy/*.c) endif +ZSTD_OBJ := $(patsubst %.c,%.o,$(ZSTD_FILES)) + # OS X linker doesn't support -soname, and use different extension # see : 
https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html ifeq ($(shell uname), Darwin) @@ -60,10 +62,9 @@ default: lib all: lib libzstd.a: ARFLAGS = rcs -libzstd.a: $(ZSTD_FILES) +libzstd.a: $(ZSTD_OBJ) @echo compiling static library - @$(CC) $(FLAGS) -c $^ - @$(AR) $(ARFLAGS) $@ *.o + @$(AR) $(ARFLAGS) $@ $^ $(LIBZSTD): LDFLAGS += -shared -fPIC -fvisibility=hidden $(LIBZSTD): $(ZSTD_FILES) @@ -84,7 +85,7 @@ lib: libzstd.a libzstd clean: @$(RM) core *.o *.a *.gcda *.$(SHARED_EXT) *.$(SHARED_EXT).* libzstd.pc dll/libzstd.dll dll/libzstd.lib - @$(RM) decompress/*.o + @$(RM) common/*.o compress/*.o decompress/*.o dictBuilder/*.o legacy/*.o deprecated/*.o @echo Cleaning library completed #----------------------------------------------------------------------------- diff --git a/programs/Makefile b/programs/Makefile index 4e3510e4..77d5ab6e 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -1,5 +1,5 @@ # ########################################################################## -# Copyright (c) 2016-present, Yann Collet, Facebook, Inc. +# Copyright (c) 2015-present, Yann Collet, Facebook, Inc. # All rights reserved. 
# # This source code is licensed under the BSD-style license found in the @@ -33,7 +33,7 @@ FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) ZSTDCOMMON_FILES := $(ZSTDDIR)/common/*.c ZSTDCOMP_FILES := $(ZSTDDIR)/compress/*.c -ZSTDDECOMP_FILES := $(ZSTDDIR)/decompress/huf_decompress.c +ZSTDDECOMP_FILES := $(ZSTDDIR)/decompress/*.c ZSTD_FILES := $(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) ZDICT_FILES := $(ZSTDDIR)/dictBuilder/*.c ZSTDDECOMP_O = $(ZSTDDIR)/decompress/zstd_decompress.o @@ -47,6 +47,8 @@ CPPFLAGS += -I$(ZSTDDIR)/legacy ZSTDLEGACY_FILES:= $(ZSTDDIR)/legacy/*.c endif +ZSTDLIB_FILES := $(wildcard $(ZSTD_FILES)) $(wildcard $(ZSTDLEGACY_FILES)) $(wildcard $(ZDICT_FILES)) +ZSTDLIB_OBJ := $(patsubst %.c,%.o,$(ZSTDLIB_FILES)) # Define *.exe as extension for Windows systems ifneq (,$(filter Windows%,$(OS))) @@ -72,8 +74,7 @@ all: zstd $(ZSTDDECOMP_O): CFLAGS += $(ALIGN_LOOP) zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) -zstd : $(ZSTDDECOMP_O) $(ZSTD_FILES) $(ZSTDLEGACY_FILES) $(ZDICT_FILES) \ - zstdcli.c fileio.c bench.c datagen.c dibio.c +zstd : $(ZSTDLIB_OBJ) zstdcli.o fileio.o bench.o datagen.o dibio.o ifneq (,$(filter Windows%,$(OS))) windres/generate_res.bat endif @@ -81,8 +82,7 @@ endif zstd32 : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) -zstd32 : $(ZSTDDIR)/decompress/zstd_decompress.c $(ZSTD_FILES) $(ZSTDLEGACY_FILES) $(ZDICT_FILES) \ - zstdcli.c fileio.c bench.c datagen.c dibio.c +zstd32 : $(ZSTDLIB_FILES) zstdcli.c fileio.c bench.c datagen.c dibio.c ifneq (,$(filter Windows%,$(OS))) windres/generate_res.bat endif @@ -104,26 +104,23 @@ zstd-pgo : clean zstd $(RM) $(ZSTDDECOMP_O) $(MAKE) zstd MOREFLAGS=-fprofile-use -zstd-frugal: $(ZSTDDECOMP_O) $(ZSTD_FILES) zstdcli.c fileio.c +zstd-frugal: $(ZSTD_FILES) zstdcli.c fileio.c $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT $^ -o zstd$(EXT) -zstd-small: clean_decomp_o - ZSTD_LEGACY_SUPPORT=0 CFLAGS="-Os -s" $(MAKE) zstd-frugal +zstd-small: + CFLAGS="-Os -s" $(MAKE) 
zstd-frugal -zstd-decompress-clean: $(ZSTDDECOMP_O) $(ZSTDCOMMON_FILES) $(ZSTDDECOMP_FILES) zstdcli.c fileio.c - $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NOCOMPRESS $^ -o zstd-decompress$(EXT) - -zstd-decompress: clean_decomp_o - ZSTD_LEGACY_SUPPORT=0 $(MAKE) zstd-decompress-clean +zstd-decompress: $(ZSTDCOMMON_FILES) $(ZSTDDECOMP_FILES) zstdcli.c fileio.c + $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NOCOMPRESS $^ -o $@$(EXT) zstd-compress: $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) zstdcli.c fileio.c $(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NODECOMPRESS $^ -o $@$(EXT) -gzstd: clean_decomp_o +gzstd: @echo "int main(){}" | $(CC) -o have_zlib -x c - -lz && echo found zlib || echo did not found zlib @if [ -s have_zlib ]; then \ echo building gzstd with .gz decompression support \ - && rm have_zlib$(EXT) \ + && $(RM) have_zlib$(EXT) fileio.o \ && CPPFLAGS=-DZSTD_GZDECOMPRESS LDFLAGS="-lz" $(MAKE) zstd; \ else \ echo "WARNING : no zlib, building gzstd with only .zst files support : NO .gz SUPPORT !!!" 
\ @@ -132,7 +129,7 @@ gzstd: clean_decomp_o zstdmt: CPPFLAGS += -DZSTD_PTHREAD zstdmt: LDFLAGS += -lpthread -zstdmt: zstd +zstdmt: clean zstd generate_res: windres/generate_res.bat From 85667997811104a5a5cfa5c37ff530931a454e9e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 3 Jan 2017 00:25:01 +0100 Subject: [PATCH 22/73] separated ppc and ppc64 tests, for more regular timing --- .travis.yml | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6bf99f1b..b0489bd6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -92,7 +92,17 @@ matrix: - gcc-aarch64-linux-gnu - libc6-dev-arm64-cross - - env: Ubu=14.04 Cmd='make ppctest && make clean && make ppc64test' + - env: Ubu=14.04 Cmd='make ppctest' + dist: trusty + sudo: required + addons: + apt: + packages: + - qemu-system-ppc + - qemu-user-static + - gcc-powerpc-linux-gnu + + - env: Ubu=14.04 Cmd='make ppc64test' dist: trusty sudo: required addons: @@ -101,7 +111,6 @@ matrix: - qemu-system-ppc - qemu-user-static - gcc-powerpc-linux-gnu - - libc6-dev-armel-cross - env: Ubu=14.04 Cmd='make -C lib all && CFLAGS="-O1 -g" make -C zlibWrapper valgrindTest && make -C tests valgrindTest' os: linux @@ -114,7 +123,7 @@ matrix: - env: Ubu=14.04 Cmd="make gpptest && make clean && make gnu90test && make clean && make c99test && make clean && make gnu99test && make clean - && make clangtest && make clean && make -C contrib/pzstd googletest32 + && make clangtest && make clean && make -C contrib/pzstd googletest32 && make -C contrib/pzstd all32 && make -C contrib/pzstd check && make -C contrib/pzstd clean" os: linux dist: trusty From 47557ba2b259828960598e1040af3f1d71222cd3 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 11 Jan 2017 15:35:56 +0100 Subject: [PATCH 23/73] fixed ZSTDMT_createCCtxPool() when inner CCtx creation fails --- lib/compress/zstdmt_compress.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git 
a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index dd495c98..9657bdc6 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -74,7 +74,7 @@ static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool) free(bufPool); } -/* note : invocation only from main thread ! */ +/* assumption : invocation from main thread only ! */ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize) { if (pool->nbBuffers) { /* try to use an existing buffer */ @@ -92,7 +92,7 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize) } } -/* effectively store buffer for later re-use, up to pool capacity */ +/* store buffer for later re-use, up to pool capacity */ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf) { if (pool->nbBuffers < pool->totalBuffers) { @@ -122,15 +122,12 @@ typedef struct { void ZSTDMT_compressFrame(void* jobDescription) { ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription; - DEBUGLOG(5, "thread : compressing %u bytes from frame %u with ZSTD_compressCCtx : ", (unsigned)job->srcSize, job->jobCompleted); job->cSize = ZSTD_compressCCtx(job->cctx, job->dstBuff.start, job->dstBuff.size, job->srcStart, job->srcSize, job->compressionLevel); - DEBUGLOG(5, "compressed to %u bytes ", (unsigned)job->cSize); - DEBUGLOG(5, "sending jobCompleted signal"); + DEBUGLOG(5, "frame %u : compressed %u bytes into %u bytes ", (unsigned)job->frameID, (unsigned)job->srcSize, (unsigned)job->cSize); pthread_mutex_lock(job->jobCompleted_mutex); job->jobCompleted = 1; pthread_cond_signal(job->jobCompleted_cond); pthread_mutex_unlock(job->jobCompleted_mutex); - DEBUGLOG(5, "ZSTDMT_compressFrame completed"); } @@ -142,16 +139,22 @@ typedef struct { ZSTD_CCtx* cctx[1]; /* variable size */ } ZSTDMT_CCtxPool; -/* note : CCtxPool invocation only from main thread */ +/* assumption : CCtxPool invocation only from main thread */ static ZSTDMT_CCtxPool* 
ZSTDMT_createCCtxPool(unsigned nbThreads) { ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) calloc(1, sizeof(ZSTDMT_CCtxPool) + nbThreads*sizeof(ZSTD_CCtx*)); if (!cctxPool) return NULL; - { unsigned u; - for (u=0; ucctx[u] = ZSTD_createCCtx(); /* check for NULL result ! */ - } + { unsigned threadNb; + for (threadNb=0; threadNbcctx[threadNb] = ZSTD_createCCtx(); + if (cctxPool->cctx[threadNb]==NULL) { /* failed cctx allocation : abort cctxPool creation */ + unsigned u; + for (u=0; ucctx[u]); + free(cctxPool); + return NULL; + } } } cctxPool->totalCCtx = cctxPool->availCCtx = nbThreads; return cctxPool; } From 8ce1cc2bec798fb959dcc403b462eebf3a985119 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 11 Jan 2017 15:44:26 +0100 Subject: [PATCH 24/73] improved ZSTDMT_createCCtxPool() cancellation use ZSTDMT_freeCCtxPool() to release the partially created pool. This avoids duplicating logic. Also : identified a new difficult corner case : when freeing the pool, all CCtx should already have been released back to the pool. Otherwise, it means some CCtx are still in use. There is currently no clear policy on what to do in such a case. Note : it's supposed to never happen. Since pool creation/usage is static, it has no external user, which limits risks.
--- lib/compress/zstdmt_compress.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 9657bdc6..5c7b654a 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -141,6 +141,15 @@ typedef struct { /* assumption : CCtxPool invocation only from main thread */ +/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */ +static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool) +{ + unsigned u; + for (u=0; uavailCCtx; u++) /* note : availCCtx is supposed == totalCCtx; otherwise, some CCtx are still in use */ + ZSTD_freeCCtx(pool->cctx[u]); + free(pool); +} + static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads) { ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) calloc(1, sizeof(ZSTDMT_CCtxPool) + nbThreads*sizeof(ZSTD_CCtx*)); @@ -149,10 +158,8 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads) for (threadNb=0; threadNbcctx[threadNb] = ZSTD_createCCtx(); if (cctxPool->cctx[threadNb]==NULL) { /* failed cctx allocation : abort cctxPool creation */ - unsigned u; - for (u=0; ucctx[u]); - free(cctxPool); + cctxPool->totalCCtx = cctxPool->availCCtx = threadNb; + ZSTDMT_freeCCtxPool(cctxPool); return NULL; } } } cctxPool->totalCCtx = cctxPool->availCCtx = nbThreads; @@ -165,7 +172,7 @@ static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* pool) pool->availCCtx--; return pool->cctx[pool->availCCtx]; } - /* should not be possible, since totalCCtx==nbThreads */ + /* note : should not be possible, since totalCCtx==nbThreads */ return ZSTD_createCCtx(); } @@ -174,18 +181,10 @@ static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx) if (pool->availCCtx < pool->totalCCtx) pool->cctx[pool->availCCtx++] = cctx; else - /* should not be possible, since totalCCtx==nbThreads */ + /* note : should not be possible, since totalCCtx==nbThreads */ 
ZSTD_freeCCtx(cctx); } -static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool) -{ - unsigned u; - for (u=0; utotalCCtx; u++) - ZSTD_freeCCtx(pool->cctx[u]); - free(pool); -} - struct ZSTDMT_CCtx_s { POOL_ctx* factory; From 085179bb78248796775193433c0596969511da55 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 11 Jan 2017 15:58:05 +0100 Subject: [PATCH 25/73] fixed ZSTDMT_createCCtx() : checked inner objects are properly created --- lib/compress/zstdmt_compress.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 5c7b654a..5fbf32b9 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -206,12 +206,16 @@ ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads) cctx->factory = POOL_create(nbThreads, 1); cctx->buffPool = ZSTDMT_createBufferPool(nbThreads); cctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads); - pthread_mutex_init(&cctx->jobCompleted_mutex, NULL); + if (!cctx->factory | !cctx->buffPool | !cctx->cctxPool) { /* one object was not created */ + ZSTDMT_freeCCtx(cctx); + return NULL; + } + pthread_mutex_init(&cctx->jobCompleted_mutex, NULL); /* Todo : check init function return */ pthread_cond_init(&cctx->jobCompleted_cond, NULL); return cctx; } -size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) /* incompleted ! 
*/ +size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) { POOL_free(mtctx->factory); ZSTDMT_freeBufferPool(mtctx->buffPool); From 04cbc364996dc6688436500b327003b63537631c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 11 Jan 2017 16:08:08 +0100 Subject: [PATCH 26/73] minor refactor (release CCtx 1st) and comment clarification --- lib/compress/zstdmt_compress.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 5fbf32b9..8471b750 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -277,21 +277,21 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, pthread_mutex_lock(&mtctx->jobCompleted_mutex); while (mtctx->jobs[frameID].jobCompleted==0) { - DEBUGLOG(4, "waiting for jobCompleted signal for frame %u", frameID); + DEBUGLOG(4, "waiting for jobCompleted signal from frame %u", frameID); pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex); } pthread_mutex_unlock(&mtctx->jobCompleted_mutex); + ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[frameID].cctx); { size_t const cSize = mtctx->jobs[frameID].cSize; if (ZSTD_isError(cSize)) return cSize; if (dstPos + cSize > dstCapacity) return ERROR(dstSize_tooSmall); - if (frameID) { + if (frameID) { /* note : frame 0 is already written directly into dst */ memcpy((char*)dst + dstPos, mtctx->jobs[frameID].dstBuff.start, cSize); ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[frameID].dstBuff); } dstPos += cSize ; } - ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[frameID].cctx); } DEBUGLOG(3, "compressed size : %u ", (U32)dstPos); return dstPos; From 5eb749e734120c3b50c4e434b45728ac7cdcc451 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 11 Jan 2017 18:21:25 +0100 Subject: [PATCH 27/73] ZSTDMT_compress() creates a single frame The new strategy involves cutting frame at block level. 
The result is a single frame, preserving ZSTD_getDecompressedSize() As a consequence, bench can now make a full round-trip, since the result is compatible with ZSTD_decompress(). This strategy will not make it possible to decode the frame with multiple threads since the exact cut between independent blocks is not known. MT decoding needs further discussions. --- Makefile | 2 +- lib/compress/zstd_compress.c | 6 ++++-- lib/compress/zstdmt_compress.c | 31 +++++++++++++++++++++++++------ lib/zstd.h | 6 +++--- programs/Makefile | 2 +- programs/bench.c | 17 +++++++++-------- 6 files changed, 43 insertions(+), 21 deletions(-) diff --git a/Makefile b/Makefile index 19b12d0e..0a3634c3 100644 --- a/Makefile +++ b/Makefile @@ -88,7 +88,7 @@ travis-install: $(MAKE) install PREFIX=~/install_test_dir gpptest: clean - $(MAKE) -C programs all CC=g++ CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror" + CC=g++ $(MAKE) -C programs all CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror" gcc5test: clean gcc-5 -v diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 7626b33a..c4dbb6ce 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2408,12 +2408,14 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, cctx->nextSrc = ip + srcSize; - { size_t const cSize = frame ? + if (srcSize) { + size_t const cSize = frame ? 
ZSTD_compress_generic (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize); if (ZSTD_isError(cSize)) return cSize; return cSize + fhSize; - } + } else + return fhSize; } diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 8471b750..ae986468 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -28,8 +28,8 @@ if (g_debugLevel>=MUTEX_WAIT_TIME_DLEVEL) { \ unsigned long long afterTime = GetCurrentClockTimeMicroseconds(); \ unsigned long long elapsedTime = (afterTime-beforeTime); \ if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \ - DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread %li took %llu microseconds to acquire mutex %s \n", \ - (long int) pthread_self(), elapsedTime, #mutex); \ + DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \ + elapsedTime, #mutex); \ } \ } else pthread_mutex_lock(mutex); @@ -112,6 +112,7 @@ typedef struct { buffer_t dstBuff; int compressionLevel; unsigned frameID; + unsigned long long fullFrameSize; size_t cSize; unsigned jobCompleted; pthread_mutex_t* jobCompleted_mutex; @@ -122,9 +123,26 @@ typedef struct { void ZSTDMT_compressFrame(void* jobDescription) { ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription; - job->cSize = ZSTD_compressCCtx(job->cctx, job->dstBuff.start, job->dstBuff.size, job->srcStart, job->srcSize, job->compressionLevel); + buffer_t dstBuff = job->dstBuff; + ZSTD_parameters const params = ZSTD_getParams(job->compressionLevel, job->fullFrameSize, 0); + size_t hSize = ZSTD_compressBegin_advanced(job->cctx, NULL, 0, params, job->fullFrameSize); + if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; } + hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, 0); /* flush frame header */ + if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; } + if 
((job->frameID & 1) == 0) { /* preserve frame header when it is first beginning of frame */ + dstBuff.start = (char*)dstBuff.start + hSize; + dstBuff.size -= hSize; + } else + hSize = 0; + + job->cSize = (job->frameID>=2) ? /* last chunk signal */ + ZSTD_compressEnd(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, job->srcSize) : + ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, job->srcSize); + if (!ZSTD_isError(job->cSize)) job->cSize += hSize; DEBUGLOG(5, "frame %u : compressed %u bytes into %u bytes ", (unsigned)job->frameID, (unsigned)job->srcSize, (unsigned)job->cSize); - pthread_mutex_lock(job->jobCompleted_mutex); + +_endJob: + PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex); job->jobCompleted = 1; pthread_cond_signal(job->jobCompleted_cond); pthread_mutex_unlock(job->jobCompleted_mutex); @@ -254,10 +272,11 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, mtctx->jobs[u].srcStart = srcStart + frameStartPos; mtctx->jobs[u].srcSize = frameSize; + mtctx->jobs[u].fullFrameSize = srcSize; mtctx->jobs[u].compressionLevel = compressionLevel; mtctx->jobs[u].dstBuff = dstBuffer; mtctx->jobs[u].cctx = cctx; - mtctx->jobs[u].frameID = u; + mtctx->jobs[u].frameID = (u>0) | ((u==nbFrames-1)<<1); mtctx->jobs[u].jobCompleted = 0; mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex; mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond; @@ -275,7 +294,7 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, for (frameID=0; frameIDjobCompleted_mutex); + PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex); while (mtctx->jobs[frameID].jobCompleted==0) { DEBUGLOG(4, "waiting for jobCompleted signal from frame %u", frameID); pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex); diff --git a/lib/zstd.h b/lib/zstd.h index 55cc466d..198f45ea 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -561,10 +561,10 @@ ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); In which case, it will "discard" the relevant memory 
section from its history. Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. - It's possible to use a NULL,0 src content, in which case, it will write a final empty block to end the frame, - Without last block mark, frames will be considered unfinished (broken) by decoders. + It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. + Without last block mark, frames will be considered unfinished (corrupted) by decoders. - You can then reuse `ZSTD_CCtx` (ZSTD_compressBegin()) to compress some new frame. + `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new frame. */ /*===== Buffer-less streaming compression functions =====*/ diff --git a/programs/Makefile b/programs/Makefile index 77d5ab6e..ff95ddc6 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -129,7 +129,7 @@ gzstd: zstdmt: CPPFLAGS += -DZSTD_PTHREAD zstdmt: LDFLAGS += -lpthread -zstdmt: clean zstd +zstdmt: zstd generate_res: windres/generate_res.bat diff --git a/programs/bench.c b/programs/bench.c index a3c013a8..40e1d4ab 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -321,7 +321,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, memcpy(compressedBuffer, srcBuffer, loadedCompressedSize); } -#if 1 +#if 0 /* disable decompression test */ dCompleted=1; (void)totalDTime; (void)fastestD; (void)crcOrig; /* unused when decompression disabled */ #else @@ -330,13 +330,14 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, UTIL_sleepMilli(1); /* give processor time to other processes */ UTIL_waitForNextTick(ticksPerSecond); - UTIL_getTime(&clockStart); if (!dCompleted) { U64 clockLoop = g_nbSeconds ? 
TIMELOOP_MICROSEC : 1; U32 nbLoops = 0; + clock_us_t clockStart; ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuffer, dictBufferSize); if (!ddict) EXM_THROW(2, "ZSTD_createDDict() allocation failure"); + clockStart = BMK_clockMicroSec(); do { U32 blockNb; for (blockNb=0; blockNb= maxTime); } } From 107bcbbbc23c73e8d48f066adc8959af690c0b12 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 12 Jan 2017 01:25:46 +0100 Subject: [PATCH 28/73] zstdmt : changed internal naming from frame to chunk Since the result of mt compression is a single frame, changed naming, which implied the concatenation of multiple frames. minor : ensures that content size is written in header --- lib/compress/zstdmt_compress.c | 261 ++++++++++++++++++++++----------- lib/compress/zstdmt_compress.h | 16 +- 2 files changed, 191 insertions(+), 86 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index ae986468..6fe37a6f 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -1,7 +1,8 @@ #include /* malloc */ +#include /* memcpy */ #include /* threadpool */ #include "threading.h" /* mutex */ -#include "zstd_internal.h" /* MIN, ERROR */ +#include "zstd_internal.h" /* MIN, ERROR, ZSTD_* */ #include "zstdmt_compress.h" #if 0 @@ -43,7 +44,7 @@ if (g_debugLevel>=MUTEX_WAIT_TIME_DLEVEL) { \ #define ZSTDMT_NBTHREADS_MAX 128 -/* === Buffer Pool === */ +/* ===== Buffer Pool ===== */ typedef struct buffer_s { void* start; @@ -82,13 +83,12 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize) size_t const availBufferSize = buf.size; if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize)) /* large enough, but not too much */ return buf; - free(buf.start); /* size conditions not respected : create a new buffer */ + free(buf.start); /* size conditions not respected : scratch this buffer and create a new one */ } /* create new buffer */ - { buffer_t buf; - buf.size = bSize; - buf.start = malloc(bSize); - return 
buf; + { void* const start = malloc(bSize); + if (start==NULL) bSize = 0; + return (buffer_t) { start, bSize }; /* note : start can be NULL if malloc fails ! */ } } @@ -104,52 +104,7 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf) } - -typedef struct { - ZSTD_CCtx* cctx; - const void* srcStart; - size_t srcSize; - buffer_t dstBuff; - int compressionLevel; - unsigned frameID; - unsigned long long fullFrameSize; - size_t cSize; - unsigned jobCompleted; - pthread_mutex_t* jobCompleted_mutex; - pthread_cond_t* jobCompleted_cond; -} ZSTDMT_jobDescription; - -/* ZSTDMT_compressFrame() : POOL_function type */ -void ZSTDMT_compressFrame(void* jobDescription) -{ - ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription; - buffer_t dstBuff = job->dstBuff; - ZSTD_parameters const params = ZSTD_getParams(job->compressionLevel, job->fullFrameSize, 0); - size_t hSize = ZSTD_compressBegin_advanced(job->cctx, NULL, 0, params, job->fullFrameSize); - if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; } - hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, 0); /* flush frame header */ - if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; } - if ((job->frameID & 1) == 0) { /* preserve frame header when it is first beginning of frame */ - dstBuff.start = (char*)dstBuff.start + hSize; - dstBuff.size -= hSize; - } else - hSize = 0; - - job->cSize = (job->frameID>=2) ? 
/* last chunk signal */ - ZSTD_compressEnd(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, job->srcSize) : - ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, job->srcSize); - if (!ZSTD_isError(job->cSize)) job->cSize += hSize; - DEBUGLOG(5, "frame %u : compressed %u bytes into %u bytes ", (unsigned)job->frameID, (unsigned)job->srcSize, (unsigned)job->cSize); - -_endJob: - PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex); - job->jobCompleted = 1; - pthread_cond_signal(job->jobCompleted_cond); - pthread_mutex_unlock(job->jobCompleted_mutex); -} - - -/* === CCtx Pool === */ +/* ===== CCtx Pool ===== */ typedef struct { unsigned totalCCtx; @@ -191,11 +146,12 @@ static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* pool) return pool->cctx[pool->availCCtx]; } /* note : should not be possible, since totalCCtx==nbThreads */ - return ZSTD_createCCtx(); + return ZSTD_createCCtx(); /* note : can be NULL is creation fails ! */ } static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx) { + if (cctx==NULL) return; /* release on NULL */ if (pool->availCCtx < pool->totalCCtx) pool->cctx[pool->availCCtx++] = cctx; else @@ -204,6 +160,55 @@ static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx) } +/* ===== Thread worker ===== */ + +typedef struct { + ZSTD_CCtx* cctx; + const void* srcStart; + size_t srcSize; + buffer_t dstBuff; + size_t cSize; + size_t dstFlushed; + unsigned long long fullFrameSize; + unsigned firstChunk; + unsigned lastChunk; + unsigned jobCompleted; + pthread_mutex_t* jobCompleted_mutex; + pthread_cond_t* jobCompleted_cond; + ZSTD_parameters params; +} ZSTDMT_jobDescription; + +/* ZSTDMT_compressChunk() : POOL_function type */ +void ZSTDMT_compressChunk(void* jobDescription) +{ + ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription; + buffer_t dstBuff = job->dstBuff; + size_t hSize = ZSTD_compressBegin_advanced(job->cctx, NULL, 0, job->params, job->fullFrameSize); + if 
(ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; } + hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, 0); /* flush frame header */ + if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; } + if (job->firstChunk) { /* preserve frame header when it is first chunk - otherwise, overwrite */ + dstBuff.start = (char*)dstBuff.start + hSize; + dstBuff.size -= hSize; + } else + hSize = 0; + + job->cSize = (job->lastChunk) ? /* last chunk signal */ + ZSTD_compressEnd(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, job->srcSize) : + ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, job->srcSize); + if (!ZSTD_isError(job->cSize)) job->cSize += hSize; + DEBUGLOG(5, "chunk %u : compressed %u bytes into %u bytes ", (unsigned)job->lastChunk, (unsigned)job->srcSize, (unsigned)job->cSize); + +_endJob: + PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex); + job->jobCompleted = 1; + pthread_cond_signal(job->jobCompleted_cond); + pthread_mutex_unlock(job->jobCompleted_mutex); +} + + +/* ===== Multi-threaded compression ===== */ + struct ZSTDMT_CCtx_s { POOL_ctx* factory; ZSTDMT_bufferPool* buffPool; @@ -250,64 +255,66 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, const void* src, size_t srcSize, int compressionLevel) { - ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize, 0); - size_t const frameSizeTarget = (size_t)1 << (params.cParams.windowLog + 2); - unsigned const nbFramesMax = (unsigned)(srcSize / frameSizeTarget) + (srcSize < frameSizeTarget) /* min 1 */; - unsigned const nbFrames = MIN(nbFramesMax, mtctx->nbThreads); - size_t const avgFrameSize = (srcSize + (nbFrames-1)) / nbFrames; + ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0); + size_t const chunkTargetSize = (size_t)1 << (params.cParams.windowLog + 2); + unsigned const nbChunksMax = (unsigned)(srcSize / chunkTargetSize) + (srcSize < chunkTargetSize) /* min 1 */; + unsigned const nbChunks = 
MIN(nbChunksMax, mtctx->nbThreads); + size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks; + size_t const avgChunkSize = ((proposedChunkSize & 0x1FFFF) < 0xFFFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize; /* avoid too small last block */ size_t remainingSrcSize = srcSize; const char* const srcStart = (const char*)src; size_t frameStartPos = 0; - - DEBUGLOG(2, "windowLog : %u => frameSizeTarget : %u ", params.cParams.windowLog, (U32)frameSizeTarget); - DEBUGLOG(2, "nbFrames : %u (size : %u bytes) ", nbFrames, (U32)avgFrameSize); + DEBUGLOG(3, "windowLog : %2u => chunkTargetSize : %u bytes ", params.cParams.windowLog, (U32)chunkTargetSize); + DEBUGLOG(2, "nbChunks : %2u (chunkSize : %u bytes) ", nbChunks, (U32)avgChunkSize); + params.fParams.contentSizeFlag = 1; { unsigned u; - for (u=0; ubuffPool, dstBufferCapacity) : (buffer_t){ dst, dstCapacity }; - ZSTD_CCtx* cctx = ZSTDMT_getCCtx(mtctx->cctxPool); + ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool); /* should check for NULL ! 
*/ mtctx->jobs[u].srcStart = srcStart + frameStartPos; - mtctx->jobs[u].srcSize = frameSize; + mtctx->jobs[u].srcSize = chunkSize; mtctx->jobs[u].fullFrameSize = srcSize; - mtctx->jobs[u].compressionLevel = compressionLevel; + mtctx->jobs[u].params = params; mtctx->jobs[u].dstBuff = dstBuffer; mtctx->jobs[u].cctx = cctx; - mtctx->jobs[u].frameID = (u>0) | ((u==nbFrames-1)<<1); + mtctx->jobs[u].firstChunk = (u==0); + mtctx->jobs[u].lastChunk = (u==nbChunks-1); mtctx->jobs[u].jobCompleted = 0; mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex; mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond; - DEBUGLOG(3, "posting job %u (%u bytes)", u, (U32)frameSize); - POOL_add(mtctx->factory, ZSTDMT_compressFrame, &mtctx->jobs[u]); + DEBUGLOG(3, "posting job %u (%u bytes)", u, (U32)chunkSize); + POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]); - frameStartPos += frameSize; - remainingSrcSize -= frameSize; + frameStartPos += chunkSize; + remainingSrcSize -= chunkSize; } } - /* note : since nbFrames <= nbThreads, all jobs should be running immediately in parallel */ + /* note : since nbChunks <= nbThreads, all jobs should be running immediately in parallel */ - { unsigned frameID; + { unsigned chunkID; size_t dstPos = 0; - for (frameID=0; frameIDjobCompleted_mutex); - while (mtctx->jobs[frameID].jobCompleted==0) { - DEBUGLOG(4, "waiting for jobCompleted signal from frame %u", frameID); + while (mtctx->jobs[chunkID].jobCompleted==0) { + DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", chunkID); pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex); } pthread_mutex_unlock(&mtctx->jobCompleted_mutex); - ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[frameID].cctx); - { size_t const cSize = mtctx->jobs[frameID].cSize; + ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx); + { size_t const cSize = mtctx->jobs[chunkID].cSize; if (ZSTD_isError(cSize)) return cSize; if (dstPos + cSize > dstCapacity) return 
ERROR(dstSize_tooSmall); - if (frameID) { /* note : frame 0 is already written directly into dst */ - memcpy((char*)dst + dstPos, mtctx->jobs[frameID].dstBuff.start, cSize); - ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[frameID].dstBuff); + if (chunkID) { /* note : chunk 0 is already written directly into dst */ + memcpy((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize); + ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff); } dstPos += cSize ; } @@ -317,3 +324,89 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, } } + + +/* ====================================== */ +/* ======= Streaming API ======= */ +/* ====================================== */ + +#if 0 + +size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) { + zcs->params = ZSTD_getParams(compressionLevel, 0, 0); + zcs->targetSectionSize = 1 << (zcs->params.cParams.windowLog + 2); + zcs->inBuffSize = 5 * (1 << zcs->params.cParams.windowLog); + zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); /* check for NULL ! 
*/ + zcs->inBuff.current = 0; + zcs->doneJobID = 0; + zcs->nextJobID = 0; + return 0; +} + +typedef struct { + buffer_t buffer; + unsigned current; +} inBuff_t; + + +size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + /* fill input buffer */ + { size_t const toLoad = MIN(input->size - input->pos, zcs->inBuffSize - zcs->inBuff.current); + memcpy((char*)zcs->inBuff.buffer.start + zcs->inBuff.current, input->src, toLoad); + input->pos += toLoad; + } + + if (zcs->inBuff.current == zcs->inBuffSize) { /* filled enough : let's compress */ + size_t const dstBufferCapacity = ZSTD_compressBound(zcs->targetSectionSize); + buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->targetSectionSize); /* should check for NULL */ + ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool); /* should check for NULL */ + unsigned const jobID = zcs->nextJobID & zcs->jobIDmask; + + zcs->jobs[jobID].srcStart = zcs->inBuff.start; + zcs->jobs[jobID].srcSize = zcs->targetSectionSize; + zcs->jobs[jobID].fullFrameSize = 0; + zcs->jobs[jobID].compressionLevel = zcs->compressionLevel; + zcs->jobs[jobID].dstBuff = dstBuffer; + zcs->jobs[jobID].cctx = cctx; + zcs->jobs[jobID].frameID = (jobID>0); + zcs->jobs[jobID].jobCompleted = 0; + zcs->jobs[jobID].dstFlushed = 0; + zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex; + zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond; + + /* get a new buffer for next input - save remaining into it */ + zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); /* check for NULL ! 
*/ + zcs->inBuff.current = zcs->inBuffSize - zcs->targetSectionSize; + memcpy(zcs->inBuff.buffer.start, (char*)zcs->jobs[jobID].srcStart + zcs->targetSectionSize, zcs->inBuff.current); + + DEBUGLOG(3, "posting job %u (%u bytes)", jobID, (U32)zcs->jobs[jobID].srcSize); + POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]); + zcs->nextJobID++; + } + + /* check if there is any data available to flush */ + { unsigned const jobID = zcs->doneJobID & zcs->jobIDmask; + ZSTDMT_jobDescription job = zcs->jobs[jobID]; + if (job.jobCompleted) { /* job completed : output can be flushed */ + size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos); + ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx); zcs->jobs[jobID].cctx = NULL; /* release cctx for future task */ + free(job.srcStart); zcs->jobs[jobID].srcStart = NULL; /* note : need a buff_t for release */ + memcpy((char*)output->dst + output->pos, job.dstBuff.start + job.dstFlushed, toWrite); + output->pos += toWrite; + job.dstFlushed += toWrite; + if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => next one */ + ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff); + zcs->doneJobID++; + } else + zcs->jobs[jobID].dstFlushed = job.dstFlushed; + } } + + /* recommended next input size : fill current input buffer */ + return zcs->inBuffSize - zcs->inBuff.current; +} + +size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output); +size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output); + +#endif diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h index 73ee379b..ca5d6b60 100644 --- a/lib/compress/zstdmt_compress.h +++ b/lib/compress/zstdmt_compress.h @@ -1,12 +1,24 @@ +/* === Dependencies === */ #include /* size_t */ +#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer */ + + +/* === Simple one-pass functions === */ typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx; - -ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads); +ZSTDMT_CCtx* 
ZSTDMT_createCCtx(unsigned nbThreads); size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* cctx); size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel); + + +/* === Streaming functions === */ + +size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel); +size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output); +size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output); From b05c4828eaf67fce7d2be9ef70a53591351b9ec8 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 12 Jan 2017 02:01:28 +0100 Subject: [PATCH 29/73] zstdmt : correctly check for cctx and buffer allocation Results from getBuffer and getCCtx could be NULL when allocation fails. Now correctly checks : job creation stops, and the last job reports an allocation error. releaseBuffer and releaseCCtx are now also compatible with NULL input. Identified a new potential issue : when an early job fails, later jobs are not collected for resource retrieval.
--- lib/compress/zstd_compress.c | 4 ++-- lib/compress/zstdmt_compress.c | 20 ++++++++++++++------ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index c4dbb6ce..d4800dce 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2627,9 +2627,9 @@ size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t di } -size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel) +size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) { - return ZSTD_compressBegin_usingDict(zc, NULL, 0, compressionLevel); + return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); } diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 6fe37a6f..7e8bb9f3 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -95,6 +95,7 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize) /* store buffer for later re-use, up to pool capacity */ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf) { + if (buf.start == NULL) return; /* release on NULL */ if (pool->nbBuffers < pool->totalBuffers) { pool->bTable[pool->nbBuffers++] = buf; /* store for later re-use */ return; @@ -187,10 +188,10 @@ void ZSTDMT_compressChunk(void* jobDescription) if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; } hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, 0); /* flush frame header */ if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; } - if (job->firstChunk) { /* preserve frame header when it is first chunk - otherwise, overwrite */ + if (job->firstChunk) { /* preserve frame header when it is first chunk */ dstBuff.start = (char*)dstBuff.start + hSize; dstBuff.size -= hSize; - } else + } else /* otherwise, overwrite */ hSize = 0; job->cSize = (job->lastChunk) ? 
/* last chunk signal */ @@ -258,7 +259,7 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0); size_t const chunkTargetSize = (size_t)1 << (params.cParams.windowLog + 2); unsigned const nbChunksMax = (unsigned)(srcSize / chunkTargetSize) + (srcSize < chunkTargetSize) /* min 1 */; - unsigned const nbChunks = MIN(nbChunksMax, mtctx->nbThreads); + unsigned nbChunks = MIN(nbChunksMax, mtctx->nbThreads); size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks; size_t const avgChunkSize = ((proposedChunkSize & 0x1FFFF) < 0xFFFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize; /* avoid too small last block */ size_t remainingSrcSize = srcSize; @@ -274,7 +275,14 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, size_t const chunkSize = MIN(remainingSrcSize, avgChunkSize); size_t const dstBufferCapacity = u ? ZSTD_compressBound(chunkSize) : dstCapacity; buffer_t const dstBuffer = u ? ZSTDMT_getBuffer(mtctx->buffPool, dstBufferCapacity) : (buffer_t){ dst, dstCapacity }; - ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool); /* should check for NULL ! 
*/ + ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool); + + if ((cctx==NULL) || (dstBuffer.start==NULL)) { + mtctx->jobs[u].cSize = ERROR(memory_allocation); /* job result */ + mtctx->jobs[u].jobCompleted = 1; + nbChunks = u+1; + break; /* let's wait for previous jobs to complete, but don't start new ones */ + } mtctx->jobs[u].srcStart = srcStart + frameStartPos; mtctx->jobs[u].srcSize = chunkSize; @@ -310,8 +318,8 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx); { size_t const cSize = mtctx->jobs[chunkID].cSize; - if (ZSTD_isError(cSize)) return cSize; - if (dstPos + cSize > dstCapacity) return ERROR(dstSize_tooSmall); + if (ZSTD_isError(cSize)) return cSize; /* leaving here : later ressources won't be released */ + if (dstPos + cSize > dstCapacity) return ERROR(dstSize_tooSmall); /* leaving here : later ressources won't be released */ if (chunkID) { /* note : chunk 0 is already written directly into dst */ memcpy((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize); ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff); From ad9f6bd1238a9eb9a49bceb2df1b31d44942fcda Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 12 Jan 2017 03:06:35 +0100 Subject: [PATCH 30/73] zstdmt : fix : resources properly collected even when early fail In the previous version, the main function would return early when detecting a job error. Resources held by later threads were therefore not collected back into pools. The new version just registers the error, but continues the collecting process. All buffers and contexts should be released back to their pools before leaving the main function.
--- lib/compress/zstdmt_compress.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 7e8bb9f3..24f5e5b8 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -305,7 +305,7 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, /* note : since nbChunks <= nbThreads, all jobs should be running immediately in parallel */ { unsigned chunkID; - size_t dstPos = 0; + size_t error = 0, dstPos = 0; for (chunkID=0; chunkIDcctxPool, mtctx->jobs[chunkID].cctx); { size_t const cSize = mtctx->jobs[chunkID].cSize; - if (ZSTD_isError(cSize)) return cSize; /* leaving here : later ressources won't be released */ - if (dstPos + cSize > dstCapacity) return ERROR(dstSize_tooSmall); /* leaving here : later ressources won't be released */ + if (ZSTD_isError(cSize)) error = cSize; + if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall); if (chunkID) { /* note : chunk 0 is already written directly into dst */ - memcpy((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize); + if (!error) memcpy((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize); ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff); } dstPos += cSize ; } } - DEBUGLOG(3, "compressed size : %u ", (U32)dstPos); - return dstPos; + if (!error) DEBUGLOG(3, "compressed size : %u ", (U32)dstPos); + return error ? 
error : dstPos; } } From 5b726dbe4dcbaafca2dcf5b6ed89023b287061fa Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 12 Jan 2017 17:46:46 +0100 Subject: [PATCH 31/73] fix gcc-arm warning "suggest braces around empty body" --- lib/compress/zstdmt_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 24f5e5b8..6f467f6a 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -36,7 +36,7 @@ if (g_debugLevel>=MUTEX_WAIT_TIME_DLEVEL) { \ #else -# define DEBUGLOG(l, ...) /* disabled */ +# define DEBUGLOG(l, ...) {} /* disabled */ # define PTHREAD_MUTEX_LOCK(m) pthread_mutex_lock(m) #endif From a73c4129329d6dc4c81987af987f3574569bbc0f Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 17 Jan 2017 15:31:16 -0800 Subject: [PATCH 32/73] completed ZSTDMT streaming compression Provides the baseline compression API : size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel); size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output); size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output); Not tested yet --- lib/compress/zstdmt_compress.c | 148 ++++++++++++++++++++++++++------- 1 file changed, 119 insertions(+), 29 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 6f467f6a..fb9183f9 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -2,10 +2,11 @@ #include /* memcpy */ #include /* threadpool */ #include "threading.h" /* mutex */ -#include "zstd_internal.h" /* MIN, ERROR, ZSTD_* */ +#include "zstd_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */ #include "zstdmt_compress.h" #if 0 + # include # include # include @@ -163,8 +164,14 @@ static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx) /* ===== Thread worker ===== */ +typedef struct { + 
buffer_t buffer; + size_t filled; +} inBuff_t; + typedef struct { ZSTD_CCtx* cctx; + buffer_t src; const void* srcStart; size_t srcSize; buffer_t dstBuff; @@ -208,25 +215,41 @@ _endJob: } +/* ------------------------------------------ */ /* ===== Multi-threaded compression ===== */ +/* ------------------------------------------ */ struct ZSTDMT_CCtx_s { POOL_ctx* factory; ZSTDMT_bufferPool* buffPool; ZSTDMT_CCtxPool* cctxPool; - unsigned nbThreads; pthread_mutex_t jobCompleted_mutex; pthread_cond_t jobCompleted_cond; - ZSTDMT_jobDescription jobs[1]; /* variable size */ + size_t targetSectionSize; + size_t inBuffSize; + inBuff_t inBuff; + ZSTD_parameters params; + unsigned nbThreads; + unsigned jobIDMask; + unsigned doneJobID; + unsigned nextJobID; + unsigned frameEnded; + ZSTDMT_jobDescription jobs[1]; /* variable size (must lies at the end) */ }; ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads) { ZSTDMT_CCtx* cctx; + U32 const minNbJobs = nbThreads + 1; + U32 const nbJobsLog2 = ZSTD_highbit32(minNbJobs) + 1; + U32 const nbJobs = 1 << nbJobsLog2; + DEBUGLOG(4, "nbThreads : %u ; minNbJobs : %u ; nbJobsLog2 : %u ; nbJobs : %u \n", + nbThreads, minNbJobs, nbJobsLog2, nbJobs); if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL; - cctx = (ZSTDMT_CCtx*) calloc(1, sizeof(ZSTDMT_CCtx) + nbThreads*sizeof(ZSTDMT_jobDescription)); + cctx = (ZSTDMT_CCtx*) calloc(1, sizeof(ZSTDMT_CCtx) + nbJobs*sizeof(ZSTDMT_jobDescription)); if (!cctx) return NULL; cctx->nbThreads = nbThreads; + cctx->jobIDMask = nbJobs - 1; cctx->factory = POOL_create(nbThreads, 1); cctx->buffPool = ZSTDMT_createBufferPool(nbThreads); cctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads); @@ -338,46 +361,46 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, /* ======= Streaming API ======= */ /* ====================================== */ -#if 0 +#if 1 size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) { zcs->params = ZSTD_getParams(compressionLevel, 0, 0); - 
zcs->targetSectionSize = 1 << (zcs->params.cParams.windowLog + 2); + zcs->targetSectionSize = (size_t)1 << (zcs->params.cParams.windowLog + 2); zcs->inBuffSize = 5 * (1 << zcs->params.cParams.windowLog); zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); /* check for NULL ! */ - zcs->inBuff.current = 0; + zcs->inBuff.filled = 0; zcs->doneJobID = 0; zcs->nextJobID = 0; + zcs->frameEnded = 0; return 0; } -typedef struct { - buffer_t buffer; - unsigned current; -} inBuff_t; - size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) { + if (zcs->frameEnded) return ERROR(stage_wrong); + /* fill input buffer */ - { size_t const toLoad = MIN(input->size - input->pos, zcs->inBuffSize - zcs->inBuff.current); - memcpy((char*)zcs->inBuff.buffer.start + zcs->inBuff.current, input->src, toLoad); + { size_t const toLoad = MIN(input->size - input->pos, zcs->inBuffSize - zcs->inBuff.filled); + memcpy((char*)zcs->inBuff.buffer.start + zcs->inBuff.filled, input->src, toLoad); input->pos += toLoad; } - if (zcs->inBuff.current == zcs->inBuffSize) { /* filled enough : let's compress */ + if (zcs->inBuff.filled == zcs->inBuffSize) { /* filled enough : let's compress */ size_t const dstBufferCapacity = ZSTD_compressBound(zcs->targetSectionSize); - buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->targetSectionSize); /* should check for NULL */ + buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity); /* should check for NULL */ ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool); /* should check for NULL */ - unsigned const jobID = zcs->nextJobID & zcs->jobIDmask; + unsigned const jobID = zcs->nextJobID & zcs->jobIDMask; - zcs->jobs[jobID].srcStart = zcs->inBuff.start; + zcs->jobs[jobID].src = zcs->inBuff.buffer; + zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start; zcs->jobs[jobID].srcSize = zcs->targetSectionSize; zcs->jobs[jobID].fullFrameSize = 0; - zcs->jobs[jobID].compressionLevel = 
zcs->compressionLevel; + zcs->jobs[jobID].params = zcs->params; zcs->jobs[jobID].dstBuff = dstBuffer; zcs->jobs[jobID].cctx = cctx; - zcs->jobs[jobID].frameID = (jobID>0); + zcs->jobs[jobID].firstChunk = (jobID==0); + zcs->jobs[jobID].lastChunk = 0; zcs->jobs[jobID].jobCompleted = 0; zcs->jobs[jobID].dstFlushed = 0; zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex; @@ -385,22 +408,22 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu /* get a new buffer for next input - save remaining into it */ zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); /* check for NULL ! */ - zcs->inBuff.current = zcs->inBuffSize - zcs->targetSectionSize; - memcpy(zcs->inBuff.buffer.start, (char*)zcs->jobs[jobID].srcStart + zcs->targetSectionSize, zcs->inBuff.current); + zcs->inBuff.filled = (U32)(zcs->inBuffSize - zcs->targetSectionSize); + memcpy(zcs->inBuff.buffer.start, (const char*)zcs->jobs[jobID].srcStart + zcs->targetSectionSize, zcs->inBuff.filled); - DEBUGLOG(3, "posting job %u (%u bytes)", jobID, (U32)zcs->jobs[jobID].srcSize); + DEBUGLOG(3, "posting job %u (%u bytes)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize); POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]); zcs->nextJobID++; } /* check if there is any data available to flush */ - { unsigned const jobID = zcs->doneJobID & zcs->jobIDmask; + { unsigned const jobID = zcs->doneJobID & zcs->jobIDMask; ZSTDMT_jobDescription job = zcs->jobs[jobID]; if (job.jobCompleted) { /* job completed : output can be flushed */ size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos); ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx); zcs->jobs[jobID].cctx = NULL; /* release cctx for future task */ - free(job.srcStart); zcs->jobs[jobID].srcStart = NULL; /* note : need a buff_t for release */ - memcpy((char*)output->dst + output->pos, job.dstBuff.start + job.dstFlushed, toWrite); + ZSTDMT_releaseBuffer(zcs->buffPool, job.src); 
zcs->jobs[jobID].srcStart = NULL; zcs->jobs[jobID].src = (buffer_t) { NULL, 0 }; + memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite); output->pos += toWrite; job.dstFlushed += toWrite; if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => next one */ @@ -411,10 +434,77 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu } } /* recommended next input size : fill current input buffer */ - return zcs->inBuffSize - zcs->inBuff.current; + return zcs->inBuffSize - zcs->inBuff.filled; +} + +static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned endFrame) +{ + size_t const srcSize = zcs->inBuff.filled; + + if ((srcSize > 0) || (endFrame && !zcs->frameEnded)) { + size_t const dstBufferCapacity = ZSTD_compressBound(srcSize); + buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity); /* should check for NULL */ + ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool); /* should check for NULL */ + unsigned const jobID = zcs->nextJobID & zcs->jobIDMask; + zcs->jobs[jobID].src = zcs->inBuff.buffer; + zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start; + zcs->jobs[jobID].srcSize = srcSize; + zcs->jobs[jobID].fullFrameSize = 0; + zcs->jobs[jobID].params = zcs->params; + zcs->jobs[jobID].dstBuff = dstBuffer; + zcs->jobs[jobID].cctx = cctx; + zcs->jobs[jobID].firstChunk = (jobID==0); + zcs->jobs[jobID].lastChunk = endFrame; + zcs->jobs[jobID].jobCompleted = 0; + zcs->jobs[jobID].dstFlushed = 0; + zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex; + zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond; + + /* get a new buffer for next input */ + if (!endFrame) { + zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); /* check for NULL ! 
*/ + zcs->inBuff.filled = 0; + } else { + zcs->frameEnded = 1; + } + + DEBUGLOG(3, "posting job %u (%u bytes)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize); + POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]); + zcs->nextJobID++; + } + + /* check if there is any data available to flush */ + { unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask; + ZSTDMT_jobDescription job = zcs->jobs[wJobID]; + if (job.jobCompleted) { /* job completed : output can be flushed */ + size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos); + ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx); zcs->jobs[wJobID].cctx = NULL; /* release cctx for future task */ + ZSTDMT_releaseBuffer(zcs->buffPool, job.src); zcs->jobs[wJobID].srcStart = NULL; zcs->jobs[wJobID].src = (buffer_t) { NULL, 0 }; + memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite); + output->pos += toWrite; + job.dstFlushed += toWrite; + if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => next one */ + ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff); zcs->jobs[wJobID].dstBuff = (buffer_t) { NULL, 0 }; + zcs->doneJobID++; + } else { + zcs->jobs[wJobID].dstFlushed = job.dstFlushed; + } } + /* return value : how many bytes left in buffer ; fake it to 1 if unknown but >0 */ + if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed); + return (zcs->doneJobID < zcs->nextJobID); + } +} + + +size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output) +{ + return ZSTDMT_flushStream_internal(zcs, output, 0); +} + +size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output) +{ + return ZSTDMT_flushStream_internal(zcs, output, 1); } -size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output); -size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output); #endif From d0a1d45582181efd76350922a240f14d7893c985 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 17 Jan 2017 16:15:18 -0800 Subject: 
[PATCH 33/73] ZSTDMT_{flush,end}Stream() now block on next job completion when nothing to flush The main issue was to prevent a caller from continually looping on {flush,end}Stream() when nothing was ready to be flushed but some compression work was still ongoing in a worker thread. The continuous loop would have resulted in wasted energy. The new version makes calls to {flush,end}Stream blocking when there is nothing ready to be flushed. Of course, if all worker threads have exhausted their jobs, it will return zero (all flush completed). Note : There are still some remaining issues with reporting error codes and properly collecting resources back into pools when an error is triggered. --- lib/compress/zstdmt_compress.c | 42 ++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index fb9183f9..57cc107f 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -388,10 +388,15 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu if (zcs->inBuff.filled == zcs->inBuffSize) { /* filled enough : let's compress */ size_t const dstBufferCapacity = ZSTD_compressBound(zcs->targetSectionSize); - buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity); /* should check for NULL */ - ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool); /* should check for NULL */ + buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity); + ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool); unsigned const jobID = zcs->nextJobID & zcs->jobIDMask; if ((cctx==NULL) || (dstBuffer.start==NULL)) { + zcs->jobs[jobID].cSize = ERROR(memory_allocation); /* job result : how to collect that error ? 
*/ + zcs->jobs[jobID].jobCompleted = 1; + } + zcs->jobs[jobID].src = zcs->inBuff.buffer; zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start; zcs->jobs[jobID].srcSize = zcs->targetSectionSize; @@ -426,17 +431,18 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite); output->pos += toWrite; job.dstFlushed += toWrite; - if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => next one */ - ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff); + if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => go to next one */ + ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff); zcs->jobs[jobID].dstBuff = (buffer_t) { NULL, 0 }; zcs->doneJobID++; - } else - zcs->jobs[jobID].dstFlushed = job.dstFlushed; - } } + } else { + zcs->jobs[jobID].dstFlushed = job.dstFlushed; /* save flush level into zcs for later retrieval */ + } } } /* recommended next input size : fill current input buffer */ return zcs->inBuffSize - zcs->inBuff.filled; } + static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned endFrame) { size_t const srcSize = zcs->inBuff.filled; @@ -469,14 +475,20 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp } DEBUGLOG(3, "posting job %u (%u bytes)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize); - POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]); + POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]); /* this call is blocking when thread worker pool is exhausted */ zcs->nextJobID++; } /* check if there is any data available to flush */ { unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask; - ZSTDMT_jobDescription job = zcs->jobs[wJobID]; - if (job.jobCompleted) { /* job completed : output can be flushed */ + PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex); + while (zcs->jobs[wJobID].jobCompleted==0) { + DEBUGLOG(4, "waiting for 
jobCompleted signal from chunk %u", zcs->doneJobID); /* we want to block when waiting for data to flush */ + pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex); + } + pthread_mutex_unlock(&zcs->jobCompleted_mutex); + { /* job completed : output can be flushed */ + ZSTDMT_jobDescription job = zcs->jobs[wJobID]; size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos); ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx); zcs->jobs[wJobID].cctx = NULL; /* release cctx for future task */ ZSTDMT_releaseBuffer(zcs->buffPool, job.src); zcs->jobs[wJobID].srcStart = NULL; zcs->jobs[wJobID].src = (buffer_t) { NULL, 0 }; @@ -488,11 +500,11 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp zcs->doneJobID++; } else { zcs->jobs[wJobID].dstFlushed = job.dstFlushed; - } } - /* return value : how many bytes left in buffer ; fake it to 1 if unknown but >0 */ - if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed); - return (zcs->doneJobID < zcs->nextJobID); - } + } + /* return value : how many bytes left in buffer ; fake it to 1 if unknown but >0 */ + if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed); + return (zcs->doneJobID < zcs->nextJobID); + } } } From 0d6b8f65a9f5864bc75e5bc5a9a7c7abe0c5d197 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 17 Jan 2017 17:46:33 -0800 Subject: [PATCH 34/73] ZSTDMT_free() scrubs potentially unfinished jobs to release their resources In some complex scenarios (free() without finishing compression), it is possible that some resources are still held by jobs and not collected back into pools. In which case, previous version of free() would miss them. This would be equivalent to a leak. New version ensures that it even goes after such resources. It requires job consumers to properly mark resources as released, by replacing entries by NULL after releasing back to the pool. 
Obviously, it's not recommended to free() zstdmt context mid-term, still that's now a supported scenario. The same methodology is also used to ensure proper resource collection after an error is detected. Still to do : - detect compression errors (not just allocation ones) - properly manage resource when init() is called without finishing previous compression. --- lib/compress/zstdmt_compress.c | 90 +++++++++++++++++++++++++++++----- 1 file changed, 79 insertions(+), 11 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 57cc107f..c864ef21 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -52,6 +52,8 @@ typedef struct buffer_s { size_t size; } buffer_t; +static const buffer_t g_nullBuffer = (buffer_t) { NULL, 0 }; + typedef struct ZSTDMT_bufferPool_s { unsigned totalBuffers;; unsigned nbBuffers; @@ -262,10 +264,27 @@ ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads) return cctx; } +/* ZSTDMT_releaseAllJobResources() : + * Ensure all workers are killed first. 
*/ +static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx) +{ + unsigned jobID; + for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) { + ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].dstBuff); + mtctx->jobs[jobID].dstBuff = g_nullBuffer; + ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].src); + mtctx->jobs[jobID].src = g_nullBuffer; + ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[jobID].cctx); + mtctx->jobs[jobID].cctx = NULL; + } +} + size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) { + if (mtctx==NULL) return 0; /* compatible with free on NULL */ POOL_free(mtctx->factory); - ZSTDMT_freeBufferPool(mtctx->buffPool); + ZSTDMT_releaseAllJobResources(mtctx); /* kill workers first */ + ZSTDMT_freeBufferPool(mtctx->buffPool); /* release job resources first */ ZSTDMT_freeCCtxPool(mtctx->cctxPool); pthread_mutex_destroy(&mtctx->jobCompleted_mutex); pthread_cond_destroy(&mtctx->jobCompleted_cond); @@ -340,12 +359,15 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, pthread_mutex_unlock(&mtctx->jobCompleted_mutex); ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx); + mtctx->jobs[chunkID].cctx = NULL; + mtctx->jobs[chunkID].srcStart = NULL; { size_t const cSize = mtctx->jobs[chunkID].cSize; if (ZSTD_isError(cSize)) error = cSize; if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall); if (chunkID) { /* note : chunk 0 is already written directly into dst */ if (!error) memcpy((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize); ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff); + mtctx->jobs[chunkID].dstBuff = g_nullBuffer; } dstPos += cSize ; } @@ -363,6 +385,19 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, #if 1 +static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs) { + while (zcs->doneJobID < zcs->nextJobID) { + unsigned const jobID = zcs->doneJobID & zcs->jobIDMask; + PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex); + while (zcs->jobs[jobID].jobCompleted==0) { + DEBUGLOG(4, 
"waiting for jobCompleted signal from chunk %u", zcs->doneJobID); /* we want to block when waiting for data to flush */ + pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex); + } + pthread_mutex_unlock(&zcs->jobCompleted_mutex); + zcs->doneJobID++; + } +} + size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) { zcs->params = ZSTD_getParams(compressionLevel, 0, 0); zcs->targetSectionSize = (size_t)1 << (zcs->params.cParams.windowLog + 2); @@ -393,8 +428,12 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu unsigned const jobID = zcs->nextJobID & zcs->jobIDMask; if ((cctx==NULL) || (dstBuffer.start==NULL)) { - zcs->jobs[jobID].cSize = ERROR(memory_allocation); /* job result : how to collect that error ? */ + zcs->jobs[jobID].cSize = ERROR(memory_allocation); zcs->jobs[jobID].jobCompleted = 1; + zcs->nextJobID++; + ZSTDMT_waitForAllJobsCompleted(zcs); + ZSTDMT_releaseAllJobResources(zcs); + return ERROR(memory_allocation); } zcs->jobs[jobID].src = zcs->inBuff.buffer; @@ -412,7 +451,15 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond; /* get a new buffer for next input - save remaining into it */ - zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); /* check for NULL ! 
*/ + zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); + if (zcs->inBuff.buffer.start == NULL) { /* not enough memory to allocate next input buffer */ + zcs->jobs[jobID].cSize = ERROR(memory_allocation); + zcs->jobs[jobID].jobCompleted = 1; + zcs->nextJobID++; + ZSTDMT_waitForAllJobsCompleted(zcs); + ZSTDMT_releaseAllJobResources(zcs); + return ERROR(memory_allocation); + } zcs->inBuff.filled = (U32)(zcs->inBuffSize - zcs->targetSectionSize); memcpy(zcs->inBuff.buffer.start, (const char*)zcs->jobs[jobID].srcStart + zcs->targetSectionSize, zcs->inBuff.filled); @@ -426,13 +473,16 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu ZSTDMT_jobDescription job = zcs->jobs[jobID]; if (job.jobCompleted) { /* job completed : output can be flushed */ size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos); - ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx); zcs->jobs[jobID].cctx = NULL; /* release cctx for future task */ - ZSTDMT_releaseBuffer(zcs->buffPool, job.src); zcs->jobs[jobID].srcStart = NULL; zcs->jobs[jobID].src = (buffer_t) { NULL, 0 }; + ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx); + zcs->jobs[jobID].cctx = NULL; + ZSTDMT_releaseBuffer(zcs->buffPool, job.src); + zcs->jobs[jobID].srcStart = NULL; zcs->jobs[jobID].src = g_nullBuffer; memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite); output->pos += toWrite; job.dstFlushed += toWrite; if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => go to next one */ - ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff); zcs->jobs[jobID].dstBuff = (buffer_t) { NULL, 0 }; + ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff); + zcs->jobs[jobID].dstBuff = g_nullBuffer; zcs->doneJobID++; } else { zcs->jobs[jobID].dstFlushed = job.dstFlushed; /* save flush level into zcs for later retrieval */ @@ -449,9 +499,19 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp if 
((srcSize > 0) || (endFrame && !zcs->frameEnded)) { size_t const dstBufferCapacity = ZSTD_compressBound(srcSize); - buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity); /* should check for NULL */ - ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool); /* should check for NULL */ + buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity); + ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool); unsigned const jobID = zcs->nextJobID & zcs->jobIDMask; + + if ((cctx==NULL) || (dstBuffer.start==NULL)) { + zcs->jobs[jobID].cSize = ERROR(memory_allocation); + zcs->jobs[jobID].jobCompleted = 1; + zcs->nextJobID++; + ZSTDMT_waitForAllJobsCompleted(zcs); + ZSTDMT_releaseAllJobResources(zcs); + return ERROR(memory_allocation); + } + zcs->jobs[jobID].src = zcs->inBuff.buffer; zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start; zcs->jobs[jobID].srcSize = srcSize; @@ -468,8 +528,16 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp /* get a new buffer for next input */ if (!endFrame) { - zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); /* check for NULL ! 
*/ + zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); zcs->inBuff.filled = 0; + if (zcs->inBuff.buffer.start == NULL) { /* not enough memory to allocate next input buffer */ + zcs->jobs[jobID].cSize = ERROR(memory_allocation); + zcs->jobs[jobID].jobCompleted = 1; + zcs->nextJobID++; + ZSTDMT_waitForAllJobsCompleted(zcs); + ZSTDMT_releaseAllJobResources(zcs); + return ERROR(memory_allocation); + } } else { zcs->frameEnded = 1; } @@ -491,12 +559,12 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp ZSTDMT_jobDescription job = zcs->jobs[wJobID]; size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos); ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx); zcs->jobs[wJobID].cctx = NULL; /* release cctx for future task */ - ZSTDMT_releaseBuffer(zcs->buffPool, job.src); zcs->jobs[wJobID].srcStart = NULL; zcs->jobs[wJobID].src = (buffer_t) { NULL, 0 }; + ZSTDMT_releaseBuffer(zcs->buffPool, job.src); zcs->jobs[wJobID].srcStart = NULL; zcs->jobs[wJobID].src = g_nullBuffer; memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite); output->pos += toWrite; job.dstFlushed += toWrite; if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => next one */ - ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff); zcs->jobs[wJobID].dstBuff = (buffer_t) { NULL, 0 }; + ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff); zcs->jobs[wJobID].dstBuff = g_nullBuffer; zcs->doneJobID++; } else { zcs->jobs[wJobID].dstFlushed = job.dstFlushed; From a6db7a7b9b3c60d24f4645cfb71a2aaa0d77a072 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 18 Jan 2017 11:57:34 -0800 Subject: [PATCH 35/73] fixed cmaketest (buffer_t){NULL,0} is not considered a constant. {NULL,0} is. 
--- lib/compress/zstdmt_compress.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index c864ef21..329dc78f 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -52,7 +52,7 @@ typedef struct buffer_s { size_t size; } buffer_t; -static const buffer_t g_nullBuffer = (buffer_t) { NULL, 0 }; +static const buffer_t g_nullBuffer = { NULL, 0 }; typedef struct ZSTDMT_bufferPool_s { unsigned totalBuffers;; @@ -277,6 +277,8 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx) ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[jobID].cctx); mtctx->jobs[jobID].cctx = NULL; } + ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer); + mtctx->inBuff.buffer = g_nullBuffer; } size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) @@ -402,7 +404,8 @@ size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) { zcs->params = ZSTD_getParams(compressionLevel, 0, 0); zcs->targetSectionSize = (size_t)1 << (zcs->params.cParams.windowLog + 2); zcs->inBuffSize = 5 * (1 << zcs->params.cParams.windowLog); - zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); /* check for NULL ! */ + zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); + if (zcs->inBuff.buffer.start == NULL) return ERROR(memory_allocation); zcs->inBuff.filled = 0; zcs->doneJobID = 0; zcs->nextJobID = 0; @@ -413,7 +416,7 @@ size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) { size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) { - if (zcs->frameEnded) return ERROR(stage_wrong); + if (zcs->frameEnded) return ERROR(stage_wrong); /* current frame being ended. 
Finish it and restart a new one */ /* fill input buffer */ { size_t const toLoad = MIN(input->size - input->pos, zcs->inBuffSize - zcs->inBuff.filled); From 563ef8acf440033fbc60df75df06cef31982ae73 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 18 Jan 2017 12:12:10 -0800 Subject: [PATCH 36/73] CCtxPool starts empty, as suggested by @terrelln Also : make zstdmt now a target from root --- .gitignore | 1 + Makefile | 5 +++++ lib/compress/zstdmt_compress.c | 22 +++++++--------------- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index 796a696d..dd7a7451 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ # Executables zstd +zstdmt *.exe *.out *.app diff --git a/Makefile b/Makefile index 0a3634c3..8ffc9ae9 100644 --- a/Makefile +++ b/Makefile @@ -50,6 +50,11 @@ zstd: @$(MAKE) -C $(PRGDIR) $@ cp $(PRGDIR)/zstd$(EXT) . +.PHONY: zstdmt +zstdmt: + @$(MAKE) -C $(PRGDIR) $@ + cp $(PRGDIR)/zstd$(EXT) ./zstdmt$(EXT) + .PHONY: zlibwrapper zlibwrapper: $(MAKE) -C $(ZWRAPDIR) test diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 329dc78f..f880e852 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -122,8 +122,8 @@ typedef struct { static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool) { unsigned u; - for (u=0; uavailCCtx; u++) /* note : availCCtx is supposed == totalCCtx; otherwise, some CCtx are still in use */ - ZSTD_freeCCtx(pool->cctx[u]); + for (u=0; utotalCCtx; u++) + ZSTD_freeCCtx(pool->cctx[u]); /* note : compatible with free on NULL */ free(pool); } @@ -131,15 +131,8 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads) { ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) calloc(1, sizeof(ZSTDMT_CCtxPool) + nbThreads*sizeof(ZSTD_CCtx*)); if (!cctxPool) return NULL; - { unsigned threadNb; - for (threadNb=0; threadNbcctx[threadNb] = ZSTD_createCCtx(); - if (cctxPool->cctx[threadNb]==NULL) { /* failed cctx allocation : abort cctxPool 
creation */ - cctxPool->totalCCtx = cctxPool->availCCtx = threadNb; - ZSTDMT_freeCCtxPool(cctxPool); - return NULL; - } } } - cctxPool->totalCCtx = cctxPool->availCCtx = nbThreads; + cctxPool->totalCCtx = nbThreads; + cctxPool->availCCtx = 0; return cctxPool; } @@ -149,17 +142,16 @@ static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* pool) pool->availCCtx--; return pool->cctx[pool->availCCtx]; } - /* note : should not be possible, since totalCCtx==nbThreads */ - return ZSTD_createCCtx(); /* note : can be NULL is creation fails ! */ + return ZSTD_createCCtx(); /* note : can be NULL, when creation fails ! */ } static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx) { - if (cctx==NULL) return; /* release on NULL */ + if (cctx==NULL) return; /* compatibility with release on NULL */ if (pool->availCCtx < pool->totalCCtx) pool->cctx[pool->availCCtx++] = cctx; else - /* note : should not be possible, since totalCCtx==nbThreads */ + /* pool overflow : should not happen, since totalCCtx==nbThreads */ ZSTD_freeCCtx(cctx); } From 4885f591b30348cfef45fffff24a9a7dbb19ea40 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 18 Jan 2017 14:11:37 -0800 Subject: [PATCH 37/73] trap compression errors, collect back resources from workers --- lib/compress/zstdmt_compress.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index f880e852..3762f5a2 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -423,7 +423,6 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu unsigned const jobID = zcs->nextJobID & zcs->jobIDMask; if ((cctx==NULL) || (dstBuffer.start==NULL)) { - zcs->jobs[jobID].cSize = ERROR(memory_allocation); zcs->jobs[jobID].jobCompleted = 1; zcs->nextJobID++; ZSTDMT_waitForAllJobsCompleted(zcs); @@ -438,7 +437,7 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu 
zcs->jobs[jobID].params = zcs->params; zcs->jobs[jobID].dstBuff = dstBuffer; zcs->jobs[jobID].cctx = cctx; - zcs->jobs[jobID].firstChunk = (jobID==0); + zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0); zcs->jobs[jobID].lastChunk = 0; zcs->jobs[jobID].jobCompleted = 0; zcs->jobs[jobID].dstFlushed = 0; @@ -448,7 +447,6 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu /* get a new buffer for next input - save remaining into it */ zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); if (zcs->inBuff.buffer.start == NULL) { /* not enough memory to allocate next input buffer */ - zcs->jobs[jobID].cSize = ERROR(memory_allocation); zcs->jobs[jobID].jobCompleted = 1; zcs->nextJobID++; ZSTDMT_waitForAllJobsCompleted(zcs); @@ -472,6 +470,11 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu zcs->jobs[jobID].cctx = NULL; ZSTDMT_releaseBuffer(zcs->buffPool, job.src); zcs->jobs[jobID].srcStart = NULL; zcs->jobs[jobID].src = g_nullBuffer; + if (ZSTD_isError(job.cSize)) { + ZSTDMT_waitForAllJobsCompleted(zcs); + ZSTDMT_releaseAllJobResources(zcs); + return job.cSize; + } memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite); output->pos += toWrite; job.dstFlushed += toWrite; @@ -499,7 +502,6 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp unsigned const jobID = zcs->nextJobID & zcs->jobIDMask; if ((cctx==NULL) || (dstBuffer.start==NULL)) { - zcs->jobs[jobID].cSize = ERROR(memory_allocation); zcs->jobs[jobID].jobCompleted = 1; zcs->nextJobID++; ZSTDMT_waitForAllJobsCompleted(zcs); @@ -514,7 +516,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp zcs->jobs[jobID].params = zcs->params; zcs->jobs[jobID].dstBuff = dstBuffer; zcs->jobs[jobID].cctx = cctx; - zcs->jobs[jobID].firstChunk = (jobID==0); + zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0); zcs->jobs[jobID].lastChunk = 
endFrame; zcs->jobs[jobID].jobCompleted = 0; zcs->jobs[jobID].dstFlushed = 0; @@ -526,7 +528,6 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); zcs->inBuff.filled = 0; if (zcs->inBuff.buffer.start == NULL) { /* not enough memory to allocate next input buffer */ - zcs->jobs[jobID].cSize = ERROR(memory_allocation); zcs->jobs[jobID].jobCompleted = 1; zcs->nextJobID++; ZSTDMT_waitForAllJobsCompleted(zcs); @@ -543,6 +544,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp } /* check if there is any data available to flush */ + if (zcs->doneJobID == zcs->nextJobID) return 0; /* all flushed ! */ { unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask; PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex); while (zcs->jobs[wJobID].jobCompleted==0) { @@ -555,6 +557,11 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos); ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx); zcs->jobs[wJobID].cctx = NULL; /* release cctx for future task */ ZSTDMT_releaseBuffer(zcs->buffPool, job.src); zcs->jobs[wJobID].srcStart = NULL; zcs->jobs[wJobID].src = g_nullBuffer; + if (ZSTD_isError(job.cSize)) { + ZSTDMT_waitForAllJobsCompleted(zcs); + ZSTDMT_releaseAllJobResources(zcs); + return job.cSize; + } memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite); output->pos += toWrite; job.dstFlushed += toWrite; From 3a01c46b266996e7256faf731dd6813b9f47f3a7 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 18 Jan 2017 15:18:17 -0800 Subject: [PATCH 38/73] ZSTDMT_initCStream() supports restart from invalid state ZSTDMT_initCStream() will correctly scrub for resources when it detects that previous compression was not properly finished. 
--- lib/compress/zstdmt_compress.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 3762f5a2..c417e8aa 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -228,6 +228,7 @@ struct ZSTDMT_CCtx_s { unsigned doneJobID; unsigned nextJobID; unsigned frameEnded; + unsigned allJobsCompleted; ZSTDMT_jobDescription jobs[1]; /* variable size (must lies at the end) */ }; @@ -244,6 +245,7 @@ ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads) if (!cctx) return NULL; cctx->nbThreads = nbThreads; cctx->jobIDMask = nbJobs - 1; + cctx->allJobsCompleted = 1; cctx->factory = POOL_create(nbThreads, 1); cctx->buffPool = ZSTDMT_createBufferPool(nbThreads); cctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads); @@ -277,8 +279,8 @@ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) { if (mtctx==NULL) return 0; /* compatible with free on NULL */ POOL_free(mtctx->factory); - ZSTDMT_releaseAllJobResources(mtctx); /* kill workers first */ - ZSTDMT_freeBufferPool(mtctx->buffPool); /* release job resources first */ + if (!mtctx->allJobsCompleted) ZSTDMT_releaseAllJobResources(mtctx); /* stop workers first */ + ZSTDMT_freeBufferPool(mtctx->buffPool); /* release job resources into pools first */ ZSTDMT_freeCCtxPool(mtctx->cctxPool); pthread_mutex_destroy(&mtctx->jobCompleted_mutex); pthread_cond_destroy(&mtctx->jobCompleted_cond); @@ -393,6 +395,11 @@ static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs) { } size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) { + if (zcs->allJobsCompleted == 0) { /* previous job not correctly finished */ + ZSTDMT_waitForAllJobsCompleted(zcs); + ZSTDMT_releaseAllJobResources(zcs); + zcs->allJobsCompleted = 1; + } zcs->params = ZSTD_getParams(compressionLevel, 0, 0); zcs->targetSectionSize = (size_t)1 << (zcs->params.cParams.windowLog + 2); zcs->inBuffSize = 5 * (1 << zcs->params.cParams.windowLog); @@ -402,13 +409,14 @@ 
size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) { zcs->doneJobID = 0; zcs->nextJobID = 0; zcs->frameEnded = 0; + zcs->allJobsCompleted = 0; return 0; } size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) { - if (zcs->frameEnded) return ERROR(stage_wrong); /* current frame being ended. Finish it and restart a new one */ + if (zcs->frameEnded) return ERROR(stage_wrong); /* current frame being ended. Only flush is allowed. Restart with init */ /* fill input buffer */ { size_t const toLoad = MIN(input->size - input->pos, zcs->inBuffSize - zcs->inBuff.filled); @@ -573,7 +581,9 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp } /* return value : how many bytes left in buffer ; fake it to 1 if unknown but >0 */ if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed); - return (zcs->doneJobID < zcs->nextJobID); + if (zcs->doneJobID < zcs->nextJobID) return 1; /* still some buffer to flush */ + zcs->allJobsCompleted = zcs->frameEnded; + return 0; } } } From 6073b3e6b86488d1d6963b7dbf3fedf34ac7158e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 18 Jan 2017 15:32:38 -0800 Subject: [PATCH 39/73] ZSTDMT_endStream : nullify input buffer after flush There will be no more input after ZSTDMT_endStream invocation : only flush/end is allowed (to fully collect compressed result). 
--- lib/compress/zstdmt_compress.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index c417e8aa..d552acee 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -543,6 +543,8 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp return ERROR(memory_allocation); } } else { + zcs->inBuff.buffer = g_nullBuffer; + zcs->inBuff.filled = 0; zcs->frameEnded = 1; } From 37226c1e9f968dd1a0f4bdbcfaf54716aa88697d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 19 Jan 2017 10:18:17 -0800 Subject: [PATCH 40/73] Simplified compressChunk job minor refactoring : compression done in a single call on first chunk Avoid a mutable hSize variable and eventual recombination to cSize at the end --- lib/compress/zstdmt_compress.c | 52 +++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index d552acee..93220f5c 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -10,8 +10,16 @@ # include # include # include - static unsigned g_debugLevel = 2; -# define DEBUGLOG(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __VA_ARGS__); fprintf(stderr, " \n"); } + static unsigned g_debugLevel = 3; +# define DEBUGLOGRAW(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __VA_ARGS__); } +# define DEBUGLOG(l, ...) if (l<=g_debugLevel) { fprintf(stderr, __FILE__ ": "); fprintf(stderr, __VA_ARGS__); fprintf(stderr, " \n"); } + +# define DEBUG_PRINTHEX(l,p,n) { \ + unsigned debug_u; \ + for (debug_u=0; debug_u<(n); debug_u++) \ + DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \ + DEBUGLOGRAW(l, " \n"); \ +} static unsigned long long GetCurrentClockTimeMicroseconds() { @@ -39,6 +47,7 @@ if (g_debugLevel>=MUTEX_WAIT_TIME_DLEVEL) { \ # define DEBUGLOG(l, ...) 
{} /* disabled */ # define PTHREAD_MUTEX_LOCK(m) pthread_mutex_lock(m) +# define DEBUG_PRINTHEX(l,p,n) {} #endif @@ -184,22 +193,20 @@ typedef struct { void ZSTDMT_compressChunk(void* jobDescription) { ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription; - buffer_t dstBuff = job->dstBuff; - size_t hSize = ZSTD_compressBegin_advanced(job->cctx, NULL, 0, job->params, job->fullFrameSize); - if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; } - hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, 0); /* flush frame header */ - if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; } - if (job->firstChunk) { /* preserve frame header when it is first chunk */ - dstBuff.start = (char*)dstBuff.start + hSize; - dstBuff.size -= hSize; - } else /* otherwise, overwrite */ - hSize = 0; + buffer_t const dstBuff = job->dstBuff; + size_t const initError = ZSTD_compressBegin_advanced(job->cctx, NULL, 0, job->params, job->fullFrameSize); + if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; } + if (!job->firstChunk) { + size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, 0); /* flush frame header */ + if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; } + } + DEBUGLOG(3, "Compressing : "); + DEBUG_PRINTHEX(3, job->srcStart, 12); job->cSize = (job->lastChunk) ? 
/* last chunk signal */ ZSTD_compressEnd(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, job->srcSize) : ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, job->srcSize); - if (!ZSTD_isError(job->cSize)) job->cSize += hSize; - DEBUGLOG(5, "chunk %u : compressed %u bytes into %u bytes ", (unsigned)job->lastChunk, (unsigned)job->srcSize, (unsigned)job->cSize); + DEBUGLOG(3, "compressed %u bytes into %u bytes (first:%u) (last:%u)", (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk); _endJob: PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex); @@ -271,8 +278,10 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx) ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[jobID].cctx); mtctx->jobs[jobID].cctx = NULL; } + memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription)); ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer); mtctx->inBuff.buffer = g_nullBuffer; + mtctx->allJobsCompleted = 1; } size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) @@ -335,6 +344,7 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond; DEBUGLOG(3, "posting job %u (%u bytes)", u, (U32)chunkSize); + DEBUG_PRINTHEX(3, mtctx->jobs[u].srcStart, 12); POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]); frameStartPos += chunkSize; @@ -345,14 +355,14 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, { unsigned chunkID; size_t error = 0, dstPos = 0; for (chunkID=0; chunkIDjobCompleted_mutex); while (mtctx->jobs[chunkID].jobCompleted==0) { DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", chunkID); pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex); } pthread_mutex_unlock(&mtctx->jobCompleted_mutex); + DEBUGLOG(3, "ready to write chunk %u ", chunkID); ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx); mtctx->jobs[chunkID].cctx = NULL; @@ -422,6 +432,7 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, 
ZSTD_outBuffer* output, ZSTD_inBu { size_t const toLoad = MIN(input->size - input->pos, zcs->inBuffSize - zcs->inBuff.filled); memcpy((char*)zcs->inBuff.buffer.start + zcs->inBuff.filled, input->src, toLoad); input->pos += toLoad; + zcs->inBuff.filled += toLoad; } if (zcs->inBuff.filled == zcs->inBuffSize) { /* filled enough : let's compress */ @@ -438,6 +449,7 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu return ERROR(memory_allocation); } + DEBUGLOG(1, "preparing job %u to compress %u bytes \n", (U32)zcs->nextJobID, (U32)zcs->targetSectionSize); zcs->jobs[jobID].src = zcs->inBuff.buffer; zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start; zcs->jobs[jobID].srcSize = zcs->targetSectionSize; @@ -474,6 +486,7 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu ZSTDMT_jobDescription job = zcs->jobs[jobID]; if (job.jobCompleted) { /* job completed : output can be flushed */ size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos); + DEBUGLOG(1, "trying to flush compressed data from job %u \n", (U32)zcs->doneJobID); ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx); zcs->jobs[jobID].cctx = NULL; ZSTDMT_releaseBuffer(zcs->buffPool, job.src); @@ -489,6 +502,7 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => go to next one */ ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff); zcs->jobs[jobID].dstBuff = g_nullBuffer; + zcs->jobs[jobID].jobCompleted = 0; zcs->doneJobID++; } else { zcs->jobs[jobID].dstFlushed = job.dstFlushed; /* save flush level into zcs for later retrieval */ @@ -503,6 +517,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp { size_t const srcSize = zcs->inBuff.filled; + DEBUGLOG(1, "flushing : %u bytes to compress", (U32)srcSize); if ((srcSize > 0) || (endFrame && !zcs->frameEnded)) { size_t const dstBufferCapacity = 
ZSTD_compressBound(srcSize); buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity); @@ -548,12 +563,13 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp zcs->frameEnded = 1; } - DEBUGLOG(3, "posting job %u (%u bytes)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize); + DEBUGLOG(1, "posting job %u : %u bytes (end:%u)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize, zcs->jobs[jobID].lastChunk); POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]); /* this call is blocking when thread worker pool is exhausted */ zcs->nextJobID++; } /* check if there is any data available to flush */ + DEBUGLOG(1, "zcs->doneJobID : %u ; zcs->nextJobID : %u ", zcs->doneJobID, zcs->nextJobID); if (zcs->doneJobID == zcs->nextJobID) return 0; /* all flushed ! */ { unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask; PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex); @@ -565,6 +581,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp { /* job completed : output can be flushed */ ZSTDMT_jobDescription job = zcs->jobs[wJobID]; size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos); + DEBUGLOG(1, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID); ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx); zcs->jobs[wJobID].cctx = NULL; /* release cctx for future task */ ZSTDMT_releaseBuffer(zcs->buffPool, job.src); zcs->jobs[wJobID].srcStart = NULL; zcs->jobs[wJobID].src = g_nullBuffer; if (ZSTD_isError(job.cSize)) { @@ -577,6 +594,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp job.dstFlushed += toWrite; if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => next one */ ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff); zcs->jobs[wJobID].dstBuff = g_nullBuffer; + zcs->jobs[wJobID].jobCompleted = 0; zcs->doneJobID++; } else { zcs->jobs[wJobID].dstFlushed = job.dstFlushed; From 32dfae6f9841871d88eadcd27a970efab71feb28 
Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 19 Jan 2017 10:32:55 -0800 Subject: [PATCH 41/73] fixed Multi-threaded compression MT compression generates a single frame. Multi-threading operates by breaking the frame into independent sections. But from a decoder perspective, there is no difference : it's just a sequence of blocks. The problem is that the decoder preserves repCodes from the previous block to start decoding the next block. This is also valid between sections, since a section boundary is no different from a block boundary. Previous version would incorrectly initialize repcodes to their default value at the beginning of each section. When using them, there was a mismatch between encoder (default values) and decoder (values from previous block). This change ensures that repcodes won't be used at the beginning of a new section. It works by setting them to 0. This only works with regular (single segment) variants : extDict variants will fail ! Fortunately, sections beyond the 1st one belong to the regular category. To be checked : btopt strategy. This change was only validated from fast to btlazy2 strategies. --- lib/common/zstd_internal.h | 9 +++++++++ lib/compress/zstd_compress.c | 8 ++++++++ lib/compress/zstdmt_compress.c | 11 ++++++----- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index 96e05775..4b56ce1a 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -267,4 +267,13 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) } +/* hidden functions */ + +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! 
*/ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); + + #endif /* ZSTD_CCOMMON_H_MODULE */ diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index d4800dce..84a4a021 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -317,6 +317,14 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, } } +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! */ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) { + int i; + for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = 0; +} /*! ZSTD_copyCCtx() : * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 93220f5c..b060b73f 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -176,10 +176,10 @@ typedef struct { ZSTD_CCtx* cctx; buffer_t src; const void* srcStart; - size_t srcSize; + size_t srcSize; buffer_t dstBuff; - size_t cSize; - size_t dstFlushed; + size_t cSize; + size_t dstFlushed; unsigned long long fullFrameSize; unsigned firstChunk; unsigned lastChunk; @@ -196,9 +196,10 @@ void ZSTDMT_compressChunk(void* jobDescription) buffer_t const dstBuff = job->dstBuff; size_t const initError = ZSTD_compressBegin_advanced(job->cctx, NULL, 0, job->params, job->fullFrameSize); if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; } - if (!job->firstChunk) { - size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, 0); /* flush frame header */ + if (!job->firstChunk) { /* flush frame header */ + size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, 0); if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; } + ZSTD_invalidateRepCodes(job->cctx); } DEBUGLOG(3, "Compressing : "); From 736788f8e82c85197797879142837c130044eb72 Mon Sep 17 00:00:00 2001 From: 
Yann Collet Date: Thu, 19 Jan 2017 12:12:50 -0800 Subject: [PATCH 42/73] added streaming fuzzer tests for MT API Also : fixed corner case, where nb of jobs completed becomes > jobQueueSize which is possible when many flushes are issued while there is not enough dst buffer to flush completed ones. --- lib/compress/zstdmt_compress.c | 21 ++- programs/bench.c | 11 ++ tests/Makefile | 6 +- tests/zstreamtest.c | 330 ++++++++++++++++++++++++++++----- 4 files changed, 310 insertions(+), 58 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index b060b73f..775c52aa 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -243,7 +243,7 @@ struct ZSTDMT_CCtx_s { ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads) { ZSTDMT_CCtx* cctx; - U32 const minNbJobs = nbThreads + 1; + U32 const minNbJobs = nbThreads + 2; U32 const nbJobsLog2 = ZSTD_highbit32(minNbJobs) + 1; U32 const nbJobs = 1 << nbJobsLog2; DEBUGLOG(4, "nbThreads : %u ; minNbJobs : %u ; nbJobsLog2 : %u ; nbJobs : %u \n", @@ -436,7 +436,8 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu zcs->inBuff.filled += toLoad; } - if (zcs->inBuff.filled == zcs->inBuffSize) { /* filled enough : let's compress */ + if ( (zcs->inBuff.filled == zcs->inBuffSize) /* filled enough : let's compress */ + && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) { /* avoid overwriting job round buffer */ size_t const dstBufferCapacity = ZSTD_compressBound(zcs->targetSectionSize); buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity); ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool); @@ -477,8 +478,8 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu zcs->inBuff.filled = (U32)(zcs->inBuffSize - zcs->targetSectionSize); memcpy(zcs->inBuff.buffer.start, (const char*)zcs->jobs[jobID].srcStart + zcs->targetSectionSize, zcs->inBuff.filled); - DEBUGLOG(3, "posting job %u (%u bytes)", 
zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize); - POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]); + DEBUGLOG(3, "posting job %u (%u bytes) (note : doneJob = %u=>%u)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize, zcs->doneJobID, zcs->doneJobID & zcs->jobIDMask); + POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]); /* This call is blocking if all workers are busy */ zcs->nextJobID++; } @@ -487,7 +488,7 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu ZSTDMT_jobDescription job = zcs->jobs[jobID]; if (job.jobCompleted) { /* job completed : output can be flushed */ size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos); - DEBUGLOG(1, "trying to flush compressed data from job %u \n", (U32)zcs->doneJobID); + DEBUGLOG(1, "flush %u bytes from job %u ", (U32)toWrite, zcs->doneJobID); ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx); zcs->jobs[jobID].cctx = NULL; ZSTDMT_releaseBuffer(zcs->buffPool, job.src); @@ -500,6 +501,7 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite); output->pos += toWrite; job.dstFlushed += toWrite; + DEBUGLOG(1, "remaining : %u bytes ", (U32)(job.cSize - job.dstFlushed)); if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => go to next one */ ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff); zcs->jobs[jobID].dstBuff = g_nullBuffer; @@ -519,7 +521,8 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp size_t const srcSize = zcs->inBuff.filled; DEBUGLOG(1, "flushing : %u bytes to compress", (U32)srcSize); - if ((srcSize > 0) || (endFrame && !zcs->frameEnded)) { + if ( ((srcSize > 0) || (endFrame && !zcs->frameEnded)) + && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) { size_t const dstBufferCapacity = ZSTD_compressBound(srcSize); buffer_t const dstBuffer = 
ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity); ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool); @@ -564,7 +567,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp zcs->frameEnded = 1; } - DEBUGLOG(1, "posting job %u : %u bytes (end:%u)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize, zcs->jobs[jobID].lastChunk); + DEBUGLOG(1, "posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize, zcs->jobs[jobID].lastChunk, zcs->doneJobID, zcs->doneJobID & zcs->jobIDMask); POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]); /* this call is blocking when thread worker pool is exhausted */ zcs->nextJobID++; } @@ -575,7 +578,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp { unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask; PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex); while (zcs->jobs[wJobID].jobCompleted==0) { - DEBUGLOG(4, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID); /* we want to block when waiting for data to flush */ + DEBUGLOG(5, "waiting for jobCompleted signal from job %u", zcs->doneJobID); /* we want to block when waiting for data to flush */ pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex); } pthread_mutex_unlock(&zcs->jobCompleted_mutex); @@ -602,7 +605,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp } /* return value : how many bytes left in buffer ; fake it to 1 if unknown but >0 */ if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed); - if (zcs->doneJobID < zcs->nextJobID) return 1; /* still some buffer to flush */ + if ((zcs->doneJobID < zcs->nextJobID) || (zcs->inBuff.filled)) return 1; /* still some buffer to flush */ zcs->allJobsCompleted = zcs->frameEnded; return 0; } } diff --git a/programs/bench.c b/programs/bench.c index 40e1d4ab..5299b471 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -385,6 +385,17 @@ 
static int BMK_benchMem(const void* srcBuffer, size_t srcSize, pos = (U32)(u - bacc); bNb = pos / (128 KB); DISPLAY("(block %u, sub %u, pos %u) \n", segNb, bNb, pos); + if (u>5) { + int n; + for (n=-5; n<0; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]); + DISPLAY(" :%02X: ", ((const BYTE*)srcBuffer)[u]); + for (n=1; n<3; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]); + DISPLAY(" \n"); + for (n=-5; n<0; n++) DISPLAY("%02X ", ((const BYTE*)resultBuffer)[u+n]); + DISPLAY(" :%02X: ", ((const BYTE*)resultBuffer)[u]); + for (n=1; n<3; n++) DISPLAY("%02X ", ((const BYTE*)resultBuffer)[u+n]); + DISPLAY(" \n"); + } break; } if (u==srcSize-1) { /* should never happen */ diff --git a/tests/Makefile b/tests/Makefile index 6312584a..2f399242 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -18,17 +18,13 @@ # zstreamtest32: Same as zstreamtest, but forced to compile in 32-bits mode # ########################################################################## -DESTDIR?= -PREFIX ?= /usr/local -BINDIR = $(PREFIX)/bin -MANDIR = $(PREFIX)/share/man/man1 ZSTDDIR = ../lib PRGDIR = ../programs PYTHON ?= python3 TESTARTEFACT := versionsTest namespaceTest -CPPFLAGS+= -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) +CPPFLAGS+= -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) CFLAGS ?= -O3 CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 \ -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index ce619308..8720ec78 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -29,6 +29,7 @@ #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_maxCLevel, ZSTD_customMem */ #include "zstd.h" /* ZSTD_compressBound */ #include "zstd_errors.h" /* ZSTD_error_srcSize_wrong */ +#include "zstdmt_compress.h" #include "datagen.h" /* RDG_genBuffer */ #define XXH_STATIC_LINKING_ONLY /* 
XXH64_state_t */ #include "xxhash.h" /* XXH64_* */ @@ -137,7 +138,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo cSize = skippableFrameSize + 8; /* Basic compression test */ - DISPLAYLEVEL(4, "test%3i : compress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); + DISPLAYLEVEL(3, "test%3i : compress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); ZSTD_initCStream_usingDict(zc, CNBuffer, 128 KB, 1); outBuff.dst = (char*)(compressedBuffer)+cSize; outBuff.size = compressedBufferSize; @@ -151,16 +152,16 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo { size_t const r = ZSTD_endStream(zc, &outBuff); if (r != 0) goto _output_error; } /* error, or some data not flushed */ cSize += outBuff.pos; - DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100); + DISPLAYLEVEL(3, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100); - DISPLAYLEVEL(4, "test%3i : check CStream size : ", testNb++); + DISPLAYLEVEL(3, "test%3i : check CStream size : ", testNb++); { size_t const s = ZSTD_sizeof_CStream(zc); if (ZSTD_isError(s)) goto _output_error; - DISPLAYLEVEL(4, "OK (%u bytes) \n", (U32)s); + DISPLAYLEVEL(3, "OK (%u bytes) \n", (U32)s); } /* skippable frame test */ - DISPLAYLEVEL(4, "test%3i : decompress skippable frame : ", testNb++); + DISPLAYLEVEL(3, "test%3i : decompress skippable frame : ", testNb++); ZSTD_initDStream_usingDict(zd, CNBuffer, 128 KB); inBuff.src = compressedBuffer; inBuff.size = cSize; @@ -171,11 +172,11 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo { size_t const r = ZSTD_decompressStream(zd, &outBuff, &inBuff); if (r != 0) goto _output_error; } if (outBuff.pos != 0) goto _output_error; /* skippable frame len is 0 */ - DISPLAYLEVEL(4, "OK \n"); + DISPLAYLEVEL(3, "OK \n"); /* Basic decompression test */ inBuff2 = inBuff; - DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", 
testNb++, COMPRESSIBLE_NOISE_LENGTH); + DISPLAYLEVEL(3, "test%3i : decompress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); ZSTD_initDStream_usingDict(zd, CNBuffer, 128 KB); { size_t const r = ZSTD_setDStreamParameter(zd, ZSTDdsp_maxWindowSize, 1000000000); /* large limit */ if (ZSTD_isError(r)) goto _output_error; } @@ -183,33 +184,33 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo if (remaining != 0) goto _output_error; } /* should reach end of frame == 0; otherwise, some data left, or an error */ if (outBuff.pos != CNBufferSize) goto _output_error; /* should regenerate the same amount */ if (inBuff.pos != inBuff.size) goto _output_error; /* should have read the entire frame */ - DISPLAYLEVEL(4, "OK \n"); + DISPLAYLEVEL(3, "OK \n"); /* Re-use without init */ - DISPLAYLEVEL(4, "test%3i : decompress again without init (re-use previous settings): ", testNb++); + DISPLAYLEVEL(3, "test%3i : decompress again without init (re-use previous settings): ", testNb++); outBuff.pos = 0; { size_t const remaining = ZSTD_decompressStream(zd, &outBuff, &inBuff2); if (remaining != 0) goto _output_error; } /* should reach end of frame == 0; otherwise, some data left, or an error */ if (outBuff.pos != CNBufferSize) goto _output_error; /* should regenerate the same amount */ if (inBuff.pos != inBuff.size) goto _output_error; /* should have read the entire frame */ - DISPLAYLEVEL(4, "OK \n"); + DISPLAYLEVEL(3, "OK \n"); /* check regenerated data is byte exact */ - DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++); + DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++); { size_t i; for (i=0; i 100 bytes */ - DISPLAYLEVEL(4, "OK (%s)\n", ZSTD_getErrorName(r)); } + DISPLAYLEVEL(3, "OK (%s)\n", ZSTD_getErrorName(r)); } _end: @@ -412,6 +411,7 @@ static size_t findDiff(const void* buf1, const void* buf2, size_t max) for (u=0; u= testNb) { DISPLAYUPDATE(2, "\r%6u/%6u ", testNb, nbTests); } + else { DISPLAYUPDATE(2, 
"\r%6u ", testNb); } + FUZ_rand(&coreSeed); + lseed = coreSeed ^ prime1; + + /* states full reset (deliberately not synchronized) */ + /* some issues can only happen when reusing states */ + if ((FUZ_rand(&lseed) & 0xFF) == 131) { + U32 const nbThreads = (FUZ_rand(&lseed) % 6) + 1; + ZSTDMT_freeCCtx(zc); + zc = ZSTDMT_createCCtx(nbThreads); + resetAllowed=0; + } + if ((FUZ_rand(&lseed) & 0xFF) == 132) { + ZSTD_freeDStream(zd); + zd = ZSTD_createDStream(); + ZSTD_initDStream_usingDict(zd, NULL, 0); /* ensure at least one init */ + } + + /* srcBuffer selection [0-4] */ + { U32 buffNb = FUZ_rand(&lseed) & 0x7F; + if (buffNb & 7) buffNb=2; /* most common : compressible (P) */ + else { + buffNb >>= 3; + if (buffNb & 7) { + const U32 tnb[2] = { 1, 3 }; /* barely/highly compressible */ + buffNb = tnb[buffNb >> 3]; + } else { + const U32 tnb[2] = { 0, 4 }; /* not compressible / sparse */ + buffNb = tnb[buffNb >> 3]; + } } + srcBuffer = cNoiseBuffer[buffNb]; + } + + /* compression init */ + if ((FUZ_rand(&lseed)&1) /* at beginning, to keep same nb of rand */ + && oldTestLog /* at least one test happened */ && resetAllowed) { + maxTestSize = FUZ_randomLength(&lseed, oldTestLog+2); + if (maxTestSize >= srcBufferSize) maxTestSize = srcBufferSize-1; + { int const compressionLevel = (FUZ_rand(&lseed) % 5) + 1; + size_t const resetError = ZSTDMT_initCStream(zc, compressionLevel); + CHECK(ZSTD_isError(resetError), "ZSTD_resetCStream error : %s", ZSTD_getErrorName(resetError)); + } + } else { + U32 const testLog = FUZ_rand(&lseed) % maxSrcLog; + U32 const cLevel = (FUZ_rand(&lseed) % (ZSTD_maxCLevel() - (testLog/3))) + 1; + maxTestSize = FUZ_rLogLength(&lseed, testLog); + oldTestLog = testLog; + /* random dictionary selection */ + dictSize = 0; + dict = NULL; + { U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? 
0 : maxTestSize; + ZSTD_parameters params = ZSTD_getParams(cLevel, pledgedSrcSize, dictSize); + params.fParams.checksumFlag = FUZ_rand(&lseed) & 1; + params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1; + { size_t const initError = ZSTDMT_initCStream(zc, cLevel); + CHECK (ZSTD_isError(initError),"ZSTD_initCStream_advanced error : %s", ZSTD_getErrorName(initError)); + } } } + + /* multi-segments compression test */ + XXH64_reset(&xxhState, 0); + { ZSTD_outBuffer outBuff = { cBuffer, cBufferSize, 0 } ; + U32 n; + for (n=0, cSize=0, totalTestSize=0 ; totalTestSize < maxTestSize ; n++) { + /* compress random chunks into randomly sized dst buffers */ + { size_t const randomSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const srcSize = MIN (maxTestSize-totalTestSize, randomSrcSize); + size_t const srcStart = FUZ_rand(&lseed) % (srcBufferSize - srcSize); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const dstBuffSize = MIN(cBufferSize - cSize, randomDstSize); + ZSTD_inBuffer inBuff = { srcBuffer+srcStart, srcSize, 0 }; + outBuff.size = outBuff.pos + dstBuffSize; + + DISPLAYLEVEL(5, "Sending %u bytes to compress \n", (U32)srcSize); + { size_t const compressionError = ZSTDMT_compressStream(zc, &outBuff, &inBuff); + CHECK (ZSTD_isError(compressionError), "compression error : %s", ZSTD_getErrorName(compressionError)); } + + XXH64_update(&xxhState, srcBuffer+srcStart, inBuff.pos); + memcpy(copyBuffer+totalTestSize, srcBuffer+srcStart, inBuff.pos); + totalTestSize += inBuff.pos; + } + + /* random flush operation, to mess around */ + if ((FUZ_rand(&lseed) & 15) == 0) { + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const adjustedDstSize = MIN(cBufferSize - cSize, randomDstSize); + outBuff.size = outBuff.pos + adjustedDstSize; + DISPLAYLEVEL(5, "Flushing into dst buffer of size %u \n", (U32)adjustedDstSize); + { size_t const flushError = ZSTDMT_flushStream(zc, &outBuff); + CHECK 
(ZSTD_isError(flushError), "flush error : %s", ZSTD_getErrorName(flushError)); + } } } + + /* final frame epilogue */ + { size_t remainingToFlush = (size_t)(-1); + while (remainingToFlush) { + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const adjustedDstSize = MIN(cBufferSize - cSize, randomDstSize); + outBuff.size = outBuff.pos + adjustedDstSize; + DISPLAYLEVEL(5, "Ending into dst buffer of size %u \n", (U32)adjustedDstSize); + remainingToFlush = ZSTDMT_endStream(zc, &outBuff); + CHECK (ZSTD_isError(remainingToFlush), "flush error : %s", ZSTD_getErrorName(remainingToFlush)); + DISPLAYLEVEL(5, "endStream : remainingToFlush : %u \n", (U32)remainingToFlush); + } } + DISPLAYLEVEL(5, "Frame completed \n"); + crcOrig = XXH64_digest(&xxhState); + cSize = outBuff.pos; + } + + /* multi - fragments decompression test */ + if (!dictSize /* don't reset if dictionary : could be different */ && (FUZ_rand(&lseed) & 1)) { + CHECK (ZSTD_isError(ZSTD_resetDStream(zd)), "ZSTD_resetDStream failed"); + } else { + ZSTD_initDStream_usingDict(zd, dict, dictSize); + } + { size_t decompressionResult = 1; + ZSTD_inBuffer inBuff = { cBuffer, cSize, 0 }; + ZSTD_outBuffer outBuff= { dstBuffer, dstBufferSize, 0 }; + for (totalGenSize = 0 ; decompressionResult ; ) { + size_t const readCSrcSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const randomDstSize = FUZ_randomLength(&lseed, maxSampleLog); + size_t const dstBuffSize = MIN(dstBufferSize - totalGenSize, randomDstSize); + inBuff.size = inBuff.pos + readCSrcSize; + outBuff.size = inBuff.pos + dstBuffSize; + decompressionResult = ZSTD_decompressStream(zd, &outBuff, &inBuff); + CHECK (ZSTD_isError(decompressionResult), "decompression error : %s", ZSTD_getErrorName(decompressionResult)); + } + CHECK (decompressionResult != 0, "frame not fully decoded"); + CHECK (outBuff.pos != totalTestSize, "decompressed data : wrong size") + CHECK (inBuff.pos != cSize, "compressed data should be fully read") + { U64 
const crcDest = XXH64(dstBuffer, totalTestSize, 0); + if (crcDest!=crcOrig) findDiff(copyBuffer, dstBuffer, totalTestSize); + CHECK (crcDest!=crcOrig, "decompressed data corrupted"); + } } + + /*===== noisy/erroneous src decompression test =====*/ + + /* add some noise */ + { U32 const nbNoiseChunks = (FUZ_rand(&lseed) & 7) + 2; + U32 nn; for (nn=0; nn='0') && (*argument<='9')) { @@ -731,7 +973,7 @@ int main(int argc, const char** argv) } break; - case 'T': + case 'T': /* limit tests by time */ argument++; nbTests=0; g_clockTime=0; while ((*argument>='0') && (*argument<='9')) { @@ -744,7 +986,7 @@ int main(int argc, const char** argv) g_clockTime *= CLOCKS_PER_SEC; break; - case 's': + case 's': /* manually select seed */ argument++; seed=0; seedset=1; @@ -755,7 +997,7 @@ int main(int argc, const char** argv) } break; - case 't': + case 't': /* select starting test number */ argument++; testNb=0; while ((*argument>='0') && (*argument<='9')) { @@ -799,12 +1041,12 @@ int main(int argc, const char** argv) if (testNb==0) { result = basicUnitTests(0, ((double)proba) / 100, customNULL); /* constant seed for predictability */ if (!result) { - DISPLAYLEVEL(4, "Unit tests using customMem :\n") + DISPLAYLEVEL(3, "Unit tests using customMem :\n") result = basicUnitTests(0, ((double)proba) / 100, customMem); /* use custom memory allocation functions */ } } - if (!result) - result = fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100); + if (!result) result = fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100); + if (!result) result = fuzzerTests_MT(seed, nbTests, testNb, ((double)proba) / 100); if (mainPause) { int unused; From f22adae984f25a7ba625cd9ebe1d0cbea2c85861 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 19 Jan 2017 13:46:30 -0800 Subject: [PATCH 43/73] fixed minor warning (unused variable) in fuzzer --- programs/zstdcli.c | 2 +- tests/zstreamtest.c | 10 +++------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/programs/zstdcli.c 
b/programs/zstdcli.c index 0474c96c..c9d8100e 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -561,7 +561,7 @@ int main(int argCount, const char* argv[]) } } #endif - /* No warning message in pipe mode (stdin + stdout) or multi-files mode */ + /* No status message in pipe mode (stdin - stdout) or multi-files mode */ if (!strcmp(filenameTable[0], stdinmark) && outFileName && !strcmp(outFileName,stdoutmark) && (displayLevel==2)) displayLevel=1; if ((filenameIdx>1) & (displayLevel==2)) displayLevel=1; diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 8720ec78..6cf6c4a0 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -769,13 +769,9 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp /* random dictionary selection */ dictSize = 0; dict = NULL; - { U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? 0 : maxTestSize; - ZSTD_parameters params = ZSTD_getParams(cLevel, pledgedSrcSize, dictSize); - params.fParams.checksumFlag = FUZ_rand(&lseed) & 1; - params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1; - { size_t const initError = ZSTDMT_initCStream(zc, cLevel); - CHECK (ZSTD_isError(initError),"ZSTD_initCStream_advanced error : %s", ZSTD_getErrorName(initError)); - } } } + { size_t const initError = ZSTDMT_initCStream(zc, cLevel); + CHECK (ZSTD_isError(initError),"ZSTD_initCStream_advanced error : %s", ZSTD_getErrorName(initError)); + } } /* multi-segments compression test */ XXH64_reset(&xxhState, 0); From 0f984d94c4a17345fb0d2ba475c406524d618f56 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 19 Jan 2017 14:05:07 -0800 Subject: [PATCH 44/73] changed MT enabling macro to ZSTD_MULTITHREAD --- lib/common/pool.c | 6 +++--- lib/common/threading.c | 6 +++--- lib/common/threading.h | 8 ++++---- programs/Makefile | 2 +- tests/Makefile | 10 +++++----- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/lib/common/pool.c b/lib/common/pool.c index e24691f7..f40ed39d 100644 --- a/lib/common/pool.c 
+++ b/lib/common/pool.c @@ -11,7 +11,7 @@ #include /* size_t */ #include /* malloc, calloc, free */ -#ifdef ZSTD_PTHREAD +#ifdef ZSTD_MULTITHREAD #include @@ -160,7 +160,7 @@ void POOL_add(void *ctxVoid, POOL_function function, void *opaque) { pthread_cond_signal(&ctx->queuePopCond); } -#else /* ZSTD_PTHREAD not defined */ +#else /* ZSTD_MULTITHREAD not defined */ /* No multi-threading support */ /* We don't need any data, but if it is empty malloc() might return NULL. */ @@ -183,4 +183,4 @@ void POOL_add(void *ctx, POOL_function function, void *opaque) { function(opaque); } -#endif /* ZSTD_PTHREAD */ +#endif /* ZSTD_MULTITHREAD */ diff --git a/lib/common/threading.c b/lib/common/threading.c index abad2c15..38bbab0d 100644 --- a/lib/common/threading.c +++ b/lib/common/threading.c @@ -12,10 +12,10 @@ */ /** - * This file will hold wrapper for systems, which do not support Pthreads + * This file will hold wrapper for systems, which do not support pthreads */ -#if defined(ZSTD_PTHREAD) && defined(_WIN32) +#if defined(ZSTD_MULTITHREAD) && defined(_WIN32) /** * Windows minimalist Pthread Wrapper, based on : @@ -70,4 +70,4 @@ int _pthread_join(pthread_t * thread, void **value_ptr) } } -#endif +#endif /* ZSTD_MULTITHREAD */ diff --git a/lib/common/threading.h b/lib/common/threading.h index 4572d71d..74b2ec04 100644 --- a/lib/common/threading.h +++ b/lib/common/threading.h @@ -18,7 +18,7 @@ extern "C" { #endif -#if defined(ZSTD_PTHREAD) && defined(_WIN32) +#if defined(ZSTD_MULTITHREAD) && defined(_WIN32) /** * Windows minimalist Pthread Wrapper, based on : @@ -73,11 +73,11 @@ int _pthread_join(pthread_t* thread, void** value_ptr); */ -#elif defined(ZSTD_PTHREAD) /* posix assumed ; need a better detection mathod */ +#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection mathod */ /* === POSIX Systems === */ # include -#else /* ZSTD_PTHREAD not defined */ +#else /* ZSTD_MULTITHREAD not defined */ /* No multithreading support */ #define pthread_mutex_t int 
/* #define rather than typedef, as sometimes pthread support is implicit, resulting in duplicated symbols */ @@ -95,7 +95,7 @@ int _pthread_join(pthread_t* thread, void** value_ptr); /* do not use pthread_t */ -#endif /* ZSTD_PTHREAD */ +#endif /* ZSTD_MULTITHREAD */ #if defined (__cplusplus) } diff --git a/programs/Makefile b/programs/Makefile index ff95ddc6..02c924f3 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -127,7 +127,7 @@ gzstd: && $(MAKE) zstd; \ fi -zstdmt: CPPFLAGS += -DZSTD_PTHREAD +zstdmt: CPPFLAGS += -DZSTD_MULTITHREAD zstdmt: LDFLAGS += -lpthread zstdmt: zstd diff --git a/tests/Makefile b/tests/Makefile index 2f399242..6bc1ad2e 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -44,10 +44,10 @@ ZDICT_FILES := $(ZSTDDIR)/dictBuilder/*.c # Define *.exe as extension for Windows systems ifneq (,$(filter Windows%,$(OS))) EXT =.exe -PTHREAD = -DZSTD_PTHREAD +MULTITHREAD = -DZSTD_MULTITHREAD else EXT = -PTHREAD = -pthread -DZSTD_PTHREAD +MULTITHREAD = -pthread -DZSTD_MULTITHREAD endif VOID = /dev/null @@ -122,10 +122,10 @@ zbufftest-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/datagen.c zbufftest.c $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@$(EXT) zstreamtest : $(ZSTD_FILES) $(PRGDIR)/datagen.c zstreamtest.c - $(CC) $(FLAGS) $^ -o $@$(EXT) + $(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT) zstreamtest32 : $(ZSTD_FILES) $(PRGDIR)/datagen.c zstreamtest.c - $(CC) -m32 $(FLAGS) $^ -o $@$(EXT) + $(CC) -m32 $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT) zstreamtest-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd zstreamtest-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/datagen.c zstreamtest.c @@ -157,7 +157,7 @@ else endif pool : pool.c $(ZSTDDIR)/common/pool.c $(ZSTDDIR)/common/threading.c - $(CC) $(FLAGS) $(PTHREAD) $^ -o $@$(EXT) + $(CC) $(FLAGS) $(MULTITHREAD) $^ -o $@$(EXT) namespaceTest: if $(CC) namespaceTest.c ../lib/common/xxhash.c -o $@ ; then echo compilation should fail; exit 1 ; fi From 19d670ba9d59b987c51dab5d900d45c62b24490a Mon Sep 17 00:00:00 2001 From: 
Yann Collet Date: Thu, 19 Jan 2017 15:32:07 -0800 Subject: [PATCH 45/73] Added ZSTDMT_initCStream_advanced() variant Correctly compress with custom params and dictionary Added relevant fuzzer test in zstreamtest Also : new macro ZSTDMT_SECTION_LOGSIZE_MIN, which sets a minimum size for a full job (note : a flush() command can still generate a partial job anytime) --- lib/compress/zstdmt_compress.c | 44 +++++++++++++++++++++++++++------- lib/compress/zstdmt_compress.h | 9 +++++-- tests/Makefile | 4 ++-- tests/zstreamtest.c | 31 ++++++++++++++++-------- 4 files changed, 66 insertions(+), 22 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 775c52aa..dd1fd345 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -1,3 +1,12 @@ + + +/* ====== Tuning parameters ====== */ +#ifndef ZSTDMT_SECTION_LOGSIZE_MIN +#define ZSTDMT_SECTION_LOGSIZE_MIN 20 /*< minimum size for a full compression job (20==2^20==1 MB) */ +#endif + +/* ====== Dependencies ====== */ + #include /* malloc */ #include /* memcpy */ #include /* threadpool */ @@ -180,13 +189,15 @@ typedef struct { buffer_t dstBuff; size_t cSize; size_t dstFlushed; - unsigned long long fullFrameSize; unsigned firstChunk; unsigned lastChunk; unsigned jobCompleted; pthread_mutex_t* jobCompleted_mutex; pthread_cond_t* jobCompleted_cond; ZSTD_parameters params; + const void* dict; + size_t dictSize; + unsigned long long fullFrameSize; } ZSTDMT_jobDescription; /* ZSTDMT_compressChunk() : POOL_function type */ @@ -194,7 +205,7 @@ void ZSTDMT_compressChunk(void* jobDescription) { ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription; buffer_t const dstBuff = job->dstBuff; - size_t const initError = ZSTD_compressBegin_advanced(job->cctx, NULL, 0, job->params, job->fullFrameSize); + size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->dict, job->dictSize, job->params, job->fullFrameSize); if (ZSTD_isError(initError)) { 
job->cSize = initError; goto _endJob; } if (!job->firstChunk) { /* flush frame header */ size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, 0); @@ -237,6 +248,9 @@ struct ZSTDMT_CCtx_s { unsigned nextJobID; unsigned frameEnded; unsigned allJobsCompleted; + unsigned long long frameContentSize; + const void* dict; + size_t dictSize; ZSTDMT_jobDescription jobs[1]; /* variable size (must lies at the end) */ }; @@ -405,15 +419,20 @@ static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs) { } } -size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) { +size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs, const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize) { if (zcs->allJobsCompleted == 0) { /* previous job not correctly finished */ ZSTDMT_waitForAllJobsCompleted(zcs); ZSTDMT_releaseAllJobResources(zcs); zcs->allJobsCompleted = 1; } - zcs->params = ZSTD_getParams(compressionLevel, 0, 0); - zcs->targetSectionSize = (size_t)1 << (zcs->params.cParams.windowLog + 2); - zcs->inBuffSize = 5 * (1 << zcs->params.cParams.windowLog); + params.fParams.checksumFlag = 0; /* current limitation : no checksum (to be lifted in a later version) */ + zcs->params = params; + zcs->dict = dict; + zcs->dictSize = dictSize; + zcs->frameContentSize = pledgedSrcSize; + zcs->targetSectionSize = (size_t)1 << MAX(ZSTDMT_SECTION_LOGSIZE_MIN, (zcs->params.cParams.windowLog + 2)); + zcs->inBuffSize = zcs->targetSectionSize + (1 << zcs->params.cParams.windowLog); zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); if (zcs->inBuff.buffer.start == NULL) return ERROR(memory_allocation); zcs->inBuff.filled = 0; @@ -424,6 +443,11 @@ size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) { return 0; } +size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) { + ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0); + return 
ZSTDMT_initCStream_advanced(zcs, NULL, 0, params, 0); +} + size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) { @@ -455,8 +479,10 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu zcs->jobs[jobID].src = zcs->inBuff.buffer; zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start; zcs->jobs[jobID].srcSize = zcs->targetSectionSize; - zcs->jobs[jobID].fullFrameSize = 0; zcs->jobs[jobID].params = zcs->params; + zcs->jobs[jobID].dict = zcs->nextJobID == 0 ? zcs->dict : NULL; + zcs->jobs[jobID].dictSize = zcs->nextJobID == 0 ? zcs->dictSize : 0; + zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize; zcs->jobs[jobID].dstBuff = dstBuffer; zcs->jobs[jobID].cctx = cctx; zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0); @@ -539,8 +565,10 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp zcs->jobs[jobID].src = zcs->inBuff.buffer; zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start; zcs->jobs[jobID].srcSize = srcSize; - zcs->jobs[jobID].fullFrameSize = 0; zcs->jobs[jobID].params = zcs->params; + zcs->jobs[jobID].dict = zcs->nextJobID == 0 ? zcs->dict : NULL; + zcs->jobs[jobID].dictSize = zcs->nextJobID == 0 ? 
zcs->dictSize : 0; + zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize; zcs->jobs[jobID].dstBuff = dstBuffer; zcs->jobs[jobID].cctx = cctx; zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0); diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h index ca5d6b60..d1b01a79 100644 --- a/lib/compress/zstdmt_compress.h +++ b/lib/compress/zstdmt_compress.h @@ -1,6 +1,7 @@ /* === Dependencies === */ #include /* size_t */ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */ #include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer */ @@ -19,6 +20,10 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* cctx, /* === Streaming functions === */ size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel); +size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs, const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown ; current limitation : no checksum */ + size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); -size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output); -size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output); + +size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ +size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ diff --git a/tests/Makefile b/tests/Makefile index 6bc1ad2e..bbc8d3de 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -26,8 +26,8 @@ TESTARTEFACT := versionsTest namespaceTest CPPFLAGS+= -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) CFLAGS ?= -O3 -CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 \ - -Wswitch-enum 
-Wdeclaration-after-statement -Wstrict-prototypes -Wundef +CFLAGS += -g -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 \ + -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef CFLAGS += $(MOREFLAGS) FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 6cf6c4a0..1feec450 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -759,7 +759,7 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp if (maxTestSize >= srcBufferSize) maxTestSize = srcBufferSize-1; { int const compressionLevel = (FUZ_rand(&lseed) % 5) + 1; size_t const resetError = ZSTDMT_initCStream(zc, compressionLevel); - CHECK(ZSTD_isError(resetError), "ZSTD_resetCStream error : %s", ZSTD_getErrorName(resetError)); + CHECK(ZSTD_isError(resetError), "ZSTDMT_initCStream error : %s", ZSTD_getErrorName(resetError)); } } else { U32 const testLog = FUZ_rand(&lseed) % maxSrcLog; @@ -767,11 +767,18 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp maxTestSize = FUZ_rLogLength(&lseed, testLog); oldTestLog = testLog; /* random dictionary selection */ - dictSize = 0; - dict = NULL; - { size_t const initError = ZSTDMT_initCStream(zc, cLevel); - CHECK (ZSTD_isError(initError),"ZSTD_initCStream_advanced error : %s", ZSTD_getErrorName(initError)); - } } + dictSize = ((FUZ_rand(&lseed)&63)==1) ? FUZ_randomLength(&lseed, maxSampleLog) : 0; + { size_t const dictStart = FUZ_rand(&lseed) % (srcBufferSize - dictSize); + dict = srcBuffer + dictStart; + } + { U64 const pledgedSrcSize = (FUZ_rand(&lseed) & 3) ? 
0 : maxTestSize; + ZSTD_parameters params = ZSTD_getParams(cLevel, pledgedSrcSize, dictSize); + DISPLAYLEVEL(5, "Init with windowLog = %u \n", params.cParams.windowLog); + params.fParams.checksumFlag = FUZ_rand(&lseed) & 1; + params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1; + { size_t const initError = ZSTDMT_initCStream_advanced(zc, dict, dictSize, params, pledgedSrcSize); + CHECK (ZSTD_isError(initError),"ZSTDMT_initCStream_advanced error : %s", ZSTD_getErrorName(initError)); + } } } /* multi-segments compression test */ XXH64_reset(&xxhState, 0); @@ -790,6 +797,7 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp DISPLAYLEVEL(5, "Sending %u bytes to compress \n", (U32)srcSize); { size_t const compressionError = ZSTDMT_compressStream(zc, &outBuff, &inBuff); CHECK (ZSTD_isError(compressionError), "compression error : %s", ZSTD_getErrorName(compressionError)); } + DISPLAYLEVEL(5, "%u bytes read by ZSTDMT_compressStream \n", (U32)inBuff.pos); XXH64_update(&xxhState, srcBuffer+srcStart, inBuff.pos); memcpy(copyBuffer+totalTestSize, srcBuffer+srcStart, inBuff.pos); @@ -924,8 +932,9 @@ int main(int argc, const char** argv) int testNb = 0; int proba = FUZ_COMPRESSIBILITY_DEFAULT; int result=0; - U32 mainPause = 0; - const char* programName = argv[0]; + int mainPause = 0; + int mtOnly = 0; + const char* const programName = argv[0]; ZSTD_customMem customMem = { allocFunction, freeFunction, NULL }; ZSTD_customMem customNULL = { NULL, NULL, NULL }; @@ -936,8 +945,10 @@ int main(int argc, const char** argv) /* Parsing commands. 
Aggregated commands are allowed */ if (argument[0]=='-') { - argument++; + if (!strcmp(argument, "--mt")) { mtOnly=1; continue; } + + argument++; while (*argument!=0) { switch(*argument) { @@ -1041,7 +1052,7 @@ int main(int argc, const char** argv) result = basicUnitTests(0, ((double)proba) / 100, customMem); /* use custom memory allocation functions */ } } - if (!result) result = fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100); + if (!result && !mtOnly) result = fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100); if (!result) result = fuzzerTests_MT(seed, nbTests, testNb, ((double)proba) / 100); if (mainPause) { From 500014af49609327001c0e7eedac432cc1f0426b Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 19 Jan 2017 16:59:56 -0800 Subject: [PATCH 46/73] zstd cli can now compress using multi-threading added : command -T# added : ZSTD_resetCStream() (zstdmt_compress) added : FIO_setNbThreads() (fileio) --- lib/compress/zstdmt_compress.c | 7 +++ lib/compress/zstdmt_compress.h | 1 + programs/Makefile | 2 +- programs/bench.c | 30 +++++++++---- programs/bench.h | 6 +-- programs/fileio.c | 80 +++++++++++++++++++++++++--------- programs/fileio.h | 1 + programs/zstdcli.c | 44 +++++++++++-------- 8 files changed, 119 insertions(+), 52 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index dd1fd345..de503227 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -443,6 +443,13 @@ size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs, const void* dict, size_t di return 0; } +/* ZSTDMT_resetCStream() : + * pledgedSrcSize is optional and can be zero == unknown */ +size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize) +{ + return ZSTDMT_initCStream_advanced(zcs, zcs->dict, zcs->dictSize, zcs->params, pledgedSrcSize); +} + size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) { ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0); 
return ZSTDMT_initCStream_advanced(zcs, NULL, 0, params, 0); diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h index d1b01a79..759906db 100644 --- a/lib/compress/zstdmt_compress.h +++ b/lib/compress/zstdmt_compress.h @@ -20,6 +20,7 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* cctx, /* === Streaming functions === */ size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel); +size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */ size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown ; current limitation : no checksum */ diff --git a/programs/Makefile b/programs/Makefile index 02c924f3..f2a0ff26 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -22,7 +22,7 @@ else ALIGN_LOOP = endif -CPPFLAGS+= -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/dictBuilder +CPPFLAGS+= -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress -I$(ZSTDDIR)/dictBuilder CFLAGS ?= -O3 CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 \ -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef \ diff --git a/programs/bench.c b/programs/bench.c index 5299b471..74d26ca0 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -9,6 +9,14 @@ +/* ************************************** +* Tuning parameters +****************************************/ +#ifndef BMK_TIMETEST_DEFAULT_S /* default minimum time per test */ +#define BMK_TIMETEST_DEFAULT_S 3 +#endif + + /* ************************************** * Compiler Warnings ****************************************/ @@ -43,7 +51,6 @@ # define ZSTD_GIT_COMMIT_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_GIT_COMMIT) #endif -#define NBSECONDS 3 #define TIMELOOP_MICROSEC 1*1000000ULL /* 1 second */ #define ACTIVEPERIOD_MICROSEC 70*1000000ULL /* 70 seconds 
*/ #define COOLPERIOD_SEC 10 @@ -109,31 +116,36 @@ static clock_us_t BMK_clockMicroSec(void) /* ************************************* * Benchmark Parameters ***************************************/ -static U32 g_nbSeconds = NBSECONDS; -static size_t g_blockSize = 0; static int g_additionalParam = 0; static U32 g_decodeOnly = 0; -static U32 g_nbThreads = 1; void BMK_setNotificationLevel(unsigned level) { g_displayLevel=level; } void BMK_setAdditionalParam(int additionalParam) { g_additionalParam=additionalParam; } -void BMK_SetNbSeconds(unsigned nbSeconds) +static U32 g_nbSeconds = BMK_TIMETEST_DEFAULT_S; +void BMK_setNbSeconds(unsigned nbSeconds) { g_nbSeconds = nbSeconds; - DISPLAYLEVEL(3, "- test >= %u seconds per compression / decompression -\n", g_nbSeconds); + DISPLAYLEVEL(3, "- test >= %u seconds per compression / decompression - \n", g_nbSeconds); } -void BMK_SetBlockSize(size_t blockSize) +static size_t g_blockSize = 0; +void BMK_setBlockSize(size_t blockSize) { g_blockSize = blockSize; - DISPLAYLEVEL(2, "using blocks of size %u KB \n", (U32)(blockSize>>10)); + if (g_blockSize) DISPLAYLEVEL(2, "using blocks of size %u KB \n", (U32)(blockSize>>10)); } void BMK_setDecodeOnlyMode(unsigned decodeFlag) { g_decodeOnly = (decodeFlag>0); } -void BMK_SetNbThreads(unsigned nbThreads) { g_nbThreads = nbThreads; } +static U32 g_nbThreads = 1; +void BMK_setNbThreads(unsigned nbThreads) { +#ifndef ZSTD_MULTITHREAD + if (nbThreads > 1) DISPLAYLEVEL(2, "Note : multi-threading is disabled \n"); +#endif + g_nbThreads = nbThreads; +} /* ******************************************************** diff --git a/programs/bench.h b/programs/bench.h index 87850bcc..2918c02b 100644 --- a/programs/bench.h +++ b/programs/bench.h @@ -19,9 +19,9 @@ int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles,const char* dic int cLevel, int cLevelLast, ZSTD_compressionParameters* compressionParams); /* Set Parameters */ -void BMK_SetNbSeconds(unsigned nbLoops); -void 
BMK_SetBlockSize(size_t blockSize); -void BMK_SetNbThreads(unsigned nbThreads); +void BMK_setNbSeconds(unsigned nbLoops); +void BMK_setBlockSize(size_t blockSize); +void BMK_setNbThreads(unsigned nbThreads); void BMK_setNotificationLevel(unsigned level); void BMK_setAdditionalParam(int additionalParam); void BMK_setDecodeOnlyMode(unsigned decodeFlag); diff --git a/programs/fileio.c b/programs/fileio.c index a112cc04..3864a5fa 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -34,11 +34,14 @@ #include "fileio.h" #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */ #include "zstd.h" -#ifdef ZSTD_GZDECOMPRESS -#include "zlib.h" -#if !defined(z_const) - #define z_const +#ifdef ZSTD_MULTITHREAD +# include "zstdmt_compress.h" #endif +#ifdef ZSTD_GZDECOMPRESS +# include "zlib.h" +# if !defined(z_const) +# define z_const +# endif #endif @@ -103,7 +106,13 @@ static U32 g_removeSrcFile = 0; void FIO_setRemoveSrcFile(unsigned flag) { g_removeSrcFile = (flag>0); } static U32 g_memLimit = 0; void FIO_setMemLimit(unsigned memLimit) { g_memLimit = memLimit; } - +static U32 g_nbThreads = 1; +void FIO_setNbThreads(unsigned nbThreads) { +#ifndef ZSTD_MULTITHREAD + if (nbThreads > 1) DISPLAYLEVEL(2, "Note : multi-threading is disabled \n"); +#endif + g_nbThreads = nbThreads; +} /*-************************************* @@ -226,22 +235,30 @@ static size_t FIO_loadFile(void** bufferPtr, const char* fileName) * Compression ************************************************************************/ typedef struct { + FILE* srcFile; + FILE* dstFile; void* srcBuffer; size_t srcBufferSize; void* dstBuffer; size_t dstBufferSize; +#ifdef ZSTD_MULTITHREAD + ZSTDMT_CCtx* cctx; +#else ZSTD_CStream* cctx; - FILE* dstFile; - FILE* srcFile; +#endif } cRess_t; -static cRess_t FIO_createCResources(const char* dictFileName, int cLevel, +static cRess_t FIO_createCResources(const char* dictFileName, int cLevel, U64 srcSize, ZSTD_compressionParameters* comprParams) { 
cRess_t ress; memset(&ress, 0, sizeof(ress)); +#ifdef ZSTD_MULTITHREAD + ress.cctx = ZSTDMT_createCCtx(g_nbThreads); +#else ress.cctx = ZSTD_createCStream(); +#endif if (ress.cctx == NULL) EXM_THROW(30, "zstd: allocation error : can't create ZSTD_CStream"); ress.srcBufferSize = ZSTD_CStreamInSize(); ress.srcBuffer = malloc(ress.srcBufferSize); @@ -264,7 +281,11 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel, if (comprParams->searchLength) params.cParams.searchLength = comprParams->searchLength; if (comprParams->targetLength) params.cParams.targetLength = comprParams->targetLength; if (comprParams->strategy) params.cParams.strategy = (ZSTD_strategy)(comprParams->strategy - 1); +#ifdef ZSTD_MULTITHREAD + { size_t const errorCode = ZSTDMT_initCStream_advanced(ress.cctx, dictBuffer, dictBuffSize, params, srcSize); +#else { size_t const errorCode = ZSTD_initCStream_advanced(ress.cctx, dictBuffer, dictBuffSize, params, srcSize); +#endif if (ZSTD_isError(errorCode)) EXM_THROW(33, "Error initializing CStream : %s", ZSTD_getErrorName(errorCode)); } } free(dictBuffer); @@ -277,7 +298,11 @@ static void FIO_freeCResources(cRess_t ress) { free(ress.srcBuffer); free(ress.dstBuffer); +#ifdef ZSTD_MULTITHREAD + ZSTDMT_freeCCtx(ress.cctx); +#else ZSTD_freeCStream(ress.cctx); /* never fails */ +#endif } @@ -296,7 +321,11 @@ static int FIO_compressFilename_internal(cRess_t ress, U64 const fileSize = UTIL_getFileSize(srcFileName); /* init */ +#ifdef ZSTD_MULTITHREAD + { size_t const resetError = ZSTDMT_resetCStream(ress.cctx, fileSize); +#else { size_t const resetError = ZSTD_resetCStream(ress.cctx, fileSize); +#endif if (ZSTD_isError(resetError)) EXM_THROW(21, "Error initializing compression : %s", ZSTD_getErrorName(resetError)); } @@ -311,11 +340,14 @@ static int FIO_compressFilename_internal(cRess_t ress, /* Compress using buffered streaming */ { ZSTD_inBuffer inBuff = { ress.srcBuffer, inSize, 0 }; ZSTD_outBuffer outBuff= { ress.dstBuffer, 
ress.dstBufferSize, 0 }; - { size_t const result = ZSTD_compressStream(ress.cctx, &outBuff, &inBuff); - if (ZSTD_isError(result)) EXM_THROW(23, "Compression error : %s ", ZSTD_getErrorName(result)); } - if (inBuff.pos != inBuff.size) - /* inBuff should be entirely consumed since buffer sizes are recommended ones */ - EXM_THROW(24, "Compression error : input block not fully consumed"); + while (inBuff.pos != inBuff.size) { /* note : is there any possibility of endless loop ? for example, if outBuff is not large enough ? */ +#ifdef ZSTD_MULTITHREAD + size_t const result = ZSTDMT_compressStream(ress.cctx, &outBuff, &inBuff); +#else + size_t const result = ZSTD_compressStream(ress.cctx, &outBuff, &inBuff); +#endif + if (ZSTD_isError(result)) EXM_THROW(23, "Compression error : %s ", ZSTD_getErrorName(result)); + } /* Write cBlock */ { size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile); @@ -326,13 +358,19 @@ static int FIO_compressFilename_internal(cRess_t ress, } /* End of Frame */ - { ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 }; - size_t const result = ZSTD_endStream(ress.cctx, &outBuff); - if (result!=0) EXM_THROW(26, "Compression error : cannot create frame end"); - - { size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile); - if (sizeCheck!=outBuff.pos) EXM_THROW(27, "Write error : cannot write frame end into %s", dstFileName); } - compressedfilesize += outBuff.pos; + { size_t result = 1; + while (result!=0) { /* note : is there any possibility of endless loop ? 
*/ + ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 }; +#ifdef ZSTD_MULTITHREAD + result = ZSTDMT_endStream(ress.cctx, &outBuff); +#else + result = ZSTD_endStream(ress.cctx, &outBuff); +#endif + if (ZSTD_isError(result)) EXM_THROW(26, "Compression error during frame end : %s", ZSTD_getErrorName(result)); + { size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile); + if (sizeCheck!=outBuff.pos) EXM_THROW(27, "Write error : cannot write frame end into %s", dstFileName); } + compressedfilesize += outBuff.pos; + } } /* Status */ @@ -632,7 +670,7 @@ unsigned long long FIO_decompressFrame(dRess_t* ress, if (ZSTD_isError(readSizeHint)) EXM_THROW(36, "Decoding error : %s", ZSTD_getErrorName(readSizeHint)); /* Write block */ - storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, outBuff.pos, storedSkips); + storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, outBuff.pos, storedSkips); frameSize += outBuff.pos; DISPLAYUPDATE(2, "\rDecoded : %u MB... 
", (U32)((alreadyDecoded+frameSize)>>20) ); diff --git a/programs/fileio.h b/programs/fileio.h index b7165833..9ef44929 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -40,6 +40,7 @@ void FIO_setDictIDFlag(unsigned dictIDFlag); void FIO_setChecksumFlag(unsigned checksumFlag); void FIO_setRemoveSrcFile(unsigned flag); void FIO_setMemLimit(unsigned memLimit); +void FIO_setNbThreads(unsigned nbThreads); /*-************************************* diff --git a/programs/zstdcli.c b/programs/zstdcli.c index c9d8100e..de25d0f0 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -110,12 +110,15 @@ static int usage_advanced(const char* programName) DISPLAY( " -q : suppress warnings; specify twice to suppress errors too\n"); DISPLAY( " -c : force write to standard output, even if it is the console\n"); #ifdef UTIL_HAS_CREATEFILELIST - DISPLAY( " -r : operate recursively on directories\n"); + DISPLAY( " -r : operate recursively on directories \n"); #endif #ifndef ZSTD_NOCOMPRESS DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel()); DISPLAY( "--no-dictID : don't write dictID into header (dictionary compression)\n"); - DISPLAY( "--[no-]check : integrity check (default:enabled)\n"); + DISPLAY( "--[no-]check : integrity check (default:enabled) \n"); +#ifdef ZSTD_MULTITHREAD + DISPLAY( " -T# : use # threads for compression (default:1) \n"); +#endif #endif #ifndef ZSTD_NODECOMPRESS DISPLAY( "--test : test compressed file integrity \n"); @@ -233,7 +236,10 @@ int main(int argCount, const char* argv[]) nextArgumentIsDictID=0, nextArgumentsAreFiles=0, ultra=0, - lastCommand = 0; + lastCommand = 0, + nbThreads = 1; + unsigned bench_nbSeconds = 3; /* would be better if this value was synchronized from bench */ + size_t blockSize = 0; zstd_operation_mode operation = zom_compress; ZSTD_compressionParameters compressionParams; int cLevel = ZSTDCLI_CLEVEL_DEFAULT; @@ -396,39 +402,37 @@ int main(int argCount, 
const char* argv[]) #ifndef ZSTD_NOBENCH /* Benchmark */ - case 'b': operation=zom_bench; argument++; break; + case 'b': + operation=zom_bench; + argument++; + break; /* range bench (benchmark only) */ case 'e': - /* compression Level */ - argument++; - cLevelLast = readU32FromChar(&argument); - break; + /* compression Level */ + argument++; + cLevelLast = readU32FromChar(&argument); + break; /* Modify Nb Iterations (benchmark only) */ case 'i': argument++; - { U32 const iters = readU32FromChar(&argument); - BMK_setNotificationLevel(displayLevel); - BMK_SetNbSeconds(iters); - } + bench_nbSeconds = readU32FromChar(&argument); break; /* cut input into blocks (benchmark only) */ case 'B': argument++; - { size_t const bSize = readU32FromChar(&argument); - BMK_setNotificationLevel(displayLevel); - BMK_SetBlockSize(bSize); - } + blockSize = readU32FromChar(&argument); break; +#endif /* ZSTD_NOBENCH */ + /* nb of threads (hidden option) */ case 'T': argument++; - BMK_SetNbThreads(readU32FromChar(&argument)); + nbThreads = readU32FromChar(&argument); break; -#endif /* ZSTD_NOBENCH */ /* Dictionary Selection level */ case 's': @@ -518,6 +522,9 @@ int main(int argCount, const char* argv[]) if (operation==zom_bench) { #ifndef ZSTD_NOBENCH BMK_setNotificationLevel(displayLevel); + BMK_setBlockSize(blockSize); + BMK_setNbThreads(nbThreads); + BMK_setNbSeconds(bench_nbSeconds); BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel, cLevelLast, &compressionParams); #endif goto _end; @@ -569,6 +576,7 @@ int main(int argCount, const char* argv[]) FIO_setNotificationLevel(displayLevel); if (operation==zom_compress) { #ifndef ZSTD_NOCOMPRESS + FIO_setNbThreads(nbThreads); if ((filenameIdx==1) && outFileName) operationResult = FIO_compressFilename(outFileName, filenameTable[0], dictFileName, cLevel, &compressionParams); else From b459aad5b461c163c53accca48d7c6aafdcff021 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 19 Jan 2017 17:33:37 -0800 Subject: [PATCH 47/73] 
renamed savedRep into repToConfirm --- lib/compress/zstd_compress.c | 22 +++++++++++----------- lib/compress/zstd_opt.h | 8 ++++---- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 84a4a021..d984af1f 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -63,7 +63,7 @@ struct ZSTD_CCtx_s { U32 loadedDictEnd; ZSTD_compressionStage_e stage; U32 rep[ZSTD_REP_NUM]; - U32 savedRep[ZSTD_REP_NUM]; + U32 repToConfirm[ZSTD_REP_NUM]; U32 dictID; ZSTD_parameters params; void* workSpace; @@ -742,7 +742,7 @@ _check_compressibility: if ((size_t)(op-ostart) >= maxCSize) return 0; } /* confirm repcodes */ - { int i; for (i=0; irep[i] = zc->savedRep[i]; } + { int i; for (i=0; irep[i] = zc->repToConfirm[i]; } return op - ostart; } @@ -1011,8 +1011,8 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx, } } } /* save reps for next block */ - cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved; - cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved; + cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved; + cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved; /* Last Literals */ { size_t const lastLLSize = iend - anchor; @@ -1126,7 +1126,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, } } } /* save reps for next block */ - ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2; + ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2; /* Last Literals */ { size_t const lastLLSize = iend - anchor; @@ -1280,8 +1280,8 @@ void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx, } } } /* save reps for next block */ - cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved; - cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved; + cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved; + cctx->repToConfirm[1] = offset_2 ? 
offset_2 : offsetSaved; /* Last Literals */ { size_t const lastLLSize = iend - anchor; @@ -1430,7 +1430,7 @@ static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx, } } } /* save reps for next block */ - ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2; + ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2; /* Last Literals */ { size_t const lastLLSize = iend - anchor; @@ -1962,8 +1962,8 @@ _storeSequence: } } /* Save reps for next block */ - ctx->savedRep[0] = offset_1 ? offset_1 : savedOffset; - ctx->savedRep[1] = offset_2 ? offset_2 : savedOffset; + ctx->repToConfirm[0] = offset_1 ? offset_1 : savedOffset; + ctx->repToConfirm[1] = offset_2 ? offset_2 : savedOffset; /* Last Literals */ { size_t const lastLLSize = iend - anchor; @@ -2157,7 +2157,7 @@ _storeSequence: } } /* Save reps for next block */ - ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2; + ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2; /* Last Literals */ { size_t const lastLLSize = iend - anchor; diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h index f071c4f3..8393e7b4 100644 --- a/lib/compress/zstd_opt.h +++ b/lib/compress/zstd_opt.h @@ -38,7 +38,7 @@ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr, const BYTE* src, size_t src ssPtr->cachedLiterals = NULL; ssPtr->cachedPrice = ssPtr->cachedLitLength = 0; - ssPtr->staticPrices = 0; + ssPtr->staticPrices = 0; if (ssPtr->litLengthSum == 0) { if (srcSize <= 1024) ssPtr->staticPrices = 1; @@ -56,7 +56,7 @@ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr, const BYTE* src, size_t src for (u=0; u<=MaxLit; u++) { ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV); - ssPtr->litSum += ssPtr->litFreq[u]; + ssPtr->litSum += ssPtr->litFreq[u]; } for (u=0; u<=MaxLL; u++) ssPtr->litLengthFreq[u] = 1; @@ -634,7 +634,7 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ } } /* for (cur=0; cur < last_pos; ) */ /* Save reps for next block */ - { int 
i; for (i=0; isavedRep[i] = rep[i]; } + { int i; for (i=0; irepToConfirm[i] = rep[i]; } /* Last Literals */ { size_t const lastLLSize = iend - anchor; @@ -907,7 +907,7 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ } } /* for (cur=0; cur < last_pos; ) */ /* Save reps for next block */ - { int i; for (i=0; isavedRep[i] = rep[i]; } + { int i; for (i=0; irepToConfirm[i] = rep[i]; } /* Last Literals */ { size_t lastLLSize = iend - anchor; From 458c8a94b4330b300027217936d3e3f7343a9483 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 19 Jan 2017 17:44:15 -0800 Subject: [PATCH 48/73] minor refactoring : cleaner MT integration within bench --- programs/bench.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index 74d26ca0..7dc98653 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -180,6 +180,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, size_t const maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ void* const compressedBuffer = malloc(maxCompressedSize); void* resultBuffer = malloc(srcSize); + ZSTDMT_CCtx* const mtctx = ZSTDMT_createCCtx(g_nbThreads); ZSTD_CCtx* const ctx = ZSTD_createCCtx(); ZSTD_DCtx* const dctx = ZSTD_createDCtx(); size_t const loadedCompressedSize = srcSize; @@ -255,8 +256,6 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, const char* const marks[NB_MARKS] = { " |", " /", " =", "\\" }; U32 markNb = 0; - ZSTDMT_CCtx* const mtcctx = ZSTDMT_createCCtx(g_nbThreads); - DISPLAYLEVEL(2, "\r%79s\r", ""); while (!cCompleted || !dCompleted) { @@ -300,15 +299,17 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, blockTable[blockNb].cPtr, blockTable[blockNb].cRoom, blockTable[blockNb].srcPtr,blockTable[blockNb].srcSize, cdict); - } else if (1) { - rSize = ZSTDMT_compressCCtx(mtcctx, + } else { +#ifdef ZSTD_MULTITHREAD /* note : limitation : MT 
single-pass does not support compression with dictionary */ + rSize = ZSTDMT_compressCCtx(mtctx, blockTable[blockNb].cPtr, blockTable[blockNb].cRoom, blockTable[blockNb].srcPtr,blockTable[blockNb].srcSize, cLevel); - } else { +#else rSize = ZSTD_compress_advanced (ctx, blockTable[blockNb].cPtr, blockTable[blockNb].cRoom, blockTable[blockNb].srcPtr,blockTable[blockNb].srcSize, NULL, 0, zparams); +#endif } if (ZSTD_isError(rSize)) EXM_THROW(1, "ZSTD_compress_usingCDict() failed : %s", ZSTD_getErrorName(rSize)); blockTable[blockNb].cSize = rSize; @@ -433,6 +434,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, free(blockTable); free(compressedBuffer); free(resultBuffer); + ZSTDMT_freeCCtx(mtctx); ZSTD_freeCCtx(ctx); ZSTD_freeDCtx(dctx); return 0; From 5fba09fa414c355e7c831a5725857779f23cc022 Mon Sep 17 00:00:00 2001 From: cyan4973 Date: Fri, 20 Jan 2017 12:23:30 -0800 Subject: [PATCH 49/73] updated util's time for Windows compatibility Correctly measures time on Posix systems when running with Multi-threading Todo : check Windows measurement under multi-threading --- lib/common/threading.c | 6 +++++ lib/compress/zstdmt_compress.c | 26 ++++++++++++++------ programs/bench.c | 43 ++++++++++++---------------------- programs/util.h | 22 +++++++++++------ 4 files changed, 55 insertions(+), 42 deletions(-) diff --git a/lib/common/threading.c b/lib/common/threading.c index 38bbab0d..b56e594b 100644 --- a/lib/common/threading.c +++ b/lib/common/threading.c @@ -15,6 +15,12 @@ * This file will hold wrapper for systems, which do not support pthreads */ +/* ====== Compiler specifics ====== */ +#if defined(_MSC_VER) +# pragma warning(disable : 4206) /* disable: C4206: translation unit is empty (when ZSTD_MULTITHREAD is not defined) */ +#endif + + #if defined(ZSTD_MULTITHREAD) && defined(_WIN32) /** diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index de503227..3674281a 100644 --- a/lib/compress/zstdmt_compress.c +++ 
b/lib/compress/zstdmt_compress.c @@ -2,11 +2,17 @@ /* ====== Tuning parameters ====== */ #ifndef ZSTDMT_SECTION_LOGSIZE_MIN -#define ZSTDMT_SECTION_LOGSIZE_MIN 20 /*< minimum size for a full compression job (20==2^20==1 MB) */ +# define ZSTDMT_SECTION_LOGSIZE_MIN 20 /*< minimum size for a full compression job (20==2^20==1 MB) */ #endif -/* ====== Dependencies ====== */ +/* ====== Compiler specifics ====== */ +#if defined(_MSC_VER) +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +#endif + + +/* ====== Dependencies ====== */ #include /* malloc */ #include /* memcpy */ #include /* threadpool */ @@ -14,6 +20,8 @@ #include "zstd_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */ #include "zstdmt_compress.h" + +/* ====== Debug ====== */ #if 0 # include @@ -73,7 +81,7 @@ typedef struct buffer_s { static const buffer_t g_nullBuffer = { NULL, 0 }; typedef struct ZSTDMT_bufferPool_s { - unsigned totalBuffers;; + unsigned totalBuffers; unsigned nbBuffers; buffer_t bTable[1]; /* variable size */ } ZSTDMT_bufferPool; @@ -107,10 +115,13 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize) free(buf.start); /* size conditions not respected : scratch this buffer and create a new one */ } /* create new buffer */ - { void* const start = malloc(bSize); + { buffer_t buffer; + void* const start = malloc(bSize); if (start==NULL) bSize = 0; - return (buffer_t) { start, bSize }; /* note : start can be NULL if malloc fails ! */ - } + buffer.start = start; /* note : start can be NULL if malloc fails ! */ + buffer.size = bSize; + return buffer; + } } /* store buffer for later re-use, up to pool capacity */ @@ -336,7 +347,8 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, for (u=0; ubuffPool, dstBufferCapacity) : (buffer_t){ dst, dstCapacity }; + buffer_t const dstAsBuffer = { dst, dstCapacity }; + buffer_t const dstBuffer = u ? 
ZSTDMT_getBuffer(mtctx->buffPool, dstBufferCapacity) : dstAsBuffer; ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool); if ((cctx==NULL) || (dstBuffer.start==NULL)) { diff --git a/programs/bench.c b/programs/bench.c index 7dc98653..2286ead9 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -95,23 +95,6 @@ static clock_t g_time = 0; exit(error); \ } -/* ************************************* -* Time -***************************************/ -/* for posix only - needs proper detection macros to setup */ -#include -#include - -typedef unsigned long long clock_us_t; -static clock_us_t BMK_clockMicroSec(void) -{ - static clock_t _ticksPerSecond = 0; - if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK); - - { struct tms junk; clock_t newTicks = (clock_t) times(&junk); (void)junk; - return ((((clock_us_t)newTicks)*(1000000))/_ticksPerSecond); } -} - /* ************************************* * Benchmark Parameters @@ -248,7 +231,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, /* Bench */ { U64 fastestC = (U64)(-1LL), fastestD = (U64)(-1LL); U64 const crcOrig = g_decodeOnly ? 0 : XXH64(srcBuffer, srcSize, 0); - clock_us_t coolTime = BMK_clockMicroSec(); + UTIL_time_t coolTime, coolTick; U64 const maxTime = (g_nbSeconds * TIMELOOP_MICROSEC) + 1; U64 totalCTime=0, totalDTime=0; U32 cCompleted=g_decodeOnly, dCompleted=0; @@ -256,25 +239,28 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, const char* const marks[NB_MARKS] = { " |", " /", " =", "\\" }; U32 markNb = 0; + UTIL_initTimer(&coolTick); + UTIL_getTime(&coolTime); DISPLAYLEVEL(2, "\r%79s\r", ""); while (!cCompleted || !dCompleted) { /* overheat protection */ - if (BMK_clockMicroSec() - coolTime > ACTIVEPERIOD_MICROSEC) { + if (UTIL_clockSpanMicro(coolTime, coolTick) > ACTIVEPERIOD_MICROSEC) { DISPLAYLEVEL(2, "\rcooling down ... 
\r"); UTIL_sleep(COOLPERIOD_SEC); - coolTime = BMK_clockMicroSec(); + UTIL_getTime(&coolTime); } if (!g_decodeOnly) { - clock_us_t clockStart; + UTIL_time_t clockTick, clockStart; /* Compression */ DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize); if (!cCompleted) memset(compressedBuffer, 0xE5, maxCompressedSize); /* warm up and erase result buffer */ UTIL_sleepMilli(1); /* give processor time to other processes */ UTIL_waitForNextTick(ticksPerSecond); - clockStart = BMK_clockMicroSec(); + UTIL_initTimer(&clockTick); + UTIL_getTime(&clockStart); if (!cCompleted) { /* still some time to do compression tests */ ZSTD_parameters zparams = ZSTD_getParams(cLevel, avgSize, dictBufferSize); @@ -315,9 +301,9 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, blockTable[blockNb].cSize = rSize; } nbLoops++; - } while (BMK_clockMicroSec() - clockStart < clockLoop); + } while (UTIL_clockSpanMicro(clockStart, clockTick) < clockLoop); ZSTD_freeCDict(cdict); - { clock_us_t const clockSpanMicro = BMK_clockMicroSec() - clockStart; + { U64 const clockSpanMicro = UTIL_clockSpanMicro(clockStart, clockTick); if (clockSpanMicro < fastestC*nbLoops) fastestC = clockSpanMicro / nbLoops; totalCTime += clockSpanMicro; cCompleted = (totalCTime >= maxTime); @@ -347,10 +333,11 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, if (!dCompleted) { U64 clockLoop = g_nbSeconds ? 
TIMELOOP_MICROSEC : 1; U32 nbLoops = 0; - clock_us_t clockStart; + UTIL_time_t clockStart, clockTick; ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuffer, dictBufferSize); if (!ddict) EXM_THROW(2, "ZSTD_createDDict() allocation failure"); - clockStart = BMK_clockMicroSec(); + UTIL_initTimer(&clockTick); + UTIL_getTime(&clockStart); do { U32 blockNb; for (blockNb=0; blockNb= maxTime); diff --git a/programs/util.h b/programs/util.h index aaa4b7c1..651027ba 100644 --- a/programs/util.h +++ b/programs/util.h @@ -95,18 +95,26 @@ extern "C" { /*-**************************************** * Time functions ******************************************/ -#if !defined(_WIN32) - typedef clock_t UTIL_time_t; - UTIL_STATIC void UTIL_initTimer(UTIL_time_t* ticksPerSecond) { *ticksPerSecond=0; } - UTIL_STATIC void UTIL_getTime(UTIL_time_t* x) { *x = clock(); } - UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { (void)ticksPerSecond; return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; } - UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { (void)ticksPerSecond; return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; } -#else +#if (PLATFORM_POSIX_VERSION >= 1) +#include +#include /* times */ + typedef U64 UTIL_time_t; + UTIL_STATIC void UTIL_initTimer(UTIL_time_t* ticksPerSecond) { *ticksPerSecond=sysconf(_SC_CLK_TCK); } + UTIL_STATIC void UTIL_getTime(UTIL_time_t* x) { struct tms junk; clock_t newTicks = (clock_t) times(&junk); (void)junk; *x = (UTIL_time_t)newTicks; } + UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL * (clockEnd - clockStart) / ticksPerSecond; } + UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL * (clockEnd - clockStart) / ticksPerSecond; } +#elif 
defined(_WIN32) /* Windows */ typedef LARGE_INTEGER UTIL_time_t; UTIL_STATIC void UTIL_initTimer(UTIL_time_t* ticksPerSecond) { if (!QueryPerformanceFrequency(ticksPerSecond)) fprintf(stderr, "ERROR: QueryPerformance not present\n"); } UTIL_STATIC void UTIL_getTime(UTIL_time_t* x) { QueryPerformanceCounter(x); } UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; } UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; } +#else /* relies on standard C (note : clock_t measurements can be wrong when using multi-threading) */ + typedef clock_t UTIL_time_t; + UTIL_STATIC void UTIL_initTimer(UTIL_time_t* ticksPerSecond) { *ticksPerSecond=0; } + UTIL_STATIC void UTIL_getTime(UTIL_time_t* x) { *x = clock(); } + UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { (void)ticksPerSecond; return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; } + UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t ticksPerSecond, UTIL_time_t clockStart, UTIL_time_t clockEnd) { (void)ticksPerSecond; return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; } #endif From 2e3b659ae1c051531c3e90a4e9d795b5701f1826 Mon Sep 17 00:00:00 2001 From: cyan4973 Date: Fri, 20 Jan 2017 14:00:41 -0800 Subject: [PATCH 50/73] fixed minor warnings (Visual, conversion, doxygen) --- lib/common/pool.c | 12 ++++++++++-- lib/compress/zstdmt_compress.c | 12 ++++++++++-- lib/compress/zstdmt_compress.h | 8 ++++++++ 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/lib/common/pool.c b/lib/common/pool.c index f40ed39d..693217f2 100644 --- a/lib/common/pool.c +++ b/lib/common/pool.c @@ -7,13 +7,21 @@ * of patent rights can be found in the PATENTS file 
in the same directory. */ -#include "pool.h" + +/* ====== Dependencies ======= */ #include /* size_t */ #include /* malloc, calloc, free */ +#include "pool.h" + +/* ====== Compiler specifics ====== */ +#if defined(_MSC_VER) +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +#endif + #ifdef ZSTD_MULTITHREAD -#include +#include /* pthread adaptation */ /* A job is a function and an opaque argument */ typedef struct POOL_job_s { diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 3674281a..48717de2 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -1,8 +1,16 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ /* ====== Tuning parameters ====== */ #ifndef ZSTDMT_SECTION_LOGSIZE_MIN -# define ZSTDMT_SECTION_LOGSIZE_MIN 20 /*< minimum size for a full compression job (20==2^20==1 MB) */ +# define ZSTDMT_SECTION_LOGSIZE_MIN 20 /* minimum size for a full compression job (20==2^20==1 MB) */ #endif @@ -444,7 +452,7 @@ size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs, const void* dict, size_t di zcs->dictSize = dictSize; zcs->frameContentSize = pledgedSrcSize; zcs->targetSectionSize = (size_t)1 << MAX(ZSTDMT_SECTION_LOGSIZE_MIN, (zcs->params.cParams.windowLog + 2)); - zcs->inBuffSize = zcs->targetSectionSize + (1 << zcs->params.cParams.windowLog); + zcs->inBuffSize = zcs->targetSectionSize + ((size_t)1 << zcs->params.cParams.windowLog); zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); if (zcs->inBuff.buffer.start == NULL) return ERROR(memory_allocation); zcs->inBuff.filled = 0; diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h index 
759906db..7d336db0 100644 --- a/lib/compress/zstdmt_compress.h +++ b/lib/compress/zstdmt_compress.h @@ -1,3 +1,11 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ /* === Dependencies === */ #include /* size_t */ From 326575c3a3023e15964947583ecd655bd8a775f4 Mon Sep 17 00:00:00 2001 From: cyan4973 Date: Fri, 20 Jan 2017 14:49:44 -0800 Subject: [PATCH 51/73] fixed VS2010 project --- build/VS2010/zstd/zstd.vcxproj | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/build/VS2010/zstd/zstd.vcxproj b/build/VS2010/zstd/zstd.vcxproj index 5b585268..9886af0b 100644 --- a/build/VS2010/zstd/zstd.vcxproj +++ b/build/VS2010/zstd/zstd.vcxproj @@ -21,11 +21,14 @@ + + + @@ -45,7 +48,10 @@ + + + @@ -66,6 +72,7 @@ + @@ -137,7 +144,7 @@ false - $(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath); + $(IncludePath);$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\compress;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(UniversalCRT_IncludePath); false $(LibraryPath); @@ -206,6 +213,7 @@ false false MultiThreaded + /DZSTD_MULTITHREAD %(AdditionalOptions) Console @@ -218,4 +226,4 @@ - + \ No newline at end of file From f0ffa237da8180d7aff82f41d43da12d881e62ea Mon Sep 17 00:00:00 2001 From: cyan4973 Date: Fri, 20 Jan 2017 15:24:06 -0800 Subject: [PATCH 52/73] fixed VS2008 project --- build/VS2008/zstd/zstd.vcproj | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/build/VS2008/zstd/zstd.vcproj b/build/VS2008/zstd/zstd.vcproj index ad64f869..f5b3f558 100644 --- 
a/build/VS2008/zstd/zstd.vcproj +++ b/build/VS2008/zstd/zstd.vcproj @@ -376,6 +376,14 @@ RelativePath="..\..\..\lib\decompress\huf_decompress.c" > + + + + @@ -428,6 +436,10 @@ RelativePath="..\..\..\programs\zstdcli.c" > + + - - @@ -470,6 +478,14 @@ RelativePath="..\..\..\lib\common\mem.h" > + + + + @@ -486,6 +502,10 @@ RelativePath="..\..\..\lib\zstd.h" > + + @@ -530,6 +550,10 @@ RelativePath="..\..\..\lib\legacy\zstd_v07.h" > + + From 317604e0addb03ea3aa202324d2625f2754497bd Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 20 Jan 2017 17:18:41 -0800 Subject: [PATCH 53/73] fixed : compilation of zstreamtest in dll mode --- lib/compress/zstdmt_compress.h | 20 ++++++++++---------- tests/.gitignore | 1 + 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h index 7d336db0..7e9d0703 100644 --- a/lib/compress/zstdmt_compress.h +++ b/lib/compress/zstdmt_compress.h @@ -10,16 +10,16 @@ /* === Dependencies === */ #include /* size_t */ #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */ -#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer */ +#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */ /* === Simple one-pass functions === */ typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx; -ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads); -size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* cctx); +ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads); +ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* cctx); -size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* cctx, +ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel); @@ -27,12 +27,12 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* cctx, /* === Streaming functions === */ -size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel); -size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown 
*/ -size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs, const void* dict, size_t dictSize, +ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel); +ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */ +ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown ; current limitation : no checksum */ -size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); -size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ -size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ diff --git a/tests/.gitignore b/tests/.gitignore index 5041404d..53520238 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -6,6 +6,7 @@ fuzzer32 fuzzer-dll zbufftest zbufftest32 +zbufftest-dll zstreamtest zstreamtest32 zstreamtest-dll From f8804d1014da44e02eaa9e83883d0eb12d6308f4 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 20 Jan 2017 17:23:19 -0800 Subject: [PATCH 54/73] convert tabs to space joys of using multiple editors from multiple environments ... 
--- programs/bench.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index 2286ead9..1ca40d6b 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -239,28 +239,28 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, const char* const marks[NB_MARKS] = { " |", " /", " =", "\\" }; U32 markNb = 0; - UTIL_initTimer(&coolTick); - UTIL_getTime(&coolTime); + UTIL_initTimer(&coolTick); + UTIL_getTime(&coolTime); DISPLAYLEVEL(2, "\r%79s\r", ""); while (!cCompleted || !dCompleted) { /* overheat protection */ - if (UTIL_clockSpanMicro(coolTime, coolTick) > ACTIVEPERIOD_MICROSEC) { + if (UTIL_clockSpanMicro(coolTime, coolTick) > ACTIVEPERIOD_MICROSEC) { DISPLAYLEVEL(2, "\rcooling down ... \r"); UTIL_sleep(COOLPERIOD_SEC); UTIL_getTime(&coolTime); } if (!g_decodeOnly) { - UTIL_time_t clockTick, clockStart; + UTIL_time_t clockTick, clockStart; /* Compression */ DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize); if (!cCompleted) memset(compressedBuffer, 0xE5, maxCompressedSize); /* warm up and erase result buffer */ UTIL_sleepMilli(1); /* give processor time to other processes */ UTIL_waitForNextTick(ticksPerSecond); - UTIL_initTimer(&clockTick); - UTIL_getTime(&clockStart); + UTIL_initTimer(&clockTick); + UTIL_getTime(&clockStart); if (!cCompleted) { /* still some time to do compression tests */ ZSTD_parameters zparams = ZSTD_getParams(cLevel, avgSize, dictBufferSize); @@ -301,9 +301,9 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, blockTable[blockNb].cSize = rSize; } nbLoops++; - } while (UTIL_clockSpanMicro(clockStart, clockTick) < clockLoop); + } while (UTIL_clockSpanMicro(clockStart, clockTick) < clockLoop); ZSTD_freeCDict(cdict); - { U64 const clockSpanMicro = UTIL_clockSpanMicro(clockStart, clockTick); + { U64 const clockSpanMicro = UTIL_clockSpanMicro(clockStart, clockTick); if (clockSpanMicro < fastestC*nbLoops) fastestC = 
clockSpanMicro / nbLoops; totalCTime += clockSpanMicro; cCompleted = (totalCTime >= maxTime); @@ -336,8 +336,8 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, UTIL_time_t clockStart, clockTick; ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuffer, dictBufferSize); if (!ddict) EXM_THROW(2, "ZSTD_createDDict() allocation failure"); - UTIL_initTimer(&clockTick); - UTIL_getTime(&clockStart); + UTIL_initTimer(&clockTick); + UTIL_getTime(&clockStart); do { U32 blockNb; for (blockNb=0; blockNb Date: Sat, 21 Jan 2017 21:56:36 -0800 Subject: [PATCH 55/73] minor : tab to spaces --- lib/compress/zstdmt_compress.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 48717de2..1ddb5387 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -124,12 +124,12 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize) } /* create new buffer */ { buffer_t buffer; - void* const start = malloc(bSize); + void* const start = malloc(bSize); if (start==NULL) bSize = 0; - buffer.start = start; /* note : start can be NULL if malloc fails ! */ - buffer.size = bSize; - return buffer; - } + buffer.start = start; /* note : start can be NULL if malloc fails ! 
*/ + buffer.size = bSize; + return buffer; + } } /* store buffer for later re-use, up to pool capacity */ @@ -355,7 +355,7 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, for (u=0; ubuffPool, dstBufferCapacity) : dstAsBuffer; ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool); From 0cf74fa95751d2ce7e61f394243d5f5e19329927 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 21 Jan 2017 22:06:49 -0800 Subject: [PATCH 56/73] optimized pool allocation by 1 slot --- lib/compress/zstdmt_compress.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 1ddb5387..8de54d4a 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -13,6 +13,8 @@ # define ZSTDMT_SECTION_LOGSIZE_MIN 20 /* minimum size for a full compression job (20==2^20==1 MB) */ #endif +#define ZSTDMT_NBTHREADS_MAX 128 + /* ====== Compiler specifics ====== */ #if defined(_MSC_VER) @@ -77,8 +79,6 @@ if (g_debugLevel>=MUTEX_WAIT_TIME_DLEVEL) { \ #endif -#define ZSTDMT_NBTHREADS_MAX 128 - /* ===== Buffer Pool ===== */ typedef struct buffer_s { @@ -97,9 +97,10 @@ typedef struct ZSTDMT_bufferPool_s { static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads) { unsigned const maxNbBuffers = 2*nbThreads + 2; - ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)calloc(1, sizeof(ZSTDMT_bufferPool) + maxNbBuffers * sizeof(buffer_t)); + ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)calloc(1, sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t)); if (bufPool==NULL) return NULL; bufPool->totalBuffers = maxNbBuffers; + bufPool->nbBuffers = 0; return bufPool; } @@ -164,9 +165,11 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool) free(pool); } +/* ZSTDMT_createCCtxPool() : + * implies nbThreads >= 1 , checked by caller ZSTDMT_createCCtx() */ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads) { - ZSTDMT_CCtxPool* const cctxPool = 
(ZSTDMT_CCtxPool*) calloc(1, sizeof(ZSTDMT_CCtxPool) + nbThreads*sizeof(ZSTD_CCtx*)); + ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) calloc(1, sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*)); if (!cctxPool) return NULL; cctxPool->totalCCtx = nbThreads; cctxPool->availCCtx = 0; From 9d6f7637ecb95372386aa836ca0f57cded446da4 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sat, 21 Jan 2017 22:14:08 -0800 Subject: [PATCH 57/73] protected (mutex) read to jobCompleted, as suggested by @terrelln --- lib/compress/zstdmt_compress.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 8de54d4a..4e09a208 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -541,8 +541,12 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu /* check if there is any data available to flush */ { unsigned const jobID = zcs->doneJobID & zcs->jobIDMask; - ZSTDMT_jobDescription job = zcs->jobs[jobID]; - if (job.jobCompleted) { /* job completed : output can be flushed */ + unsigned jobCompleted; + pthread_mutex_lock(&zcs->jobCompleted_mutex); + jobCompleted = zcs->jobs[jobID].jobCompleted; + pthread_mutex_unlock(&zcs->jobCompleted_mutex); + if (jobCompleted) { + ZSTDMT_jobDescription const job = zcs->jobs[jobID]; size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos); DEBUGLOG(1, "flush %u bytes from job %u ", (U32)toWrite, zcs->doneJobID); ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx); @@ -556,15 +560,13 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu } memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite); output->pos += toWrite; - job.dstFlushed += toWrite; + zcs->jobs[jobID].dstFlushed += toWrite; DEBUGLOG(1, "remaining : %u bytes ", (U32)(job.cSize - job.dstFlushed)); - if (job.dstFlushed == job.cSize) { /* output 
buffer fully flushed => go to next one */ + if (zcs->jobs[jobID].dstFlushed == job.cSize) { /* output buffer fully flushed => go to next one */ ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff); zcs->jobs[jobID].dstBuff = g_nullBuffer; zcs->jobs[jobID].jobCompleted = 0; zcs->doneJobID++; - } else { - zcs->jobs[jobID].dstFlushed = job.dstFlushed; /* save flush level into zcs for later retrieval */ } } } /* recommended next input size : fill current input buffer */ From bd6bc2261237526ab133e5f1ab7262fa91745f69 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 22 Jan 2017 15:54:14 -0800 Subject: [PATCH 58/73] playtest.sh : changed sdiff into $DIFF --- tests/playTests.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/playTests.sh b/tests/playTests.sh index 5bb882aa..35731f9c 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -135,14 +135,14 @@ $ZSTD -c world.tmp > world.zstd cat hello.zstd world.zstd > helloworld.zstd $ZSTD -dc helloworld.zstd > result.tmp cat result.tmp -sdiff helloworld.tmp result.tmp +$DIFF helloworld.tmp result.tmp $ECHO "frame concatenation without checksum" $ZSTD -c hello.tmp > hello.zstd --no-check $ZSTD -c world.tmp > world.zstd --no-check cat hello.zstd world.zstd > helloworld.zstd $ZSTD -dc helloworld.zstd > result.tmp cat result.tmp -sdiff helloworld.tmp result.tmp +$DIFF helloworld.tmp result.tmp rm ./*.tmp ./*.zstd $ECHO "frame concatenation tests completed" From c5933487227fcd9262d84efd2a0a4e5432b0c41c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 22 Jan 2017 16:40:06 -0800 Subject: [PATCH 59/73] ZSTDMT_initCStream_usingDict() can outlive dict Like ZSTD_initCStream_usingDict(), ZSTDMT_initCStream_usingDict() now keeps a copy of dict internally. This way, dict can be released : it no longer has to outlive all future compression sessions.
--- lib/compress/zstd_compress.c | 3 +- lib/compress/zstdmt_compress.c | 51 ++++++++++++++++++++++++---------- lib/compress/zstdmt_compress.h | 4 +-- lib/zstd.h | 1 + 4 files changed, 41 insertions(+), 18 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index e8a37511..3c69a1ae 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2603,7 +2603,6 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, si } } - /*! ZSTD_compressBegin_internal() : * @return : 0, or an error code */ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, @@ -2825,7 +2824,7 @@ static ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict* cdict) { return ZSTD_getParamsFromCCtx(cdict->refContext); } -size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, U64 pledgedSrcSize) +size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize) { if (cdict->dictContentSize) CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize)) else CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, cdict->refContext->params, pledgedSrcSize)); diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 4e09a208..283f7684 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -217,6 +217,7 @@ typedef struct { pthread_mutex_t* jobCompleted_mutex; pthread_cond_t* jobCompleted_cond; ZSTD_parameters params; + ZSTD_CDict* cdict; const void* dict; size_t dictSize; unsigned long long fullFrameSize; @@ -227,8 +228,13 @@ void ZSTDMT_compressChunk(void* jobDescription) { ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription; buffer_t const dstBuff = job->dstBuff; - size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->dict, job->dictSize, job->params, job->fullFrameSize); - if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; } + if (job->cdict) { 
+ size_t const initError = ZSTD_compressBegin_usingCDict(job->cctx, job->cdict, job->fullFrameSize); + if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; } + } else { + size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->dict, job->dictSize, job->params, job->fullFrameSize); + if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; } + } if (!job->firstChunk) { /* flush frame header */ size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, 0); if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; } @@ -271,8 +277,7 @@ struct ZSTDMT_CCtx_s { unsigned frameEnded; unsigned allJobsCompleted; unsigned long long frameContentSize; - const void* dict; - size_t dictSize; + ZSTD_CDict* cdict; ZSTDMT_jobDescription jobs[1]; /* variable size (must lies at the end) */ }; @@ -324,6 +329,7 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx) size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) { if (mtctx==NULL) return 0; /* compatible with free on NULL */ + ZSTD_freeCDict(mtctx->cdict); POOL_free(mtctx->factory); if (!mtctx->allJobsCompleted) ZSTDMT_releaseAllJobResources(mtctx); /* stop workers first */ ZSTDMT_freeBufferPool(mtctx->buffPool); /* release job resources into pools first */ @@ -442,8 +448,12 @@ static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs) { } } -size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs, const void* dict, size_t dictSize, - ZSTD_parameters params, unsigned long long pledgedSrcSize) { + +static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, + const void* dict, size_t dictSize, unsigned updateDict, + ZSTD_parameters params, unsigned long long pledgedSrcSize) +{ + ZSTD_customMem const cmem = { NULL, NULL, NULL }; if (zcs->allJobsCompleted == 0) { /* previous job not correctly finished */ ZSTDMT_waitForAllJobsCompleted(zcs); ZSTDMT_releaseAllJobResources(zcs); @@ -451,8 +461,12 @@ size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs, const 
void* dict, size_t di } params.fParams.checksumFlag = 0; /* current limitation : no checksum (to be lifted in a later version) */ zcs->params = params; - zcs->dict = dict; - zcs->dictSize = dictSize; + if (updateDict) { + ZSTD_freeCDict(zcs->cdict); zcs->cdict = NULL; + if (dict && dictSize) { + zcs->cdict = ZSTD_createCDict_advanced(dict, dictSize, 0, params, cmem); + if (zcs->cdict == NULL) return ERROR(memory_allocation); + } } zcs->frameContentSize = pledgedSrcSize; zcs->targetSectionSize = (size_t)1 << MAX(ZSTDMT_SECTION_LOGSIZE_MIN, (zcs->params.cParams.windowLog + 2)); zcs->inBuffSize = zcs->targetSectionSize + ((size_t)1 << zcs->params.cParams.windowLog); @@ -466,16 +480,23 @@ size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs, const void* dict, size_t di return 0; } +size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize) +{ + return ZSTDMT_initCStream_internal(zcs, dict, dictSize, 1, params, pledgedSrcSize); +} + /* ZSTDMT_resetCStream() : * pledgedSrcSize is optional and can be zero == unknown */ size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize) { - return ZSTDMT_initCStream_advanced(zcs, zcs->dict, zcs->dictSize, zcs->params, pledgedSrcSize); + return ZSTDMT_initCStream_internal(zcs, NULL, 0, 0, zcs->params, pledgedSrcSize); } size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) { ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0); - return ZSTDMT_initCStream_advanced(zcs, NULL, 0, params, 0); + return ZSTDMT_initCStream_internal(zcs, NULL, 0, 1, params, 0); } @@ -510,8 +531,9 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start; zcs->jobs[jobID].srcSize = zcs->targetSectionSize; zcs->jobs[jobID].params = zcs->params; - zcs->jobs[jobID].dict = zcs->nextJobID == 0 ? 
zcs->dict : NULL; - zcs->jobs[jobID].dictSize = zcs->nextJobID == 0 ? zcs->dictSize : 0; + zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL; + zcs->jobs[jobID].dict = NULL; + zcs->jobs[jobID].dictSize = 0; zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize; zcs->jobs[jobID].dstBuff = dstBuffer; zcs->jobs[jobID].cctx = cctx; @@ -598,8 +620,9 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start; zcs->jobs[jobID].srcSize = srcSize; zcs->jobs[jobID].params = zcs->params; - zcs->jobs[jobID].dict = zcs->nextJobID == 0 ? zcs->dict : NULL; - zcs->jobs[jobID].dictSize = zcs->nextJobID == 0 ? zcs->dictSize : 0; + zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL; + zcs->jobs[jobID].dict = NULL; + zcs->jobs[jobID].dictSize = 0; zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize; zcs->jobs[jobID].dstBuff = dstBuffer; zcs->jobs[jobID].cctx = cctx; diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h index 7e9d0703..bdc4caab 100644 --- a/lib/compress/zstdmt_compress.h +++ b/lib/compress/zstdmt_compress.h @@ -29,8 +29,8 @@ ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* cctx, ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel); ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */ -ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs, const void* dict, size_t dictSize, - ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown ; current limitation : no checksum */ +ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs, const void* dict, size_t dictSize, /**< dict can be released after init */ + ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< params current limitation : no checksum ; pledgedSrcSize is optional and 
can be zero == unknown */ ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); diff --git a/lib/zstd.h b/lib/zstd.h index a0d5c785..52d65206 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -572,6 +572,7 @@ ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize); ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); From 1a2547f6540c8f208fe116072cd935960acfe62a Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 22 Jan 2017 23:49:52 -0800 Subject: [PATCH 60/73] ZSTDMT_compressStream() becomes blocking when required to ensure forward progresses In some (rare) cases, the job list could be blocked by a first job still being processed, while all following ones are completed, waiting to be flushed. In such a case, the current job-table implementation is unable to accept a new job. As a consequence, a call to ZSTDMT_compressStream() can be useless (nothing read, nothing flushed), with the risk of triggering a busy-wait on the caller side (needlessly looping over ZSTDMT_compressStream() ). In such a case, ZSTDMT_compressStream() will block until the first job is completed and ready to flush. It ensures some forward progress by guaranteeing it will flush at least a part of the completed job.
Energy-wasting busy-wait is avoided. --- lib/compress/zstdmt_compress.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 283f7684..176f940c 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -565,6 +565,10 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu { unsigned const jobID = zcs->doneJobID & zcs->jobIDMask; unsigned jobCompleted; pthread_mutex_lock(&zcs->jobCompleted_mutex); + while (zcs->jobs[jobID].jobCompleted == 0 && zcs->inBuff.filled == zcs->inBuffSize) { + /* when no new job could be started, block until there is something to flush, ensuring forward progress */ + pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex); + } jobCompleted = zcs->jobs[jobID].jobCompleted; pthread_mutex_unlock(&zcs->jobCompleted_mutex); if (jobCompleted) { From 84581ff8d7cb7bcd18091c6bfada1764baa3c206 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 23 Jan 2017 00:56:54 -0800 Subject: [PATCH 61/73] ZSTDMT_compressCCtx : fallback to single-thread mode when nbChunks==1 --- lib/compress/zstdmt_compress.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 176f940c..18ed7441 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -360,6 +360,15 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, DEBUGLOG(2, "nbChunks : %2u (chunkSize : %u bytes) ", nbChunks, (U32)avgChunkSize); params.fParams.contentSizeFlag = 1; + if (nbChunks==1) { /* fallback to single-thread mode */ + size_t result; + ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool); + if (!cctx) return ERROR(memory_allocation); + result = ZSTD_compressCCtx(mtctx->cctxPool->cctx[0], dst, dstCapacity, src, srcSize, compressionLevel); + ZSTDMT_releaseCCtx(mtctx->cctxPool, cctx); + return result; + } + { unsigned u; for (u=0; udoneJobID < 
zcs->nextJobID) { unsigned const jobID = zcs->doneJobID & zcs->jobIDMask; @@ -708,6 +715,3 @@ size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output) { return ZSTDMT_flushStream_internal(zcs, output, 1); } - - -#endif From 1cbf251e43c116d06d7e15ca5f77e84fea440be1 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 23 Jan 2017 01:43:58 -0800 Subject: [PATCH 62/73] ZSTDMT streaming : fall back to (regular) single thread mode when nbThreads==1 --- lib/compress/zstdmt_compress.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 18ed7441..135d274f 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -172,7 +172,10 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads) ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) calloc(1, sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*)); if (!cctxPool) return NULL; cctxPool->totalCCtx = nbThreads; - cctxPool->availCCtx = 0; + cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */ + cctxPool->cctx[0] = ZSTD_createCCtx(); + if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; } + DEBUGLOG(1, "cctxPool created, with %u threads", nbThreads); return cctxPool; } @@ -278,6 +281,7 @@ struct ZSTDMT_CCtx_s { unsigned allJobsCompleted; unsigned long long frameContentSize; ZSTD_CDict* cdict; + ZSTD_CStream* cstream; ZSTDMT_jobDescription jobs[1]; /* variable size (must lies at the end) */ }; @@ -287,7 +291,7 @@ ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads) U32 const minNbJobs = nbThreads + 2; U32 const nbJobsLog2 = ZSTD_highbit32(minNbJobs) + 1; U32 const nbJobs = 1 << nbJobsLog2; - DEBUGLOG(4, "nbThreads : %u ; minNbJobs : %u ; nbJobsLog2 : %u ; nbJobs : %u \n", + DEBUGLOG(5, "nbThreads : %u ; minNbJobs : %u ; nbJobsLog2 : %u ; nbJobs : %u \n", nbThreads, minNbJobs, nbJobsLog2, nbJobs); if ((nbThreads < 1) | (nbThreads 
> ZSTDMT_NBTHREADS_MAX)) return NULL; cctx = (ZSTDMT_CCtx*) calloc(1, sizeof(ZSTDMT_CCtx) + nbJobs*sizeof(ZSTDMT_jobDescription)); @@ -302,8 +306,14 @@ ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads) ZSTDMT_freeCCtx(cctx); return NULL; } + if (nbThreads==1) { + cctx->cstream = ZSTD_createCStream(); + if (!cctx->cstream) { + ZSTDMT_freeCCtx(cctx); return NULL; + } } pthread_mutex_init(&cctx->jobCompleted_mutex, NULL); /* Todo : check init function return */ pthread_cond_init(&cctx->jobCompleted_cond, NULL); + DEBUGLOG(4, "mt_cctx created, for %u threads \n", nbThreads); return cctx; } @@ -329,11 +339,12 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx) size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) { if (mtctx==NULL) return 0; /* compatible with free on NULL */ - ZSTD_freeCDict(mtctx->cdict); POOL_free(mtctx->factory); if (!mtctx->allJobsCompleted) ZSTDMT_releaseAllJobResources(mtctx); /* stop workers first */ ZSTDMT_freeBufferPool(mtctx->buffPool); /* release job resources into pools first */ ZSTDMT_freeCCtxPool(mtctx->cctxPool); + ZSTD_freeCDict(mtctx->cdict); + ZSTD_freeCStream(mtctx->cstream); pthread_mutex_destroy(&mtctx->jobCompleted_mutex); pthread_cond_destroy(&mtctx->jobCompleted_cond); free(mtctx); @@ -361,12 +372,8 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, params.fParams.contentSizeFlag = 1; if (nbChunks==1) { /* fallback to single-thread mode */ - size_t result; - ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool); - if (!cctx) return ERROR(memory_allocation); - result = ZSTD_compressCCtx(mtctx->cctxPool->cctx[0], dst, dstCapacity, src, srcSize, compressionLevel); - ZSTDMT_releaseCCtx(mtctx->cctxPool, cctx); - return result; + ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0]; + return ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel); } { unsigned u; @@ -461,6 +468,7 @@ static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, ZSTD_parameters params, unsigned long long pledgedSrcSize) { ZSTD_customMem 
const cmem = { NULL, NULL, NULL }; + if (zcs->nbThreads==1) return ZSTD_initCStream_advanced(zcs->cstream, dict, dictSize, params, pledgedSrcSize); if (zcs->allJobsCompleted == 0) { /* previous job not correctly finished */ ZSTDMT_waitForAllJobsCompleted(zcs); ZSTDMT_releaseAllJobResources(zcs); @@ -498,6 +506,7 @@ size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs, * pledgedSrcSize is optional and can be zero == unknown */ size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize) { + if (zcs->nbThreads==1) return ZSTD_resetCStream(zcs->cstream, pledgedSrcSize); return ZSTDMT_initCStream_internal(zcs, NULL, 0, 0, zcs->params, pledgedSrcSize); } @@ -510,6 +519,7 @@ size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) { size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) { if (zcs->frameEnded) return ERROR(stage_wrong); /* current frame being ended. Only flush is allowed. Restart with init */ + if (zcs->nbThreads==1) return ZSTD_compressStream(zcs->cstream, output, input); /* fill input buffer */ { size_t const toLoad = MIN(input->size - input->pos, zcs->inBuffSize - zcs->inBuff.filled); @@ -708,10 +718,12 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output) { + if (zcs->nbThreads==1) return ZSTD_flushStream(zcs->cstream, output); return ZSTDMT_flushStream_internal(zcs, output, 0); } size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output) { + if (zcs->nbThreads==1) return ZSTD_endStream(zcs->cstream, output); return ZSTDMT_flushStream_internal(zcs, output, 1); } From 94364bf87a320fed426b2312adbb74e403dc62e5 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 23 Jan 2017 11:43:51 -0800 Subject: [PATCH 63/73] refactor ZSTDMT streaming flush code now shared by both ZSTDMT_compressStream() and ZSTDMT_flushStream() --- lib/compress/zstdmt_compress.c | 73 
+++++++++++++++++++++++++++++----- 1 file changed, 63 insertions(+), 10 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 135d274f..9e7754b8 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -244,8 +244,8 @@ void ZSTDMT_compressChunk(void* jobDescription) ZSTD_invalidateRepCodes(job->cctx); } - DEBUGLOG(3, "Compressing : "); - DEBUG_PRINTHEX(3, job->srcStart, 12); + DEBUGLOG(4, "Compressing : "); + DEBUG_PRINTHEX(4, job->srcStart, 12); job->cSize = (job->lastChunk) ? /* last chunk signal */ ZSTD_compressEnd(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, job->srcSize) : ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, job->srcSize); @@ -516,6 +516,54 @@ size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) { } +/* ZSTDMT_flushNextJob() : + * output : will be updated with amount of data flushed . + * blockToFlush : the function will block and wait if there is no data available to flush . + * @return : amount of data remaining within internal buffer, 1 if unknown but > 0, 0 if no more */ +static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush) +{ + unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask; + if (zcs->doneJobID == zcs->nextJobID) return 0; /* all flushed ! 
*/ + PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex); + while (zcs->jobs[wJobID].jobCompleted==0) { + DEBUGLOG(5, "waiting for jobCompleted signal from job %u", zcs->doneJobID); /* block when nothing available to flush */ + if (!blockToFlush) { pthread_mutex_unlock(&zcs->jobCompleted_mutex); return 0; } /* nothing ready to be flushed => skip */ + pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex); + } + pthread_mutex_unlock(&zcs->jobCompleted_mutex); + /* compression job completed : output can be flushed */ + { ZSTDMT_jobDescription job = zcs->jobs[wJobID]; + size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos); + DEBUGLOG(4, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID); + ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx); + zcs->jobs[wJobID].cctx = NULL; + ZSTDMT_releaseBuffer(zcs->buffPool, job.src); + zcs->jobs[wJobID].srcStart = NULL; + zcs->jobs[wJobID].src = g_nullBuffer; + if (ZSTD_isError(job.cSize)) { + ZSTDMT_waitForAllJobsCompleted(zcs); + ZSTDMT_releaseAllJobResources(zcs); + return job.cSize; + } + memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite); + output->pos += toWrite; + job.dstFlushed += toWrite; + if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => move to next one */ + ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff); + zcs->jobs[wJobID].dstBuff = g_nullBuffer; + zcs->jobs[wJobID].jobCompleted = 0; + zcs->doneJobID++; + } else { + zcs->jobs[wJobID].dstFlushed = job.dstFlushed; + } + /* return value : how many bytes left in buffer ; fake it to 1 if unknown but >0 */ + if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed); + if (zcs->doneJobID < zcs->nextJobID) return 1; /* still some buffer to flush */ + zcs->allJobsCompleted = zcs->frameEnded; /* frame completed and entirely flushed */ + return 0; /* everything flushed */ +} } + + size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, 
ZSTD_inBuffer* input) { if (zcs->frameEnded) return ERROR(stage_wrong); /* current frame being ended. Only flush is allowed. Restart with init */ @@ -579,6 +627,8 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu } /* check if there is any data available to flush */ + ZSTDMT_flushNextJob(zcs, output, (zcs->inBuff.filled == zcs->inBuffSize)); /* we'll block if it wasn't possible to create new job due to saturation */ +#if 0 { unsigned const jobID = zcs->doneJobID & zcs->jobIDMask; unsigned jobCompleted; pthread_mutex_lock(&zcs->jobCompleted_mutex); @@ -611,7 +661,7 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu zcs->jobs[jobID].jobCompleted = 0; zcs->doneJobID++; } } } - +#endif /* recommended next input size : fill current input buffer */ return zcs->inBuffSize - zcs->inBuff.filled; } @@ -671,25 +721,27 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp zcs->frameEnded = 1; } - DEBUGLOG(1, "posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize, zcs->jobs[jobID].lastChunk, zcs->doneJobID, zcs->doneJobID & zcs->jobIDMask); + DEBUGLOG(3, "posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize, zcs->jobs[jobID].lastChunk, zcs->doneJobID, zcs->doneJobID & zcs->jobIDMask); POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]); /* this call is blocking when thread worker pool is exhausted */ zcs->nextJobID++; } /* check if there is any data available to flush */ - DEBUGLOG(1, "zcs->doneJobID : %u ; zcs->nextJobID : %u ", zcs->doneJobID, zcs->nextJobID); - if (zcs->doneJobID == zcs->nextJobID) return 0; /* all flushed ! 
*/ + DEBUGLOG(5, "zcs->doneJobID : %u ; zcs->nextJobID : %u ", zcs->doneJobID, zcs->nextJobID); + return ZSTDMT_flushNextJob(zcs, output, 1); + +#if 0 { unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask; PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex); while (zcs->jobs[wJobID].jobCompleted==0) { - DEBUGLOG(5, "waiting for jobCompleted signal from job %u", zcs->doneJobID); /* we want to block when waiting for data to flush */ + DEBUGLOG(5, "waiting for jobCompleted signal from job %u", zcs->doneJobID); /* block when nothing available to flush */ pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex); } pthread_mutex_unlock(&zcs->jobCompleted_mutex); - { /* job completed : output can be flushed */ - ZSTDMT_jobDescription job = zcs->jobs[wJobID]; + /* compression job completed : output can be flushed */ + { ZSTDMT_jobDescription job = zcs->jobs[wJobID]; size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos); - DEBUGLOG(1, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID); + DEBUGLOG(4, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID); ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx); zcs->jobs[wJobID].cctx = NULL; /* release cctx for future task */ ZSTDMT_releaseBuffer(zcs->buffPool, job.src); zcs->jobs[wJobID].srcStart = NULL; zcs->jobs[wJobID].src = g_nullBuffer; if (ZSTD_isError(job.cSize)) { @@ -713,6 +765,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp zcs->allJobsCompleted = zcs->frameEnded; return 0; } } +#endif } From 3488a4a473f0631a2ea493bbf8f0fa2637019e4d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 24 Jan 2017 11:48:40 -0800 Subject: [PATCH 64/73] ZSTDMT now supports frame checksum --- lib/compress/zstdmt_compress.c | 144 +++++++++++---------------------- lib/compress/zstdmt_compress.h | 6 +- 2 files changed, 52 insertions(+), 98 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 9e7754b8..0b91ad4e 
100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -29,6 +29,8 @@ #include "threading.h" /* mutex */ #include "zstd_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */ #include "zstdmt_compress.h" +#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ +#include "xxhash.h" /* ====== Debug ====== */ @@ -217,6 +219,7 @@ typedef struct { unsigned firstChunk; unsigned lastChunk; unsigned jobCompleted; + unsigned jobScanned; pthread_mutex_t* jobCompleted_mutex; pthread_cond_t* jobCompleted_cond; ZSTD_parameters params; @@ -254,6 +257,7 @@ void ZSTDMT_compressChunk(void* jobDescription) _endJob: PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex); job->jobCompleted = 1; + job->jobScanned = 0; pthread_cond_signal(job->jobCompleted_cond); pthread_mutex_unlock(job->jobCompleted_mutex); } @@ -273,6 +277,7 @@ struct ZSTDMT_CCtx_s { size_t inBuffSize; inBuff_t inBuff; ZSTD_parameters params; + XXH64_state_t xxhState; unsigned nbThreads; unsigned jobIDMask; unsigned doneJobID; @@ -474,7 +479,6 @@ static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, ZSTDMT_releaseAllJobResources(zcs); zcs->allJobsCompleted = 1; } - params.fParams.checksumFlag = 0; /* current limitation : no checksum (to be lifted in a later version) */ zcs->params = params; if (updateDict) { ZSTD_freeCDict(zcs->cdict); zcs->cdict = NULL; @@ -492,6 +496,7 @@ static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, zcs->nextJobID = 0; zcs->frameEnded = 0; zcs->allJobsCompleted = 0; + if (params.fParams.checksumFlag) XXH64_reset(&zcs->xxhState, 0); return 0; } @@ -518,7 +523,7 @@ size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) { /* ZSTDMT_flushNextJob() : * output : will be updated with amount of data flushed . - * blockToFlush : the function will block and wait if there is no data available to flush . + * blockToFlush : if >0, the function will block and wait if there is no data available to flush . 
* @return : amount of data remaining within internal buffer, 1 if unknown but > 0, 0 if no more */ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush) { @@ -526,28 +531,43 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi if (zcs->doneJobID == zcs->nextJobID) return 0; /* all flushed ! */ PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex); while (zcs->jobs[wJobID].jobCompleted==0) { - DEBUGLOG(5, "waiting for jobCompleted signal from job %u", zcs->doneJobID); /* block when nothing available to flush */ - if (!blockToFlush) { pthread_mutex_unlock(&zcs->jobCompleted_mutex); return 0; } /* nothing ready to be flushed => skip */ - pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex); + DEBUGLOG(5, "waiting for jobCompleted signal from job %u", zcs->doneJobID); + if (!blockToFlush) { pthread_mutex_unlock(&zcs->jobCompleted_mutex); return 0; } /* nothing ready to be flushed => skip */ + pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex); /* block when nothing available to flush */ } pthread_mutex_unlock(&zcs->jobCompleted_mutex); /* compression job completed : output can be flushed */ { ZSTDMT_jobDescription job = zcs->jobs[wJobID]; - size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos); - DEBUGLOG(4, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID); - ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx); - zcs->jobs[wJobID].cctx = NULL; - ZSTDMT_releaseBuffer(zcs->buffPool, job.src); - zcs->jobs[wJobID].srcStart = NULL; - zcs->jobs[wJobID].src = g_nullBuffer; - if (ZSTD_isError(job.cSize)) { - ZSTDMT_waitForAllJobsCompleted(zcs); - ZSTDMT_releaseAllJobResources(zcs); - return job.cSize; + if (!job.jobScanned) { + if (ZSTD_isError(job.cSize)) { + DEBUGLOG(5, "compression error detected "); + ZSTDMT_waitForAllJobsCompleted(zcs); + ZSTDMT_releaseAllJobResources(zcs); + return job.cSize; + } + ZSTDMT_releaseCCtx(zcs->cctxPool, 
job.cctx); + zcs->jobs[wJobID].cctx = NULL; + DEBUGLOG(5, "zcs->params.fParams.checksumFlag : %u ", zcs->params.fParams.checksumFlag); + if (zcs->params.fParams.checksumFlag) { + XXH64_update(&zcs->xxhState, job.srcStart, job.srcSize); + if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) { /* write checksum at end of last section */ + U32 const checksum = (U32)XXH64_digest(&zcs->xxhState); + DEBUGLOG(4, "writing checksum : %08X \n", checksum); + MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum); + job.cSize += 4; + zcs->jobs[wJobID].cSize += 4; + } } + ZSTDMT_releaseBuffer(zcs->buffPool, job.src); + zcs->jobs[wJobID].srcStart = NULL; + zcs->jobs[wJobID].src = g_nullBuffer; + zcs->jobs[wJobID].jobScanned = 1; + } + { size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos); + DEBUGLOG(4, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID); + memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite); + output->pos += toWrite; + job.dstFlushed += toWrite; } - memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite); - output->pos += toWrite; - job.dstFlushed += toWrite; if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => move to next one */ ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff); zcs->jobs[wJobID].dstBuff = g_nullBuffer; @@ -583,7 +603,7 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool); unsigned const jobID = zcs->nextJobID & zcs->jobIDMask; - if ((cctx==NULL) || (dstBuffer.start==NULL)) { + if ((cctx==NULL) || (dstBuffer.start==NULL)) { /* cannot get resources for next job */ zcs->jobs[jobID].jobCompleted = 1; zcs->nextJobID++; ZSTDMT_waitForAllJobsCompleted(zcs); @@ -591,11 +611,12 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu return ERROR(memory_allocation); } - DEBUGLOG(1, 
"preparing job %u to compress %u bytes \n", (U32)zcs->nextJobID, (U32)zcs->targetSectionSize); + DEBUGLOG(4, "preparing job %u to compress %u bytes \n", (U32)zcs->nextJobID, (U32)zcs->targetSectionSize); zcs->jobs[jobID].src = zcs->inBuff.buffer; zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start; zcs->jobs[jobID].srcSize = zcs->targetSectionSize; zcs->jobs[jobID].params = zcs->params; + if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0; /* do not calculate checksum within sections, just keep it in header for first section */ zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL; zcs->jobs[jobID].dict = NULL; zcs->jobs[jobID].dictSize = 0; @@ -626,44 +647,11 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu zcs->nextJobID++; } - /* check if there is any data available to flush */ + /* check for data to flush */ ZSTDMT_flushNextJob(zcs, output, (zcs->inBuff.filled == zcs->inBuffSize)); /* we'll block if it wasn't possible to create new job due to saturation */ -#if 0 - { unsigned const jobID = zcs->doneJobID & zcs->jobIDMask; - unsigned jobCompleted; - pthread_mutex_lock(&zcs->jobCompleted_mutex); - while (zcs->jobs[jobID].jobCompleted == 0 && zcs->inBuff.filled == zcs->inBuffSize) { - /* when no new job could be started, block until there is something to flush, ensuring forward progress */ - pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex); - } - jobCompleted = zcs->jobs[jobID].jobCompleted; - pthread_mutex_unlock(&zcs->jobCompleted_mutex); - if (jobCompleted) { - ZSTDMT_jobDescription const job = zcs->jobs[jobID]; - size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos); - DEBUGLOG(1, "flush %u bytes from job %u ", (U32)toWrite, zcs->doneJobID); - ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx); - zcs->jobs[jobID].cctx = NULL; - ZSTDMT_releaseBuffer(zcs->buffPool, job.src); - zcs->jobs[jobID].srcStart = NULL; zcs->jobs[jobID].src = g_nullBuffer; - if 
(ZSTD_isError(job.cSize)) { - ZSTDMT_waitForAllJobsCompleted(zcs); - ZSTDMT_releaseAllJobResources(zcs); - return job.cSize; - } - memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite); - output->pos += toWrite; - zcs->jobs[jobID].dstFlushed += toWrite; - DEBUGLOG(1, "remaining : %u bytes ", (U32)(job.cSize - job.dstFlushed)); - if (zcs->jobs[jobID].dstFlushed == job.cSize) { /* output buffer fully flushed => go to next one */ - ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff); - zcs->jobs[jobID].dstBuff = g_nullBuffer; - zcs->jobs[jobID].jobCompleted = 0; - zcs->doneJobID++; - } } } -#endif + /* recommended next input size : fill current input buffer */ - return zcs->inBuffSize - zcs->inBuff.filled; + return zcs->inBuffSize - zcs->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */ } @@ -671,7 +659,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp { size_t const srcSize = zcs->inBuff.filled; - DEBUGLOG(1, "flushing : %u bytes to compress", (U32)srcSize); + DEBUGLOG(4, "flushing : %u bytes left to compress", (U32)srcSize); if ( ((srcSize > 0) || (endFrame && !zcs->frameEnded)) && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) { size_t const dstBufferCapacity = ZSTD_compressBound(srcSize); @@ -691,6 +679,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start; zcs->jobs[jobID].srcSize = srcSize; zcs->jobs[jobID].params = zcs->params; + if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0; /* do not calculate checksum within sections, just keep it in header for first section */ zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? 
zcs->cdict : NULL; zcs->jobs[jobID].dict = NULL; zcs->jobs[jobID].dictSize = 0; @@ -719,6 +708,8 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp zcs->inBuff.buffer = g_nullBuffer; zcs->inBuff.filled = 0; zcs->frameEnded = 1; + if (zcs->nextJobID == 0) + zcs->params.fParams.checksumFlag = 0; /* single chunk : checksum is calculated directly within worker thread */ } DEBUGLOG(3, "posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize, zcs->jobs[jobID].lastChunk, zcs->doneJobID, zcs->doneJobID & zcs->jobIDMask); @@ -729,43 +720,6 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp /* check if there is any data available to flush */ DEBUGLOG(5, "zcs->doneJobID : %u ; zcs->nextJobID : %u ", zcs->doneJobID, zcs->nextJobID); return ZSTDMT_flushNextJob(zcs, output, 1); - -#if 0 - { unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask; - PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex); - while (zcs->jobs[wJobID].jobCompleted==0) { - DEBUGLOG(5, "waiting for jobCompleted signal from job %u", zcs->doneJobID); /* block when nothing available to flush */ - pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex); - } - pthread_mutex_unlock(&zcs->jobCompleted_mutex); - /* compression job completed : output can be flushed */ - { ZSTDMT_jobDescription job = zcs->jobs[wJobID]; - size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos); - DEBUGLOG(4, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID); - ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx); zcs->jobs[wJobID].cctx = NULL; /* release cctx for future task */ - ZSTDMT_releaseBuffer(zcs->buffPool, job.src); zcs->jobs[wJobID].srcStart = NULL; zcs->jobs[wJobID].src = g_nullBuffer; - if (ZSTD_isError(job.cSize)) { - ZSTDMT_waitForAllJobsCompleted(zcs); - ZSTDMT_releaseAllJobResources(zcs); - return job.cSize; - } - memcpy((char*)output->dst + output->pos, (const 
char*)job.dstBuff.start + job.dstFlushed, toWrite); - output->pos += toWrite; - job.dstFlushed += toWrite; - if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => next one */ - ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff); zcs->jobs[wJobID].dstBuff = g_nullBuffer; - zcs->jobs[wJobID].jobCompleted = 0; - zcs->doneJobID++; - } else { - zcs->jobs[wJobID].dstFlushed = job.dstFlushed; - } - /* return value : how many bytes left in buffer ; fake it to 1 if unknown but >0 */ - if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed); - if ((zcs->doneJobID < zcs->nextJobID) || (zcs->inBuff.filled)) return 1; /* still some buffer to flush */ - zcs->allJobsCompleted = zcs->frameEnded; - return 0; - } } -#endif } diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h index bdc4caab..84d25f73 100644 --- a/lib/compress/zstdmt_compress.h +++ b/lib/compress/zstdmt_compress.h @@ -28,9 +28,9 @@ ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* cctx, /* === Streaming functions === */ ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel); -ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */ -ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs, const void* dict, size_t dictSize, /**< dict can be released after init */ - ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< params current limitation : no checksum ; pledgedSrcSize is optional and can be zero == unknown */ +ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */ +ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs, const void* dict, size_t dictSize, /**< dict can be released after init, a local copy is preserved within zcs */ + ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is 
optional and can be zero == unknown */ ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); From 512cbe8c10b59b957ecb107b119af95720b6d470 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 24 Jan 2017 17:02:26 -0800 Subject: [PATCH 65/73] zstdmt cli and API allow selection of section sizes By default, section sizes are 4x window size. This new setting allow manual selection of section sizes. The larger they are, the (slightly) better the compression ratio, but also the higher the memory allocation cost, and eventually the lesser the nb of possible threads, since each section is compressed by a single thread. It also introduces a prototype to set generic parameters, ZSTDMT_setMTCtxParameter() The idea is that it's possible to add enums to extend the list of parameters that can be set this way. This is more long-term oriented than a fixed-size struct. Consider it as a test. --- lib/compress/zstdmt_compress.c | 25 +++++++++++++++++----- lib/compress/zstdmt_compress.h | 38 +++++++++++++++++++++++++++------- programs/fileio.c | 14 ++++++++++++- programs/fileio.h | 1 + programs/zstdcli.c | 2 ++ 5 files changed, 67 insertions(+), 13 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 0b91ad4e..1baccf0f 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -9,10 +9,6 @@ /* ====== Tuning parameters ====== */ -#ifndef ZSTDMT_SECTION_LOGSIZE_MIN -# define ZSTDMT_SECTION_LOGSIZE_MIN 20 /* minimum size for a full compression job (20==2^20==1 MB) */ -#endif - #define ZSTDMT_NBTHREADS_MAX 128 @@ -285,6 +281,7 @@ struct ZSTDMT_CCtx_s { unsigned frameEnded; unsigned allJobsCompleted; unsigned long long frameContentSize; + size_t sectionSize; ZSTD_CDict* cdict; ZSTD_CStream* cstream; ZSTDMT_jobDescription jobs[1]; /* variable size (must lies at the end) */ @@ -304,6 +301,7 @@ ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads) cctx->nbThreads = nbThreads; 
cctx->jobIDMask = nbJobs - 1; cctx->allJobsCompleted = 1; + cctx->sectionSize = 0; cctx->factory = POOL_create(nbThreads, 1); cctx->buffPool = ZSTDMT_createBufferPool(nbThreads); cctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads); @@ -356,6 +354,22 @@ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) return 0; } +unsigned ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value) +{ + switch(parameter) + { + case ZSTDMT_p_sectionSize : + mtctx->sectionSize = value; + return 0; + default : + return ERROR(compressionParameter_unsupported); + } +} + + +/* ------------------------------------------ */ +/* ===== Multi-threaded compression ===== */ +/* ------------------------------------------ */ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, void* dst, size_t dstCapacity, @@ -487,7 +501,8 @@ static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, if (zcs->cdict == NULL) return ERROR(memory_allocation); } } zcs->frameContentSize = pledgedSrcSize; - zcs->targetSectionSize = (size_t)1 << MAX(ZSTDMT_SECTION_LOGSIZE_MIN, (zcs->params.cParams.windowLog + 2)); + zcs->targetSectionSize = zcs->sectionSize ? zcs->sectionSize : (size_t)1 << (zcs->params.cParams.windowLog + 2); + zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize); zcs->inBuffSize = zcs->targetSectionSize + ((size_t)1 << zcs->params.cParams.windowLog); zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); if (zcs->inBuff.buffer.start == NULL) return ERROR(memory_allocation); diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h index 84d25f73..c00782e9 100644 --- a/lib/compress/zstdmt_compress.h +++ b/lib/compress/zstdmt_compress.h @@ -7,6 +7,10 @@ * of patent rights can be found in the PATENTS file in the same directory. */ + +/* Note : All prototypes defined in this file shall be considered experimental. 
+ * There is no guarantee of API continuity (yet) on any of these prototypes */ + /* === Dependencies === */ #include /* size_t */ #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */ @@ -27,12 +31,32 @@ ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* cctx, /* === Streaming functions === */ -ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel); -ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */ -ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* zcs, const void* dict, size_t dictSize, /**< dict can be released after init, a local copy is preserved within zcs */ - ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */ +ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel); +ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */ -ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input); -ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ -ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be 
flushed; or an error code (ZSTD_isError()) */ + + +/* === Advanced functions and parameters === */ + +#ifndef ZSTDMT_SECTION_SIZE_MIN +# define ZSTDMT_SECTION_SIZE_MIN (1U << 20) /* 1 MB - Minimum size of each compression job */ +#endif + +ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, const void* dict, size_t dictSize, /**< dict can be released after init, a local copy is preserved within zcs */ + ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */ + +/* ZSDTMT_parameter : + * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */ +typedef enum { ZSTDMT_p_sectionSize /* size of input "section". Each section is compressed in parallel. 0 means default, which is dynamically determined within compression functions */ + } ZSDTMT_parameter; + +/* ZSTDMT_setMTCtxParameter() : + * allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter. + * The function must be called typically after ZSTD_createCCtx(). + * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions. 
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()) */ +ZSTDLIB_API unsigned ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value); diff --git a/programs/fileio.c b/programs/fileio.c index 3864a5fa..86db12ac 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -113,6 +113,16 @@ void FIO_setNbThreads(unsigned nbThreads) { #endif g_nbThreads = nbThreads; } +static U32 g_blockSize = 0; +void FIO_setBlockSize(unsigned blockSize) { + if (blockSize && g_nbThreads==1) + DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n"); +#ifdef ZSTD_MULTITHREAD + if (blockSize-1 < ZSTDMT_SECTION_SIZE_MIN-1) /* intentional underflow */ + DISPLAYLEVEL(2, "Note : minimum block size is %u KB \n", (ZSTDMT_SECTION_SIZE_MIN>>10)); +#endif + g_blockSize = blockSize; +} /*-************************************* @@ -283,10 +293,12 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel, if (comprParams->strategy) params.cParams.strategy = (ZSTD_strategy)(comprParams->strategy - 1); #ifdef ZSTD_MULTITHREAD { size_t const errorCode = ZSTDMT_initCStream_advanced(ress.cctx, dictBuffer, dictBuffSize, params, srcSize); + if (ZSTD_isError(errorCode)) EXM_THROW(33, "Error initializing CStream : %s", ZSTD_getErrorName(errorCode)); + ZSTDMT_setMTCtxParameter(ress.cctx, ZSTDMT_p_sectionSize, g_blockSize); #else { size_t const errorCode = ZSTD_initCStream_advanced(ress.cctx, dictBuffer, dictBuffSize, params, srcSize); -#endif if (ZSTD_isError(errorCode)) EXM_THROW(33, "Error initializing CStream : %s", ZSTD_getErrorName(errorCode)); +#endif } } free(dictBuffer); } diff --git a/programs/fileio.h b/programs/fileio.h index 9ef44929..19f09c33 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -41,6 +41,7 @@ void FIO_setChecksumFlag(unsigned checksumFlag); void FIO_setRemoveSrcFile(unsigned flag); void FIO_setMemLimit(unsigned memLimit); void FIO_setNbThreads(unsigned nbThreads); +void 
FIO_setBlockSize(unsigned blockSize); /*-************************************* diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 785ecede..549dad01 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -118,6 +118,7 @@ static int usage_advanced(const char* programName) DISPLAY( "--[no-]check : integrity check (default:enabled) \n"); #ifdef ZSTD_MULTITHREAD DISPLAY( " -T# : use # threads for compression (default:1) \n"); + DISPLAY( " -B# : select size of independent sections (default:0==automatic) \n"); #endif #endif #ifndef ZSTD_NODECOMPRESS @@ -625,6 +626,7 @@ int main(int argCount, const char* argv[]) if (operation==zom_compress) { #ifndef ZSTD_NOCOMPRESS FIO_setNbThreads(nbThreads); + FIO_setBlockSize((U32)blockSize); if ((filenameIdx==1) && outFileName) operationResult = FIO_compressFilename(outFileName, filenameTable[0], dictFileName, cLevel, &compressionParams); else From f14a669054dc5bc88ed6ecae31c1304d6d10e75f Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 24 Jan 2017 17:41:49 -0800 Subject: [PATCH 66/73] refactor job creation code shared accross ZSTDMT_{compress,flush,end}Stream(), for easier maintenance --- lib/compress/zstdmt_compress.c | 167 +++++++++++++-------------------- 1 file changed, 65 insertions(+), 102 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 1baccf0f..e5790807 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -536,10 +536,71 @@ size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) { } +static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsigned endFrame) +{ + size_t const dstBufferCapacity = ZSTD_compressBound(srcSize); + buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity); + ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool); + unsigned const jobID = zcs->nextJobID & zcs->jobIDMask; + + if ((cctx==NULL) || (dstBuffer.start==NULL)) { + zcs->jobs[jobID].jobCompleted = 
1; + zcs->nextJobID++; + ZSTDMT_waitForAllJobsCompleted(zcs); + ZSTDMT_releaseAllJobResources(zcs); + return ERROR(memory_allocation); + } + + DEBUGLOG(4, "preparing job %u to compress %u bytes \n", zcs->nextJobID, (U32)srcSize); + zcs->jobs[jobID].src = zcs->inBuff.buffer; + zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start; + zcs->jobs[jobID].srcSize = srcSize; + zcs->jobs[jobID].params = zcs->params; + if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0; /* do not calculate checksum within sections, just keep it in header for first section */ + zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL; + zcs->jobs[jobID].dict = NULL; + zcs->jobs[jobID].dictSize = 0; + zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize; + zcs->jobs[jobID].dstBuff = dstBuffer; + zcs->jobs[jobID].cctx = cctx; + zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0); + zcs->jobs[jobID].lastChunk = endFrame; + zcs->jobs[jobID].jobCompleted = 0; + zcs->jobs[jobID].dstFlushed = 0; + zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex; + zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond; + + /* get a new buffer for next input */ + if (!endFrame) { + zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); + if (zcs->inBuff.buffer.start == NULL) { /* not enough memory to allocate next input buffer */ + zcs->jobs[jobID].jobCompleted = 1; + zcs->nextJobID++; + ZSTDMT_waitForAllJobsCompleted(zcs); + ZSTDMT_releaseAllJobResources(zcs); + return ERROR(memory_allocation); + } + zcs->inBuff.filled -= srcSize; + memcpy(zcs->inBuff.buffer.start, (const char*)zcs->jobs[jobID].srcStart + srcSize, zcs->inBuff.filled); + } else { + zcs->inBuff.buffer = g_nullBuffer; + zcs->inBuff.filled = 0; + zcs->frameEnded = 1; + if (zcs->nextJobID == 0) + zcs->params.fParams.checksumFlag = 0; /* single chunk : checksum is calculated directly within worker thread */ + } + + DEBUGLOG(3, "posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)", 
zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize, zcs->jobs[jobID].lastChunk, zcs->doneJobID, zcs->doneJobID & zcs->jobIDMask); + POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]); /* this call is blocking when thread worker pool is exhausted */ + zcs->nextJobID++; + return 0; +} + + /* ZSTDMT_flushNextJob() : * output : will be updated with amount of data flushed . * blockToFlush : if >0, the function will block and wait if there is no data available to flush . - * @return : amount of data remaining within internal buffer, 1 if unknown but > 0, 0 if no more */ + * @return : amount of data remaining within internal buffer, 1 if unknown but > 0, 0 if no more, or an error code */ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush) { unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask; @@ -613,57 +674,11 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu if ( (zcs->inBuff.filled == zcs->inBuffSize) /* filled enough : let's compress */ && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) { /* avoid overwriting job round buffer */ - size_t const dstBufferCapacity = ZSTD_compressBound(zcs->targetSectionSize); - buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity); - ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool); - unsigned const jobID = zcs->nextJobID & zcs->jobIDMask; - - if ((cctx==NULL) || (dstBuffer.start==NULL)) { /* cannot get resources for next job */ - zcs->jobs[jobID].jobCompleted = 1; - zcs->nextJobID++; - ZSTDMT_waitForAllJobsCompleted(zcs); - ZSTDMT_releaseAllJobResources(zcs); - return ERROR(memory_allocation); - } - - DEBUGLOG(4, "preparing job %u to compress %u bytes \n", (U32)zcs->nextJobID, (U32)zcs->targetSectionSize); - zcs->jobs[jobID].src = zcs->inBuff.buffer; - zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start; - zcs->jobs[jobID].srcSize = zcs->targetSectionSize; - zcs->jobs[jobID].params = zcs->params; - if 
(zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0; /* do not calculate checksum within sections, just keep it in header for first section */ - zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL; - zcs->jobs[jobID].dict = NULL; - zcs->jobs[jobID].dictSize = 0; - zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize; - zcs->jobs[jobID].dstBuff = dstBuffer; - zcs->jobs[jobID].cctx = cctx; - zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0); - zcs->jobs[jobID].lastChunk = 0; - zcs->jobs[jobID].jobCompleted = 0; - zcs->jobs[jobID].dstFlushed = 0; - zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex; - zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond; - - /* get a new buffer for next input - save remaining into it */ - zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); - if (zcs->inBuff.buffer.start == NULL) { /* not enough memory to allocate next input buffer */ - zcs->jobs[jobID].jobCompleted = 1; - zcs->nextJobID++; - ZSTDMT_waitForAllJobsCompleted(zcs); - ZSTDMT_releaseAllJobResources(zcs); - return ERROR(memory_allocation); - } - zcs->inBuff.filled = (U32)(zcs->inBuffSize - zcs->targetSectionSize); - memcpy(zcs->inBuff.buffer.start, (const char*)zcs->jobs[jobID].srcStart + zcs->targetSectionSize, zcs->inBuff.filled); - - DEBUGLOG(3, "posting job %u (%u bytes) (note : doneJob = %u=>%u)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize, zcs->doneJobID, zcs->doneJobID & zcs->jobIDMask); - POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]); /* This call is blocking if all workers are busy */ - zcs->nextJobID++; + CHECK_F( ZSTDMT_createCompressionJob(zcs, zcs->targetSectionSize, 0) ); } /* check for data to flush */ - ZSTDMT_flushNextJob(zcs, output, (zcs->inBuff.filled == zcs->inBuffSize)); /* we'll block if it wasn't possible to create new job due to saturation */ + CHECK_F( ZSTDMT_flushNextJob(zcs, output, (zcs->inBuff.filled == zcs->inBuffSize)) ); /* block if it wasn't possible to 
create new job due to saturation */ /* recommended next input size : fill current input buffer */ return zcs->inBuffSize - zcs->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */ @@ -677,59 +692,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp DEBUGLOG(4, "flushing : %u bytes left to compress", (U32)srcSize); if ( ((srcSize > 0) || (endFrame && !zcs->frameEnded)) && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) { - size_t const dstBufferCapacity = ZSTD_compressBound(srcSize); - buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity); - ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool); - unsigned const jobID = zcs->nextJobID & zcs->jobIDMask; - - if ((cctx==NULL) || (dstBuffer.start==NULL)) { - zcs->jobs[jobID].jobCompleted = 1; - zcs->nextJobID++; - ZSTDMT_waitForAllJobsCompleted(zcs); - ZSTDMT_releaseAllJobResources(zcs); - return ERROR(memory_allocation); - } - - zcs->jobs[jobID].src = zcs->inBuff.buffer; - zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start; - zcs->jobs[jobID].srcSize = srcSize; - zcs->jobs[jobID].params = zcs->params; - if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0; /* do not calculate checksum within sections, just keep it in header for first section */ - zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? 
zcs->cdict : NULL; - zcs->jobs[jobID].dict = NULL; - zcs->jobs[jobID].dictSize = 0; - zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize; - zcs->jobs[jobID].dstBuff = dstBuffer; - zcs->jobs[jobID].cctx = cctx; - zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0); - zcs->jobs[jobID].lastChunk = endFrame; - zcs->jobs[jobID].jobCompleted = 0; - zcs->jobs[jobID].dstFlushed = 0; - zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex; - zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond; - - /* get a new buffer for next input */ - if (!endFrame) { - zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); - zcs->inBuff.filled = 0; - if (zcs->inBuff.buffer.start == NULL) { /* not enough memory to allocate next input buffer */ - zcs->jobs[jobID].jobCompleted = 1; - zcs->nextJobID++; - ZSTDMT_waitForAllJobsCompleted(zcs); - ZSTDMT_releaseAllJobResources(zcs); - return ERROR(memory_allocation); - } - } else { - zcs->inBuff.buffer = g_nullBuffer; - zcs->inBuff.filled = 0; - zcs->frameEnded = 1; - if (zcs->nextJobID == 0) - zcs->params.fParams.checksumFlag = 0; /* single chunk : checksum is calculated directly within worker thread */ - } - - DEBUGLOG(3, "posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)", zcs->nextJobID, (U32)zcs->jobs[jobID].srcSize, zcs->jobs[jobID].lastChunk, zcs->doneJobID, zcs->doneJobID & zcs->jobIDMask); - POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]); /* this call is blocking when thread worker pool is exhausted */ - zcs->nextJobID++; + CHECK_F( ZSTDMT_createCompressionJob(zcs, srcSize, endFrame) ); } /* check if there is any data available to flush */ From dc8dae596a5ddc52d8fe98491855119999a8b8e2 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 24 Jan 2017 22:32:12 -0800 Subject: [PATCH 67/73] overlapped section, for improved compression Sections 2+ read a bit of data from previous section in order to improve compression ratio. This also costs some CPU, to reference read data. 
Read data is currently fixed to window>>3 size --- lib/compress/zstdmt_compress.c | 43 +++++++++++++++++++++------------- lib/compress/zstdmt_compress.h | 2 +- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index e5790807..99b2e68f 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -209,6 +209,7 @@ typedef struct { buffer_t src; const void* srcStart; size_t srcSize; + size_t dictSize; buffer_t dstBuff; size_t cSize; size_t dstFlushed; @@ -220,8 +221,6 @@ typedef struct { pthread_cond_t* jobCompleted_cond; ZSTD_parameters params; ZSTD_CDict* cdict; - const void* dict; - size_t dictSize; unsigned long long fullFrameSize; } ZSTDMT_jobDescription; @@ -229,16 +228,18 @@ typedef struct { void ZSTDMT_compressChunk(void* jobDescription) { ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription; + const void* const src = (const char*)job->srcStart + job->dictSize; buffer_t const dstBuff = job->dstBuff; + DEBUGLOG(3, "job (first:%u) (last:%u) : dictSize %u, srcSize %u", job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize); if (job->cdict) { size_t const initError = ZSTD_compressBegin_usingCDict(job->cctx, job->cdict, job->fullFrameSize); if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; } } else { - size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->dict, job->dictSize, job->params, job->fullFrameSize); + size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize); if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; } } if (!job->firstChunk) { /* flush frame header */ - size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, 0); + size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0); if (ZSTD_isError(hSize)) { job->cSize = hSize; 
goto _endJob; } ZSTD_invalidateRepCodes(job->cctx); } @@ -246,8 +247,8 @@ void ZSTDMT_compressChunk(void* jobDescription) DEBUGLOG(4, "Compressing : "); DEBUG_PRINTHEX(4, job->srcStart, 12); job->cSize = (job->lastChunk) ? /* last chunk signal */ - ZSTD_compressEnd(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, job->srcSize) : - ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, job->srcStart, job->srcSize); + ZSTD_compressEnd (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) : + ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize); DEBUGLOG(3, "compressed %u bytes into %u bytes (first:%u) (last:%u)", (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk); _endJob: @@ -271,6 +272,8 @@ struct ZSTDMT_CCtx_s { pthread_cond_t jobCompleted_cond; size_t targetSectionSize; size_t inBuffSize; + size_t dictSize; + size_t targetDictSize; inBuff_t inBuff; ZSTD_parameters params; XXH64_state_t xxhState; @@ -354,7 +357,7 @@ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) return 0; } -unsigned ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value) +size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value) { switch(parameter) { @@ -503,10 +506,14 @@ static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, zcs->frameContentSize = pledgedSrcSize; zcs->targetSectionSize = zcs->sectionSize ? 
zcs->sectionSize : (size_t)1 << (zcs->params.cParams.windowLog + 2); zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize); - zcs->inBuffSize = zcs->targetSectionSize + ((size_t)1 << zcs->params.cParams.windowLog); + //zcs->targetDictSize = ((size_t)1 << zcs->params.cParams.windowLog); /* full window size, for test */ + zcs->targetDictSize = ((size_t)1 << zcs->params.cParams.windowLog) >> 3; /* fixed currently */ + //zcs->targetDictSize = 0; + zcs->inBuffSize = zcs->targetSectionSize + ((size_t)1 << zcs->params.cParams.windowLog) /* margin */ + zcs->targetDictSize; zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); if (zcs->inBuff.buffer.start == NULL) return ERROR(memory_allocation); zcs->inBuff.filled = 0; + zcs->dictSize = 0; zcs->doneJobID = 0; zcs->nextJobID = 0; zcs->frameEnded = 0; @@ -551,15 +558,14 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi return ERROR(memory_allocation); } - DEBUGLOG(4, "preparing job %u to compress %u bytes \n", zcs->nextJobID, (U32)srcSize); + DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ", zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize); zcs->jobs[jobID].src = zcs->inBuff.buffer; zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start; zcs->jobs[jobID].srcSize = srcSize; + zcs->jobs[jobID].dictSize = zcs->dictSize; /* note : zcs->inBuff.filled is presumed >= srcSize + dictSize */ zcs->jobs[jobID].params = zcs->params; if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0; /* do not calculate checksum within sections, just keep it in header for first section */ zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? 
zcs->cdict : NULL; - zcs->jobs[jobID].dict = NULL; - zcs->jobs[jobID].dictSize = 0; zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize; zcs->jobs[jobID].dstBuff = dstBuffer; zcs->jobs[jobID].cctx = cctx; @@ -572,6 +578,7 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi /* get a new buffer for next input */ if (!endFrame) { + size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize); zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); if (zcs->inBuff.buffer.start == NULL) { /* not enough memory to allocate next input buffer */ zcs->jobs[jobID].jobCompleted = 1; @@ -580,8 +587,12 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi ZSTDMT_releaseAllJobResources(zcs); return ERROR(memory_allocation); } - zcs->inBuff.filled -= srcSize; - memcpy(zcs->inBuff.buffer.start, (const char*)zcs->jobs[jobID].srcStart + srcSize, zcs->inBuff.filled); + DEBUGLOG(5, "inBuff filled to %u", (U32)zcs->inBuff.filled); + zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize; + DEBUGLOG(5, "new job : filled to %u, with %u dict and %u src", (U32)zcs->inBuff.filled, (U32)newDictSize, (U32)(zcs->inBuff.filled - newDictSize)); + memmove(zcs->inBuff.buffer.start, (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize, zcs->inBuff.filled); + DEBUGLOG(5, "new inBuff pre-filled"); + zcs->dictSize = newDictSize; } else { zcs->inBuff.buffer = g_nullBuffer; zcs->inBuff.filled = 0; @@ -625,7 +636,7 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi zcs->jobs[wJobID].cctx = NULL; DEBUGLOG(5, "zcs->params.fParams.checksumFlag : %u ", zcs->params.fParams.checksumFlag); if (zcs->params.fParams.checksumFlag) { - XXH64_update(&zcs->xxhState, job.srcStart, job.srcSize); + XXH64_update(&zcs->xxhState, (const char*)job.srcStart + job.dictSize, job.srcSize); if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) { /* write checksum at 
end of last section */ U32 const checksum = (U32)XXH64_digest(&zcs->xxhState); DEBUGLOG(4, "writing checksum : %08X \n", checksum); @@ -689,10 +700,10 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* outp { size_t const srcSize = zcs->inBuff.filled; - DEBUGLOG(4, "flushing : %u bytes left to compress", (U32)srcSize); + if (srcSize) DEBUGLOG(1, "flushing : %u bytes left to compress", (U32)srcSize); if ( ((srcSize > 0) || (endFrame && !zcs->frameEnded)) && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) { - CHECK_F( ZSTDMT_createCompressionJob(zcs, srcSize, endFrame) ); + CHECK_F( ZSTDMT_createCompressionJob(zcs, srcSize - zcs->dictSize, endFrame) ); } /* check if there is any data available to flush */ diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h index c00782e9..1288c1ed 100644 --- a/lib/compress/zstdmt_compress.h +++ b/lib/compress/zstdmt_compress.h @@ -59,4 +59,4 @@ typedef enum { ZSTDMT_p_sectionSize /* size of input "section". Each section * The function must be called typically after ZSTD_createCCtx(). * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions. * @return : 0, or an error code (which can be tested using ZSTD_isError()) */ -ZSTDLIB_API unsigned ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value); +ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value); From 943cff9c37e999c77ccd11cf5dec29a6f010fe3d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 25 Jan 2017 12:31:07 -0800 Subject: [PATCH 68/73] fixed zstdmt cli freeze issue with large nb of threads fileio.c was continually pushing more content without giving a chance to flush compressed one. It would block the job queue when input data was accumulated too fast (requiring to define many threads). Fixed : fileio flushes whatever it can after each input attempt. 
--- lib/compress/zstdmt_compress.c | 10 +++++++--- programs/fileio.c | 15 +++++++-------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 99b2e68f..04e0adfc 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -233,6 +233,7 @@ void ZSTDMT_compressChunk(void* jobDescription) DEBUGLOG(3, "job (first:%u) (last:%u) : dictSize %u, srcSize %u", job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize); if (job->cdict) { size_t const initError = ZSTD_compressBegin_usingCDict(job->cctx, job->cdict, job->fullFrameSize); + if (job->cdict) DEBUGLOG(3, "using CDict "); if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; } } else { size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize); @@ -296,6 +297,7 @@ ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads) U32 const minNbJobs = nbThreads + 2; U32 const nbJobsLog2 = ZSTD_highbit32(minNbJobs) + 1; U32 const nbJobs = 1 << nbJobsLog2; + //nbThreads = 1; /* for tests */ DEBUGLOG(5, "nbThreads : %u ; minNbJobs : %u ; nbJobsLog2 : %u ; nbJobs : %u \n", nbThreads, minNbJobs, nbJobsLog2, nbJobs); if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL; @@ -490,6 +492,7 @@ static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, ZSTD_parameters params, unsigned long long pledgedSrcSize) { ZSTD_customMem const cmem = { NULL, NULL, NULL }; + DEBUGLOG(3, "Started new compression, with windowLog : %u", params.cParams.windowLog); if (zcs->nbThreads==1) return ZSTD_initCStream_advanced(zcs->cstream, dict, dictSize, params, pledgedSrcSize); if (zcs->allJobsCompleted == 0) { /* previous job not correctly finished */ ZSTDMT_waitForAllJobsCompleted(zcs); @@ -596,6 +599,7 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi } else { zcs->inBuff.buffer = g_nullBuffer; zcs->inBuff.filled 
= 0; + zcs->dictSize = 0; zcs->frameEnded = 1; if (zcs->nextJobID == 0) zcs->params.fParams.checksumFlag = 0; /* single chunk : checksum is calculated directly within worker thread */ @@ -698,12 +702,12 @@ size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBu static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned endFrame) { - size_t const srcSize = zcs->inBuff.filled; + size_t const srcSize = zcs->inBuff.filled - zcs->dictSize; - if (srcSize) DEBUGLOG(1, "flushing : %u bytes left to compress", (U32)srcSize); + if (srcSize) DEBUGLOG(4, "flushing : %u bytes left to compress", (U32)srcSize); if ( ((srcSize > 0) || (endFrame && !zcs->frameEnded)) && (zcs->nextJobID <= zcs->doneJobID + zcs->jobIDMask) ) { - CHECK_F( ZSTDMT_createCompressionJob(zcs, srcSize - zcs->dictSize, endFrame) ); + CHECK_F( ZSTDMT_createCompressionJob(zcs, srcSize, endFrame) ); } /* check if there is any data available to flush */ diff --git a/programs/fileio.c b/programs/fileio.c index 86db12ac..db2bb55d 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -349,23 +349,22 @@ static int FIO_compressFilename_internal(cRess_t ress, readsize += inSize; DISPLAYUPDATE(2, "\rRead : %u MB ", (U32)(readsize>>20)); - /* Compress using buffered streaming */ { ZSTD_inBuffer inBuff = { ress.srcBuffer, inSize, 0 }; - ZSTD_outBuffer outBuff= { ress.dstBuffer, ress.dstBufferSize, 0 }; while (inBuff.pos != inBuff.size) { /* note : is there any possibility of endless loop ? for example, if outBuff is not large enough ? 
*/ + ZSTD_outBuffer outBuff= { ress.dstBuffer, ress.dstBufferSize, 0 }; #ifdef ZSTD_MULTITHREAD size_t const result = ZSTDMT_compressStream(ress.cctx, &outBuff, &inBuff); #else size_t const result = ZSTD_compressStream(ress.cctx, &outBuff, &inBuff); #endif if (ZSTD_isError(result)) EXM_THROW(23, "Compression error : %s ", ZSTD_getErrorName(result)); - } - /* Write cBlock */ - { size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile); - if (sizeCheck!=outBuff.pos) EXM_THROW(25, "Write error : cannot write compressed block into %s", dstFileName); } - compressedfilesize += outBuff.pos; - } + /* Write compressed stream */ + if (outBuff.pos) { + size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile); + if (sizeCheck!=outBuff.pos) EXM_THROW(25, "Write error : cannot write compressed block into %s", dstFileName); + compressedfilesize += outBuff.pos; + } } } DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ", (U32)(readsize>>20), (double)compressedfilesize/readsize*100); } From bb0027405afb197aff767d1a3d9a759e88d105ed Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 25 Jan 2017 16:25:38 -0800 Subject: [PATCH 69/73] fixed zstdmt corruption issue when enabling overlapped sections see Asana board for detailed explanation on why and how to fix it --- lib/compress/zstd_compress.c | 38 +++++++++++++++++++++++--------- lib/compress/zstdmt_compress.c | 1 + lib/decompress/zstd_decompress.c | 2 +- lib/zstd.h | 8 ++++++- programs/fileio.c | 2 +- tests/zstreamtest.c | 13 ++++++----- 6 files changed, 46 insertions(+), 18 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 3c69a1ae..95c7e1a7 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -60,7 +60,8 @@ struct ZSTD_CCtx_s { U32 nextToUpdate; /* index from which to continue dictionary update */ U32 nextToUpdate3; /* index from which to continue dictionary update */ U32 hashLog3; /* dispatch table : larger == faster, more memory */ - 
U32 loadedDictEnd; + U32 loadedDictEnd; /* index of end of dictionary */ + U32 forceWindow; /* force back-references to respect limit of 1<customMem), &customMem, sizeof(customMem)); + cctx->customMem = customMem; return cctx; } @@ -118,6 +119,15 @@ size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) return sizeof(*cctx) + cctx->workSpaceSize; } +size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value) +{ + switch(param) + { + case ZSTD_p_forceWindow : cctx->forceWindow = value; return 0; + default: return ERROR(parameter_unknown); + } +} + const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface */ { return &(ctx->seqStore); @@ -748,6 +758,13 @@ _check_compressibility: } +#if 0 /* for debug */ +# define STORESEQ_DEBUG +#include /* fprintf */ +U32 g_startDebug = 0; +const BYTE* g_start = NULL; +#endif + /*! ZSTD_storeSeq() : Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. `offsetCode` : distance to match, or 0 == repCode. 
@@ -755,13 +772,14 @@ _check_compressibility: */ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode) { -#if 0 /* for debug */ - static const BYTE* g_start = NULL; - const U32 pos = (U32)((const BYTE*)literals - g_start); - if (g_start==NULL) g_start = (const BYTE*)literals; - //if ((pos > 1) && (pos < 50000)) - printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n", - pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); +#ifdef STORESEQ_DEBUG + if (g_startDebug) { + const U32 pos = (U32)((const BYTE*)literals - g_start); + if (g_start==NULL) g_start = (const BYTE*)literals; + if ((pos > 1895000) && (pos < 1895300)) + fprintf(stderr, "Cpos %6u :%5u literals & match %3u bytes at distance %6u \n", + pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); + } #endif /* copy Literals */ ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); @@ -2305,7 +2323,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, else cctx->nextToUpdate -= correction; } - if ((U32)(ip+blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) { + if ((U32)(ip+blockSize - cctx->base) > (cctx->forceWindow ? 
0 : cctx->loadedDictEnd) + maxDist) { /* enforce maxDist */ U32 const newLowLimit = (U32)(ip+blockSize - cctx->base) - maxDist; if (cctx->lowLimit < newLowLimit) cctx->lowLimit = newLowLimit; diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 04e0adfc..988e133d 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -238,6 +238,7 @@ void ZSTDMT_compressChunk(void* jobDescription) } else { size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize); if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; } + ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1); } if (!job->firstChunk) { /* flush frame header */ size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0); diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index c53f3c3d..9c04503d 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1973,7 +1973,7 @@ size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, switch(paramType) { default : return ERROR(parameter_unknown); - case ZSTDdsp_maxWindowSize : zds->maxWindowSize = paramValue ? paramValue : (U32)(-1); break; + case DStream_p_maxWindowSize : zds->maxWindowSize = paramValue ? paramValue : (U32)(-1); break; } return 0; } diff --git a/lib/zstd.h b/lib/zstd.h index 52d65206..7a0aa330 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -401,6 +401,12 @@ ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); * Gives the amount of memory used by a given ZSTD_CCtx */ ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); +/*! 
ZSTD_setCCtxParameter() : + * Set advanced parameters, selected through enum ZSTD_CCtxParameter + * @result : 0, or an error code (which can be tested with ZSTD_isError()) */ +typedef enum { ZSTD_p_forceWindow } ZSTD_CCtxParameter; +size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value); + /*! ZSTD_createCDict_byReference() : * Create a digested dictionary for compression * Dictionary content is simply referenced, and therefore stays in dictBuffer. @@ -519,7 +525,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); /*===== Advanced Streaming decompression functions =====*/ -typedef enum { ZSTDdsp_maxWindowSize } ZSTD_DStreamParameter_e; +typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e; ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: a dict will not be used if dict == NULL or dictSize < 8 */ ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue); diff --git a/programs/fileio.c b/programs/fileio.c index db2bb55d..f18e418a 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -530,7 +530,7 @@ static dRess_t FIO_createDResources(const char* dictFileName) /* Allocation */ ress.dctx = ZSTD_createDStream(); if (ress.dctx==NULL) EXM_THROW(60, "Can't create ZSTD_DStream"); - ZSTD_setDStreamParameter(ress.dctx, ZSTDdsp_maxWindowSize, g_memLimit); + ZSTD_setDStreamParameter(ress.dctx, DStream_p_maxWindowSize, g_memLimit); ress.srcBufferSize = ZSTD_DStreamInSize(); ress.srcBuffer = malloc(ress.srcBufferSize); ress.dstBufferSize = ZSTD_DStreamOutSize(); diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 9efba323..2cb6d65e 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -225,7 +225,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo inBuff2 = 
inBuff; DISPLAYLEVEL(3, "test%3i : decompress %u bytes : ", testNb++, COMPRESSIBLE_NOISE_LENGTH); ZSTD_initDStream_usingDict(zd, CNBuffer, 128 KB); - { size_t const r = ZSTD_setDStreamParameter(zd, ZSTDdsp_maxWindowSize, 1000000000); /* large limit */ + { size_t const r = ZSTD_setDStreamParameter(zd, DStream_p_maxWindowSize, 1000000000); /* large limit */ if (ZSTD_isError(r)) goto _output_error; } { size_t const remaining = ZSTD_decompressStream(zd, &outBuff, &inBuff); if (remaining != 0) goto _output_error; } /* should reach end of frame == 0; otherwise, some data left, or an error */ @@ -426,7 +426,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo /* Memory restriction */ DISPLAYLEVEL(3, "test%3i : maxWindowSize < frame requirement : ", testNb++); ZSTD_initDStream_usingDict(zd, CNBuffer, 128 KB); - { size_t const r = ZSTD_setDStreamParameter(zd, ZSTDdsp_maxWindowSize, 1000); /* too small limit */ + { size_t const r = ZSTD_setDStreamParameter(zd, DStream_p_maxWindowSize, 1000); /* too small limit */ if (ZSTD_isError(r)) goto _output_error; } inBuff.src = compressedBuffer; inBuff.size = cSize; @@ -466,6 +466,10 @@ static size_t findDiff(const void* buf1, const void* buf2, size_t max) if (b1[u] != b2[u]) break; } DISPLAY("Error at position %u / %u \n", (U32)u, (U32)max); + DISPLAY(" %02X %02X %02X :%02X: %02X %02X %02X %02X %02X \n", + b1[u-3], b1[u-2], b1[u-1], b1[u-0], b1[u+1], b1[u+2], b1[u+3], b1[u+4], b1[u+5]); + DISPLAY(" %02X %02X %02X :%02X: %02X %02X %02X %02X %02X \n", + b2[u-3], b2[u-2], b2[u-1], b2[u-0], b2[u+1], b2[u+2], b2[u+3], b2[u+4], b2[u+5]); return u; } @@ -902,9 +906,8 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest, double comp decompressionResult = ZSTD_decompressStream(zd, &outBuff, &inBuff); CHECK (ZSTD_isError(decompressionResult), "decompression error : %s", ZSTD_getErrorName(decompressionResult)); } - CHECK (decompressionResult != 0, "frame not fully decoded"); - CHECK 
(outBuff.pos != totalTestSize, "decompressed data : wrong size") - CHECK (inBuff.pos != cSize, "compressed data should be fully read") + CHECK (outBuff.pos != totalTestSize, "decompressed data : wrong size (%u != %u)", (U32)outBuff.pos, (U32)totalTestSize); + CHECK (inBuff.pos != cSize, "compressed data should be fully read (%u != %u)", (U32)inBuff.pos, (U32)cSize); { U64 const crcDest = XXH64(dstBuffer, totalTestSize, 0); if (crcDest!=crcOrig) findDiff(copyBuffer, dstBuffer, totalTestSize); CHECK (crcDest!=crcOrig, "decompressed data corrupted"); From 06e7697f964e486b062b92e0656749dc54b73d47 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 25 Jan 2017 16:39:03 -0800 Subject: [PATCH 70/73] added test of new parameter ZSTD_p_forceWindow --- lib/compress/zstd_compress.c | 6 +++--- lib/compress/zstdmt_compress.h | 5 +++-- lib/zstd.h | 4 +++- tests/fuzzer.c | 1 + 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 95c7e1a7..b6cf3764 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -123,7 +123,7 @@ size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned { switch(param) { - case ZSTD_p_forceWindow : cctx->forceWindow = value; return 0; + case ZSTD_p_forceWindow : cctx->forceWindow = value>0; cctx->loadedDictEnd = 0; return 0; default: return ERROR(parameter_unknown); } } @@ -2323,7 +2323,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, else cctx->nextToUpdate -= correction; } - if ((U32)(ip+blockSize - cctx->base) > (cctx->forceWindow ? 
0 : cctx->loadedDictEnd) + maxDist) { + if ((U32)(ip+blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) { /* enforce maxDist */ U32 const newLowLimit = (U32)(ip+blockSize - cctx->base) - maxDist; if (cctx->lowLimit < newLowLimit) cctx->lowLimit = newLowLimit; @@ -2477,7 +2477,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t zc->dictBase = zc->base; zc->base += ip - zc->nextSrc; zc->nextToUpdate = zc->dictLimit; - zc->loadedDictEnd = (U32)(iend - zc->base); + zc->loadedDictEnd = zc->forceWindow ? 0 : (U32)(iend - zc->base); zc->nextSrc = iend; if (srcSize <= HASH_READ_SIZE) return 0; diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h index 1288c1ed..4757e3e0 100644 --- a/lib/compress/zstdmt_compress.h +++ b/lib/compress/zstdmt_compress.h @@ -51,8 +51,9 @@ ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, const void* d /* ZSDTMT_parameter : * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */ -typedef enum { ZSTDMT_p_sectionSize /* size of input "section". Each section is compressed in parallel. 0 means default, which is dynamically determined within compression functions */ - } ZSDTMT_parameter; +typedef enum { + ZSTDMT_p_sectionSize /* size of input "section". Each section is compressed in parallel. 0 means default, which is dynamically determined within compression functions */ +} ZSDTMT_parameter; /* ZSTDMT_setMTCtxParameter() : * allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter. diff --git a/lib/zstd.h b/lib/zstd.h index 7a0aa330..8325710b 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -404,7 +404,9 @@ ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); /*! 
ZSTD_setCCtxParameter() : * Set advanced parameters, selected through enum ZSTD_CCtxParameter * @result : 0, or an error code (which can be tested with ZSTD_isError()) */ -typedef enum { ZSTD_p_forceWindow } ZSTD_CCtxParameter; +typedef enum { + ZSTD_p_forceWindow /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0)*/ +} ZSTD_CCtxParameter; size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value); /*! ZSTD_createCDict_byReference() : diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 00cfb057..60546c07 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -755,6 +755,7 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, U32 const maxD CHECK (ZSTD_isError(errorCode), "ZSTD_copyCCtx error : %s", ZSTD_getErrorName(errorCode)); } } XXH64_reset(&xxhState, 0); + ZSTD_setCCtxParameter(ctx, ZSTD_p_forceWindow, FUZ_rand(&lseed) & 1); { U32 const nbChunks = (FUZ_rand(&lseed) & 127) + 2; U32 n; for (totalTestSize=0, cSize=0, n=0 ; n Date: Wed, 25 Jan 2017 17:01:13 -0800 Subject: [PATCH 71/73] CLI : automatically set overlap size to max (windowSize) for max compression level --- lib/compress/zstdmt_compress.c | 10 ++++++---- lib/compress/zstdmt_compress.h | 3 ++- programs/fileio.c | 6 +++++- programs/fileio.h | 3 ++- programs/zstdcli.c | 1 + tests/zstreamtest.c | 4 ++-- 6 files changed, 18 insertions(+), 9 deletions(-) diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c index 988e133d..5f0bf2ab 100644 --- a/lib/compress/zstdmt_compress.c +++ b/lib/compress/zstdmt_compress.c @@ -285,6 +285,7 @@ struct ZSTDMT_CCtx_s { unsigned nextJobID; unsigned frameEnded; unsigned allJobsCompleted; + unsigned overlapWrLog; unsigned long long frameContentSize; size_t sectionSize; ZSTD_CDict* cdict; @@ -298,7 +299,6 @@ ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads) U32 const minNbJobs = nbThreads + 2; U32 const nbJobsLog2 = ZSTD_highbit32(minNbJobs) + 1; U32 const nbJobs = 
1 << nbJobsLog2; - //nbThreads = 1; /* for tests */ DEBUGLOG(5, "nbThreads : %u ; minNbJobs : %u ; nbJobsLog2 : %u ; nbJobs : %u \n", nbThreads, minNbJobs, nbJobsLog2, nbJobs); if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL; @@ -308,6 +308,7 @@ ZSTDMT_CCtx *ZSTDMT_createCCtx(unsigned nbThreads) cctx->jobIDMask = nbJobs - 1; cctx->allJobsCompleted = 1; cctx->sectionSize = 0; + cctx->overlapWrLog = 3; cctx->factory = POOL_create(nbThreads, 1); cctx->buffPool = ZSTDMT_createBufferPool(nbThreads); cctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads); @@ -367,6 +368,9 @@ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, case ZSTDMT_p_sectionSize : mtctx->sectionSize = value; return 0; + case ZSTDMT_p_overlapSectionRLog : + mtctx->overlapWrLog = value; + return 0; default : return ERROR(compressionParameter_unsupported); } @@ -510,9 +514,7 @@ static size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, zcs->frameContentSize = pledgedSrcSize; zcs->targetSectionSize = zcs->sectionSize ? zcs->sectionSize : (size_t)1 << (zcs->params.cParams.windowLog + 2); zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize); - //zcs->targetDictSize = ((size_t)1 << zcs->params.cParams.windowLog); /* full window size, for test */ - zcs->targetDictSize = ((size_t)1 << zcs->params.cParams.windowLog) >> 3; /* fixed currently */ - //zcs->targetDictSize = 0; + zcs->targetDictSize = zcs->overlapWrLog < 10 ? 
(size_t)1 << (zcs->params.cParams.windowLog - zcs->overlapWrLog) : 0; zcs->inBuffSize = zcs->targetSectionSize + ((size_t)1 << zcs->params.cParams.windowLog) /* margin */ + zcs->targetDictSize; zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); if (zcs->inBuff.buffer.start == NULL) return ERROR(memory_allocation); diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h index 4757e3e0..92de52d6 100644 --- a/lib/compress/zstdmt_compress.h +++ b/lib/compress/zstdmt_compress.h @@ -52,7 +52,8 @@ ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, const void* d /* ZSDTMT_parameter : * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */ typedef enum { - ZSTDMT_p_sectionSize /* size of input "section". Each section is compressed in parallel. 0 means default, which is dynamically determined within compression functions */ + ZSTDMT_p_sectionSize, /* size of input "section". Each section is compressed in parallel. 0 means default, which is dynamically determined within compression functions */ + ZSTDMT_p_overlapSectionRLog /* reverse log of overlapped section; 0 == use a complete window, 3(default) == use 1/8th of window, values >=10 means no overlap */ } ZSDTMT_parameter; /* ZSTDMT_setMTCtxParameter() : diff --git a/programs/fileio.c b/programs/fileio.c index f18e418a..ac7dffb3 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -7,6 +7,7 @@ * of patent rights can be found in the PATENTS file in the same directory. 
*/ + /* ************************************* * Compiler Options ***************************************/ @@ -266,10 +267,13 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel, #ifdef ZSTD_MULTITHREAD ress.cctx = ZSTDMT_createCCtx(g_nbThreads); + if (ress.cctx == NULL) EXM_THROW(30, "zstd: allocation error : can't create ZSTD_CStream"); + if (cLevel==ZSTD_maxCLevel()) + ZSTDMT_setMTCtxParameter(ress.cctx, ZSTDMT_p_overlapSectionRLog, 0); /* use complete window for overlap */ #else ress.cctx = ZSTD_createCStream(); -#endif if (ress.cctx == NULL) EXM_THROW(30, "zstd: allocation error : can't create ZSTD_CStream"); +#endif ress.srcBufferSize = ZSTD_CStreamInSize(); ress.srcBuffer = malloc(ress.srcBufferSize); ress.dstBufferSize = ZSTD_CStreamOutSize(); diff --git a/programs/fileio.h b/programs/fileio.h index 19f09c33..11178bcc 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -12,12 +12,13 @@ #define FILEIO_H_23981798732 #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */ -#include "zstd.h" /* ZSTD_compressionParameters */ +#include "zstd.h" /* ZSTD_* */ #if defined (__cplusplus) extern "C" { #endif + /* ************************************* * Special i/o constants **************************************/ diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 549dad01..64f2c919 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -20,6 +20,7 @@ #endif + /*-************************************ * Dependencies **************************************/ diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index 2cb6d65e..bef8734c 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -992,8 +992,8 @@ int main(int argc, const char** argv) int mainPause = 0; int mtOnly = 0; const char* const programName = argv[0]; - ZSTD_customMem customMem = { allocFunction, freeFunction, NULL }; - ZSTD_customMem customNULL = { NULL, NULL, NULL }; + ZSTD_customMem const customMem = { allocFunction, freeFunction, NULL }; + 
ZSTD_customMem const customNULL = { NULL, NULL, NULL }; /* Check command line */ for(argNb=1; argNb Date: Thu, 26 Jan 2017 09:16:56 -0800 Subject: [PATCH 72/73] fixed clang documentation warning --- lib/zstd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/zstd.h b/lib/zstd.h index 8325710b..5c80bbac 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -401,12 +401,12 @@ ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); * Gives the amount of memory used by a given ZSTD_CCtx */ ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); -/*! ZSTD_setCCtxParameter() : - * Set advanced parameters, selected through enum ZSTD_CCtxParameter - * @result : 0, or an error code (which can be tested with ZSTD_isError()) */ typedef enum { ZSTD_p_forceWindow /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0)*/ } ZSTD_CCtxParameter; +/*! ZSTD_setCCtxParameter() : + * Set advanced parameters, selected through enum ZSTD_CCtxParameter + * @result : 0, or an error code (which can be tested with ZSTD_isError()) */ size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value); /*! ZSTD_createCDict_byReference() : From ef33d005329aca32183888c8334b85fced9e5caf Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 26 Jan 2017 12:24:21 -0800 Subject: [PATCH 73/73] fixed : ZSTD_setCCtxParameter() properly exposed in DLL --- lib/zstd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/zstd.h b/lib/zstd.h index 5c80bbac..f5cbf4b4 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -407,7 +407,7 @@ typedef enum { /*! 
ZSTD_setCCtxParameter() : * Set advanced parameters, selected through enum ZSTD_CCtxParameter * @result : 0, or an error code (which can be tested with ZSTD_isError()) */ -size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value); +ZSTDLIB_API size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value); /*! ZSTD_createCDict_byReference() : * Create a digested dictionary for compression