Merge pull request #1658 from facebook/memset

memset() rather than reduceIndex()
dev
Yann Collet 2019-07-01 15:01:43 -07:00 committed by GitHub
commit 857e608b51
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 60 additions and 26 deletions

View File

@ -692,13 +692,13 @@ size_t ZSTD_CCtxParams_getParameter(
*value = CCtxParams->compressionLevel; *value = CCtxParams->compressionLevel;
break; break;
case ZSTD_c_windowLog : case ZSTD_c_windowLog :
*value = CCtxParams->cParams.windowLog; *value = (int)CCtxParams->cParams.windowLog;
break; break;
case ZSTD_c_hashLog : case ZSTD_c_hashLog :
*value = CCtxParams->cParams.hashLog; *value = (int)CCtxParams->cParams.hashLog;
break; break;
case ZSTD_c_chainLog : case ZSTD_c_chainLog :
*value = CCtxParams->cParams.chainLog; *value = (int)CCtxParams->cParams.chainLog;
break; break;
case ZSTD_c_searchLog : case ZSTD_c_searchLog :
*value = CCtxParams->cParams.searchLog; *value = CCtxParams->cParams.searchLog;
@ -1326,15 +1326,17 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pl
typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e; typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e;
typedef enum { ZSTD_resetTarget_CDict, ZSTD_resetTarget_CCtx } ZSTD_resetTarget_e;
static void* static void*
ZSTD_reset_matchState(ZSTD_matchState_t* ms, ZSTD_reset_matchState(ZSTD_matchState_t* ms,
void* ptr, void* ptr,
const ZSTD_compressionParameters* cParams, const ZSTD_compressionParameters* cParams,
ZSTD_compResetPolicy_e const crp, U32 const forCCtx) ZSTD_compResetPolicy_e const crp, ZSTD_resetTarget_e const forWho)
{ {
size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
size_t const hSize = ((size_t)1) << cParams->hashLog; size_t const hSize = ((size_t)1) << cParams->hashLog;
U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
size_t const h3Size = ((size_t)1) << hashLog3; size_t const h3Size = ((size_t)1) << hashLog3;
size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
@ -1348,7 +1350,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
ZSTD_invalidateMatchState(ms); ZSTD_invalidateMatchState(ms);
/* opt parser space */ /* opt parser space */
if (forCCtx && (cParams->strategy >= ZSTD_btopt)) { if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
DEBUGLOG(4, "reserving optimal parser space"); DEBUGLOG(4, "reserving optimal parser space");
ms->opt.litFreq = (unsigned*)ptr; ms->opt.litFreq = (unsigned*)ptr;
ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits); ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits);
@ -1376,6 +1378,19 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
return ptr; return ptr;
} }
/* ZSTD_indexTooCloseToMax() :
* minor optimization : prefer memset() rather than reduceIndex()
* which is measurably slow in some circumstances (reported for Visual Studio).
* Works when re-using a context for a lot of smallish inputs :
* if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,
* memset() will be triggered before reduceIndex().
*/
#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)
static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
{
return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
}
#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 /* define "workspace is too large" as this number of times larger than needed */ #define ZSTD_WORKSPACETOOLARGE_FACTOR 3 /* define "workspace is too large" as this number of times larger than needed */
#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 /* when workspace is continuously too large #define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 /* when workspace is continuously too large
* during at least this number of times, * during at least this number of times,
@ -1399,13 +1414,21 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
if (ZSTD_equivalentParams(zc->appliedParams, params, if (ZSTD_equivalentParams(zc->appliedParams, params,
zc->inBuffSize, zc->inBuffSize,
zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit, zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit,
zbuff, pledgedSrcSize)) { zbuff, pledgedSrcSize) ) {
DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%zu)", DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> consider continue mode");
zc->appliedParams.cParams.windowLog, zc->blockSize);
zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0); /* if it was too large, it still is */ zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0); /* if it was too large, it still is */
if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION) if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION) {
DEBUGLOG(4, "continue mode confirmed (wLog1=%u, blockSize1=%zu)",
zc->appliedParams.cParams.windowLog, zc->blockSize);
if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) {
/* prefer a reset, faster than a rescale */
ZSTD_reset_matchState(&zc->blockState.matchState,
zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32,
&params.cParams,
crp, ZSTD_resetTarget_CCtx);
}
return ZSTD_continueCCtx(zc, params, pledgedSrcSize); return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
} } } } }
DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx"); DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx");
if (params.ldmParams.enableLdm) { if (params.ldmParams.enableLdm) {
@ -1448,7 +1471,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
if (workSpaceTooSmall || workSpaceWasteful) { if (workSpaceTooSmall || workSpaceWasteful) {
DEBUGLOG(4, "Need to resize workSpaceSize from %zuKB to %zuKB", DEBUGLOG(4, "Resize workSpaceSize from %zuKB to %zuKB",
zc->workSpaceSize >> 10, zc->workSpaceSize >> 10,
neededSpace >> 10); neededSpace >> 10);
@ -1490,7 +1513,10 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
ptr = zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32; ptr = ZSTD_reset_matchState(&zc->blockState.matchState,
zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32,
&params.cParams,
crp, ZSTD_resetTarget_CCtx);
/* ldm hash table */ /* ldm hash table */
/* initialize bucketOffsets table later for pointer alignment */ /* initialize bucketOffsets table later for pointer alignment */
@ -1508,8 +1534,6 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
} }
assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
ptr = ZSTD_reset_matchState(&zc->blockState.matchState, ptr, &params.cParams, crp, /* forCCtx */ 1);
/* sequences storage */ /* sequences storage */
zc->seqStore.maxNbSeq = maxNbSeq; zc->seqStore.maxNbSeq = maxNbSeq;
zc->seqStore.sequencesStart = (seqDef*)ptr; zc->seqStore.sequencesStart = (seqDef*)ptr;
@ -3558,10 +3582,10 @@ static size_t ZSTD_initCDict_internal(
/* Reset the state to no dictionary */ /* Reset the state to no dictionary */
ZSTD_reset_compressedBlockState(&cdict->cBlockState); ZSTD_reset_compressedBlockState(&cdict->cBlockState);
{ void* const end = ZSTD_reset_matchState( { void* const end = ZSTD_reset_matchState(&cdict->matchState,
&cdict->matchState, (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32,
(U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32, &cParams,
&cParams, ZSTDcrp_continue, /* forCCtx */ 0); ZSTDcrp_continue, ZSTD_resetTarget_CDict);
assert(end == (char*)cdict->workspace + cdict->workspaceSize); assert(end == (char*)cdict->workspace + cdict->workspaceSize);
(void)end; (void)end;
} }
@ -4071,7 +4095,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
case zcss_flush: case zcss_flush:
DEBUGLOG(5, "flush stage"); DEBUGLOG(5, "flush stage");
{ size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
size_t const flushed = ZSTD_limitCopy(op, oend-op, size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op),
zcs->outBuff + zcs->outBuffFlushedSize, toFlush); zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
(unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed);
@ -4265,7 +4289,7 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */
/* single thread mode : attempt to calculate remaining to flush more precisely */ /* single thread mode : attempt to calculate remaining to flush more precisely */
{ size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
size_t const checksumSize = zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4; size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4);
size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize; size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize;
DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush); DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);
return toFlush; return toFlush;

View File

@ -563,6 +563,9 @@ MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64
/*-************************************* /*-*************************************
* Round buffer management * Round buffer management
***************************************/ ***************************************/
#if (ZSTD_WINDOWLOG_MAX_64 > 31)
# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
#endif
/* Max current allowed */ /* Max current allowed */
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)) #define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
/* Maximum chunk size before overflow correction needs to be called again */ /* Maximum chunk size before overflow correction needs to be called again */

View File

@ -1129,9 +1129,14 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
size_t const produced = ZSTD_isError(cResult) ? 0 : cResult; size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed; size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
assert(flushed <= produced); assert(flushed <= produced);
assert(jobPtr->consumed <= jobPtr->src.size);
toFlush = produced - flushed; toFlush = produced - flushed;
if (toFlush==0 && (jobPtr->consumed >= jobPtr->src.size)) { /* if toFlush==0, nothing is available to flush.
/* doneJobID is not-fully-flushed, but toFlush==0 : doneJobID should be compressing some more data */ * However, jobID is expected to still be active:
* if jobID was already completed and fully flushed,
* ZSTDMT_flushProduced() should have already moved onto next job.
* Therefore, some input has not yet been consumed. */
if (toFlush==0) {
assert(jobPtr->consumed < jobPtr->src.size); assert(jobPtr->consumed < jobPtr->src.size);
} }
} }

View File

@ -15,7 +15,6 @@
***************************************/ ***************************************/
#include <stdlib.h> /* malloc, free */ #include <stdlib.h> /* malloc, free */
#include <string.h> /* memset */ #include <string.h> /* memset */
#undef NDEBUG /* assert must not be disabled */
#include <assert.h> /* assert */ #include <assert.h> /* assert */
#include "timefn.h" /* UTIL_time_t, UTIL_getTime */ #include "timefn.h" /* UTIL_time_t, UTIL_getTime */
@ -54,6 +53,9 @@
return retValue; \ return retValue; \
} }
/* Abort execution if a condition is not met */
#define CONTROL(c) { if (!(c)) { DEBUGOUTPUT("error: %s \n", #c); abort(); } }
/* ************************************* /* *************************************
* Benchmarking an arbitrary function * Benchmarking an arbitrary function
@ -68,13 +70,13 @@ int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
* check outcome validity first, using BMK_isValid_runResult() */ * check outcome validity first, using BMK_isValid_runResult() */
BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome) BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
{ {
assert(outcome.error_tag_never_ever_use_directly == 0); CONTROL(outcome.error_tag_never_ever_use_directly == 0);
return outcome.internal_never_ever_use_directly; return outcome.internal_never_ever_use_directly;
} }
size_t BMK_extract_errorResult(BMK_runOutcome_t outcome) size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)
{ {
assert(outcome.error_tag_never_ever_use_directly != 0); CONTROL(outcome.error_tag_never_ever_use_directly != 0);
return outcome.error_result_never_ever_use_directly; return outcome.error_result_never_ever_use_directly;
} }