Merge pull request #876 from facebook/srcSize

CLI Fix : srcSize written in frame headers when compressing multiple files
dev
Yann Collet 2017-10-02 15:02:05 -07:00 committed by GitHub
commit 004fd34fd9
2 changed files with 32 additions and 28 deletions

View File

@ -511,7 +511,7 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams(
ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
{
DEBUGLOG(5, " setting pledgedSrcSize to %u", (U32)pledgedSrcSize);
DEBUGLOG(4, " setting pledgedSrcSize to %u", (U32)pledgedSrcSize);
if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
return 0;
@ -523,7 +523,7 @@ size_t ZSTD_CCtx_loadDictionary_advanced(
{
if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
if (cctx->staticSize) return ERROR(memory_allocation); /* no malloc for static CCtx */
DEBUGLOG(5, "load dictionary of size %u", (U32)dictSize);
DEBUGLOG(4, "load dictionary of size %u", (U32)dictSize);
ZSTD_freeCDict(cctx->cdictLocal); /* in case one already exists */
if (dict==NULL || dictSize==0) { /* no dictionary mode */
cctx->cdictLocal = NULL;
@ -791,13 +791,13 @@ static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1,
static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pledgedSrcSize)
{
U32 const end = (U32)(cctx->nextSrc - cctx->base);
DEBUGLOG(5, "continue mode");
DEBUGLOG(4, "continue mode");
cctx->appliedParams = params;
cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
cctx->consumedSrcSize = 0;
if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
cctx->appliedParams.fParams.contentSizeFlag = 0;
DEBUGLOG(5, "pledged content size : %u ; flag : %u",
DEBUGLOG(4, "pledged content size : %u ; flag : %u",
(U32)pledgedSrcSize, cctx->appliedParams.fParams.contentSizeFlag);
cctx->lowLimit = end;
cctx->dictLimit = end;
@ -821,11 +821,13 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
ZSTD_compResetPolicy_e const crp,
ZSTD_buffered_policy_e const zbuff)
{
DEBUGLOG(4, "ZSTD_resetCCtx_internal");
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
DEBUGLOG(4, "pledgedSrcSize: %u", (U32)pledgedSrcSize);
if (crp == ZSTDcrp_continue) {
if (ZSTD_equivalentParams(params, zc->appliedParams)) {
DEBUGLOG(5, "ZSTD_equivalentParams()==1");
DEBUGLOG(4, "ZSTD_equivalentParams()==1");
assert(!(params.ldmParams.enableLdm &&
params.ldmParams.hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET));
zc->entropy->hufCTable_repeatMode = HUF_repeat_none;
@ -2010,8 +2012,6 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
ZSTD_buffered_policy_e zbuff)
{
DEBUGLOG(4, "ZSTD_compressBegin_internal");
DEBUGLOG(4, "dict ? %s", dict ? "dict" : (cdict ? "cdict" : "none"));
DEBUGLOG(4, "dictMode : %u", (U32)dictMode);
/* params are supposed to be fully validated at this point */
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
assert(!((dict) && (cdict))); /* either dict or cdict, not both */
@ -2123,9 +2123,9 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
if (ZSTD_isError(endResult)) return endResult;
if (cctx->appliedParams.fParams.contentSizeFlag) { /* control src size */
DEBUGLOG(5, "end of frame : controlling src size");
DEBUGLOG(4, "end of frame : controlling src size");
if (cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1) {
DEBUGLOG(5, "error : pledgedSrcSize = %u, while realSrcSize = %u",
DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize = %u",
(U32)cctx->pledgedSrcSizePlusOne-1, (U32)cctx->consumedSrcSize);
return ERROR(srcSize_wrong);
} }
@ -2484,7 +2484,7 @@ size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
ZSTD_CCtx_params params = zcs->requestedParams;
params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
params.cParams = ZSTD_getCParamsFromCCtxParams(params, pledgedSrcSize, 0);
DEBUGLOG(5, "ZSTD_resetCStream");
DEBUGLOG(4, "ZSTD_resetCStream");
return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, zcs->cdict, params, pledgedSrcSize);
}
@ -2496,6 +2496,7 @@ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
ZSTD_CCtx_params params, unsigned long long pledgedSrcSize)
{
DEBUGLOG(4, "ZSTD_initCStream_internal");
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
assert(!((dict) && (cdict))); /* either dict or cdict, not both */
@ -2754,6 +2755,7 @@ size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
ZSTD_inBuffer* input,
ZSTD_EndDirective endOp)
{
DEBUGLOG(5, "ZSTD_compress_generic");
/* check conditions */
if (output->pos > output->size) return ERROR(GENERIC);
if (input->pos > input->size) return ERROR(GENERIC);
@ -2767,6 +2769,7 @@ size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/);
memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */
assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */
DEBUGLOG(4, "ZSTD_compress_generic : transparent init stage");
#ifdef ZSTD_MULTITHREAD
if (params.nbThreads > 1) {
@ -2795,7 +2798,7 @@ size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
#ifdef ZSTD_MULTITHREAD
if (cctx->appliedParams.nbThreads > 1) {
size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
DEBUGLOG(5, "ZSTDMT_compressStream_generic : %u", (U32)flushMin);
DEBUGLOG(5, "ZSTDMT_compressStream_generic result : %u", (U32)flushMin);
if ( ZSTD_isError(flushMin)
|| (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */
ZSTD_startNewCompression(cctx);

View File

@ -401,7 +401,7 @@ typedef struct {
} cRess_t;
static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
U64 srcSize, int srcIsRegularFile,
U64 srcSize,
ZSTD_compressionParameters* comprParams) {
cRess_t ress;
memset(&ress, 0, sizeof(ress));
@ -439,10 +439,9 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
#ifdef ZSTD_NEWAPI
{ /* frame parameters */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_contentSizeFlag, srcIsRegularFile) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_dictIDFlag, g_dictIDFlag) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_checksumFlag, g_checksumFlag) );
CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) );
(void)srcSize;
/* compression level */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionLevel, cLevel) );
/* long distance matching */
@ -465,13 +464,13 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_targetLength, comprParams->targetLength) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_compressionStrategy, (U32)comprParams->strategy) );
/* multi-threading */
DISPLAYLEVEL(5,"set nb threads = %u \n", g_nbThreads);
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_p_nbThreads, g_nbThreads) );
/* dictionary */
CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) );
}
#elif defined(ZSTD_MULTITHREAD)
{ ZSTD_parameters params = ZSTD_getParams(cLevel, srcSize, dictBuffSize);
params.fParams.contentSizeFlag = srcIsRegularFile;
params.fParams.checksumFlag = g_checksumFlag;
params.fParams.noDictIDFlag = !g_dictIDFlag;
if (comprParams->windowLog) params.cParams.windowLog = comprParams->windowLog;
@ -486,7 +485,6 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
}
#else
{ ZSTD_parameters params = ZSTD_getParams(cLevel, srcSize, dictBuffSize);
params.fParams.contentSizeFlag = srcIsRegularFile;
params.fParams.checksumFlag = g_checksumFlag;
params.fParams.noDictIDFlag = !g_dictIDFlag;
if (comprParams->windowLog) params.cParams.windowLog = comprParams->windowLog;
@ -776,6 +774,7 @@ static int FIO_compressFilename_internal(cRess_t ress,
U64 readsize = 0;
U64 compressedfilesize = 0;
U64 const fileSize = UTIL_getFileSize(srcFileName);
DISPLAYLEVEL(5, "%s: %u bytes \n", srcFileName, (U32)fileSize);
switch (g_compressionType) {
case FIO_zstdCompression:
@ -815,11 +814,12 @@ static int FIO_compressFilename_internal(cRess_t ress,
/* init */
#ifdef ZSTD_NEWAPI
/* nothing, reset is implied */
if (fileSize!=0) /* when src is stdin, fileSize==0, but is effectively unknown */
ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize); /* note : fileSize==0 means "empty" */
#elif defined(ZSTD_MULTITHREAD)
CHECK( ZSTDMT_resetCStream(ress.cctx, fileSize) );
CHECK( ZSTDMT_resetCStream(ress.cctx, fileSize) ); /* note : fileSize==0 means "unknown" */
#else
CHECK( ZSTD_resetCStream(ress.cctx, fileSize) );
CHECK( ZSTD_resetCStream(ress.cctx, fileSize) ); /* note : fileSize==0 means "unknown" */
#endif
/* Main compression loop */
@ -868,10 +868,10 @@ static int FIO_compressFilename_internal(cRess_t ress,
/* End of Frame */
{ size_t result = 1;
while (result!=0) { /* note : is there any possibility of endless loop ? */
while (result != 0) {
ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 };
#ifdef ZSTD_NEWAPI
ZSTD_inBuffer inBuff = { NULL, 0, 0};
ZSTD_inBuffer inBuff = { NULL, 0, 0 };
result = ZSTD_compress_generic(ress.cctx,
&outBuff, &inBuff, ZSTD_e_end);
#elif defined(ZSTD_MULTITHREAD)
@ -879,11 +879,14 @@ static int FIO_compressFilename_internal(cRess_t ress,
#else
result = ZSTD_endStream(ress.cctx, &outBuff);
#endif
if (ZSTD_isError(result))
if (ZSTD_isError(result)) {
EXM_THROW(26, "Compression error during frame end : %s",
ZSTD_getErrorName(result));
{ size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile);
if (sizeCheck!=outBuff.pos) EXM_THROW(27, "Write error : cannot write frame end into %s", dstFileName); }
}
{ size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile);
if (sizeCheck!=outBuff.pos)
EXM_THROW(27, "Write error : cannot write frame end into %s", dstFileName);
}
compressedfilesize += outBuff.pos;
}
}
@ -981,9 +984,8 @@ int FIO_compressFilename(const char* dstFileName, const char* srcFileName,
{
clock_t const start = clock();
U64 const srcSize = UTIL_getFileSize(srcFileName);
int const isRegularFile = UTIL_isRegularFile(srcFileName);
cRess_t const ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, isRegularFile, comprParams);
cRess_t const ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, comprParams);
int const result = FIO_compressFilename_dstFile(ress, dstFileName, srcFileName, compressionLevel);
double const seconds = (double)(clock() - start) / CLOCKS_PER_SEC;
@ -1004,8 +1006,7 @@ int FIO_compressMultipleFilenames(const char** inFileNamesTable, unsigned nbFile
char* dstFileName = (char*)malloc(FNSPACE);
size_t const suffixSize = suffix ? strlen(suffix) : 0;
U64 const srcSize = (nbFiles != 1) ? 0 : UTIL_getFileSize(inFileNamesTable[0]) ;
int const isRegularFile = (nbFiles > 1) ? 0 : UTIL_isRegularFile(inFileNamesTable[0]); /* won't write frame content size when nbFiles > 1 */
cRess_t ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, isRegularFile, comprParams);
cRess_t ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, comprParams);
/* init */
if (dstFileName==NULL)