Add --stream-size=# command

This commit is contained in:
Nick Magerko 2019-08-15 23:57:55 -07:00
parent c9072ee674
commit af0c9501d1
4 changed files with 46 additions and 4 deletions

View File

@ -304,6 +304,7 @@ struct FIO_prefs_s {
int ldmMinMatch; int ldmMinMatch;
int ldmBucketSizeLog; int ldmBucketSizeLog;
int ldmHashRateLog; int ldmHashRateLog;
size_t streamSrcSize;
size_t targetCBlockSize; size_t targetCBlockSize;
ZSTD_literalCompressionMode_e literalCompressionMode; ZSTD_literalCompressionMode_e literalCompressionMode;
@ -349,6 +350,7 @@ FIO_prefs_t* FIO_createPreferences(void)
ret->ldmMinMatch = 0; ret->ldmMinMatch = 0;
ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET; ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET; ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
ret->streamSrcSize = 0;
ret->targetCBlockSize = 0; ret->targetCBlockSize = 0;
ret->literalCompressionMode = ZSTD_lcm_auto; ret->literalCompressionMode = ZSTD_lcm_auto;
return ret; return ret;
@ -418,6 +420,10 @@ void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) {
prefs->rsyncable = rsyncable; prefs->rsyncable = rsyncable;
} }
/* FIO_setStreamSrcSize() :
 * Stores the caller-declared size of a streamed input in `prefs`.
 * The value is consulted when compression resources are created: if the real
 * source size is unknown and this value is > 0, it is pledged in its place.
 * 0 (the value set by FIO_createPreferences) means "no size declared". */
void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize) {
    prefs->streamSrcSize = streamSrcSize;
}
void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) { void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) {
prefs->targetCBlockSize = targetCBlockSize; prefs->targetCBlockSize = targetCBlockSize;
} }
@ -698,9 +704,20 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) );
#endif #endif
/* dictionary */ /* dictionary */
CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) ); /* set the value temporarily for dictionary loading, to adapt compression parameters */ /* set the pledged size for dictionary loading, to adapt compression parameters */
if (srcSize == ZSTD_CONTENTSIZE_UNKNOWN && prefs->streamSrcSize > 0) {
/* unknown source size; use the declared stream size and disable writing this size to frame during compression */
CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, 0) );
} else {
/* use the known source size for adaption */
CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) );
}
CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) ); CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) );
CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, ZSTD_CONTENTSIZE_UNKNOWN) ); /* reset */ if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN || prefs->streamSrcSize == 0) {
/* reset pledge when src size is known or no stream size is declared */
CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, ZSTD_CONTENTSIZE_UNKNOWN) );
}
free(dictBuffer); free(dictBuffer);
} }

View File

@ -71,6 +71,7 @@ void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog);
void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag); void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag);
void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */ void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */
void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable); void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable);
void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize);
void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize); void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize);
void FIO_setLiteralCompressionMode( void FIO_setLiteralCompressionMode(
FIO_prefs_t* const prefs, FIO_prefs_t* const prefs,

View File

@ -141,6 +141,7 @@ static int usage_advanced(const char* programName)
DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog); DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1); DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1);
DISPLAY( "--adapt : dynamically adapt compression level to I/O conditions \n"); DISPLAY( "--adapt : dynamically adapt compression level to I/O conditions \n");
DISPLAY( "--stream-size=# : optimize compression parameters for streaming input of given number of bytes \n");
DISPLAY( "--target-compressed-block-size=# : make compressed block near targeted size \n"); DISPLAY( "--target-compressed-block-size=# : make compressed block near targeted size \n");
#ifdef ZSTD_MULTITHREAD #ifdef ZSTD_MULTITHREAD
DISPLAY( " -T# : spawns # compression threads (default: 1, 0==# cores) \n"); DISPLAY( " -T# : spawns # compression threads (default: 1, 0==# cores) \n");
@ -588,6 +589,7 @@ int main(int argCount, const char* argv[])
const char* suffix = ZSTD_EXTENSION; const char* suffix = ZSTD_EXTENSION;
unsigned maxDictSize = g_defaultMaxDictSize; unsigned maxDictSize = g_defaultMaxDictSize;
unsigned dictID = 0; unsigned dictID = 0;
size_t streamSrcSize = 0;
size_t targetCBlockSize = 0; size_t targetCBlockSize = 0;
int dictCLevel = g_defaultDictCLevel; int dictCLevel = g_defaultDictCLevel;
unsigned dictSelect = g_defaultSelectivityLevel; unsigned dictSelect = g_defaultSelectivityLevel;
@ -745,6 +747,7 @@ int main(int argCount, const char* argv[])
if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; } if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; }
if (longCommandWArg(&argument, "--stream-size=")) { streamSrcSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--long")) { if (longCommandWArg(&argument, "--long")) {
unsigned ldmWindowLog = 0; unsigned ldmWindowLog = 0;
@ -1150,6 +1153,7 @@ int main(int argCount, const char* argv[])
FIO_setAdaptMin(prefs, adaptMin); FIO_setAdaptMin(prefs, adaptMin);
FIO_setAdaptMax(prefs, adaptMax); FIO_setAdaptMax(prefs, adaptMax);
FIO_setRsyncable(prefs, rsyncable); FIO_setRsyncable(prefs, rsyncable);
FIO_setStreamSrcSize(prefs, streamSrcSize);
FIO_setTargetCBlockSize(prefs, targetCBlockSize); FIO_setTargetCBlockSize(prefs, targetCBlockSize);
FIO_setLiteralCompressionMode(prefs, literalCompressionMode); FIO_setLiteralCompressionMode(prefs, literalCompressionMode);
if (adaptMin > cLevel) cLevel = adaptMin; if (adaptMin > cLevel) cLevel = adaptMin;
@ -1160,7 +1164,7 @@ int main(int argCount, const char* argv[])
else else
operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams); operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams);
#else #else
(void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; /* not used when ZSTD_NOCOMPRESS set */ (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)streamSrcSize; (void)targetCBlockSize; /* not used when ZSTD_NOCOMPRESS set */
DISPLAY("Compression not supported \n"); DISPLAY("Compression not supported \n");
#endif #endif
} else { /* decompression or test */ } else { /* decompression or test */

View File

@ -108,7 +108,6 @@ else
fi fi
println "\n===> simple tests " println "\n===> simple tests "
./datagen > tmp ./datagen > tmp
@ -1020,4 +1019,25 @@ test -f dictionary
rm -f tmp* dictionary rm -f tmp* dictionary
println "\n===> stream-size mode"
# Compress the same 11000-byte sample twice -- once as a regular file, once
# piped through stdin with the size declared via --stream-size -- and compare
# the 4th whitespace-separated field of each run's output (with '%' stripped).
./datagen -g11000 > tmp
println "test : basic file compression vs sized streaming compression"
# '2>&1 |' is the portable spelling of the bash-only '|&'.
$ZSTD -14 -f tmp -o tmp.zst 2>&1 | tee file.out
cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11000 2>&1 | tee stream_sized.out
file_ratio=$(cat file.out | awk '{print $4}' | sed 's/%//g')
stream_sized_ratio=$(cat stream_sized.out | awk '{print $4}' | sed 's/%//g')
rm file.out stream_sized.out
ratio_diff=$(echo $file_ratio - $stream_sized_ratio | bc)
# bc prints 1 when the comparison holds; '=' (not '==') is the portable
# string-equality operator for '[', and the substitution is quoted so an
# empty result cannot turn the test into a syntax error.
if [ "$(echo "(100 * $ratio_diff) > 5" | bc -l)" = 1 ]
then
    die "greater than 0.05% difference between file and sized-streaming compression"
fi
println "test : incorrect stream size"
# Declaring 11001 bytes while piping the 11000-byte sample must make zstd fail.
cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11001 && die "should fail with incorrect stream size"
rm -f tmp* rm -f tmp*