Adding new cli endpoint --patch-from= (#1940)

* Adding new cli endpoint --diff-from=

* Appveyor conversion nit

* Using bool set trick instead of direct set

* Removing --diff-from and only leaving --diff-from=#

* Throwing error when both dictFileName vars are set

* Clean up syntax

* Renaming diff-from to patch-from

* Revering comma separated syntax clean up

* Updating playtests with patch-from

* Uncommenting accidentally commented

* Updating remaining docs and var names to be patch-from instead of diff-from

* Constifying

* Using existing log2 function and removing newly created one

* Argument order (moving prefs to end)

* Using comma separated syntax

* Moving to outside #ifndef
dev
Bimba Shrestha 2020-01-10 14:25:24 -08:00 committed by Yann Collet
parent d1cc9d2797
commit f25a6e9f8f
5 changed files with 87 additions and 33 deletions

View File

@ -77,6 +77,7 @@
#define FNSPACE 30
#define PATCHFROM_WINDOWSIZE_EXTRA_BYTES 1 KB
/*-*************************************
* Macros
@ -321,6 +322,7 @@ struct FIO_prefs_s {
int nbWorkers;
int excludeCompressedFiles;
int patchFromMode;
};
@ -487,6 +489,10 @@ void FIO_setLdmHashRateLog(FIO_prefs_t* const prefs, int ldmHashRateLog) {
prefs->ldmHashRateLog = ldmHashRateLog;
}
void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value)
{
prefs->patchFromMode = value != 0;
}
/*-*************************************
* Functions
@ -624,7 +630,7 @@ FIO_openDstFile(FIO_prefs_t* const prefs,
* @return : loaded size
* if fileName==NULL, returns 0 and a NULL pointer
*/
static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName)
static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName, FIO_prefs_t* const prefs)
{
FILE* fileHandle;
U64 fileSize;
@ -638,9 +644,12 @@ static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName)
if (fileHandle==NULL) EXM_THROW(31, "%s: %s", fileName, strerror(errno));
fileSize = UTIL_getFileSize(fileName);
if (fileSize > DICTSIZE_MAX) {
EXM_THROW(32, "Dictionary file %s is too large (> %u MB)",
fileName, DICTSIZE_MAX >> 20); /* avoid extreme cases */
{
size_t const dictSizeMax = prefs->patchFromMode ? prefs->memLimit : DICTSIZE_MAX;
if (fileSize > dictSizeMax) {
EXM_THROW(32, "Dictionary file %s is too large (> %u bytes)",
fileName, (unsigned)dictSizeMax); /* avoid extreme cases */
}
}
*bufferPtr = malloc((size_t)fileSize);
if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno));
@ -743,6 +752,20 @@ FIO_createFilename_fromOutDir(const char* path, const char* outDirName, const si
return result;
}
/* FIO_highbit64() :
* gives position of highest bit.
* note : only works for v > 0 !
*/
static unsigned FIO_highbit64(unsigned long long v)
{
unsigned count = 0;
assert(v != 0);
v >>= 1;
while (v) { v >>= 1; count++; }
return count;
}
#ifndef ZSTD_NOCOMPRESS
/* **********************************************************************
@ -760,8 +783,8 @@ typedef struct {
} cRess_t;
static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
const char* dictFileName, int cLevel,
ZSTD_compressionParameters comprParams) {
const char* dictFileName, const size_t maxSrcFileSize,
int cLevel, ZSTD_compressionParameters comprParams) {
cRess_t ress;
memset(&ress, 0, sizeof(ress));
@ -779,7 +802,7 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
/* Advanced parameters, including dictionary */
{ void* dictBuffer;
size_t const dictBuffSize = FIO_createDictBuffer(&dictBuffer, dictFileName); /* works with dictFileName==NULL */
size_t const dictBuffSize = FIO_createDictBuffer(&dictBuffer, dictFileName, prefs); /* works with dictFileName==NULL */
if (dictFileName && (dictBuffer==NULL))
EXM_THROW(32, "allocation error : can't create dictBuffer");
ress.dictFileName = dictFileName;
@ -787,6 +810,10 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
if (prefs->adaptiveMode && !prefs->ldmFlag && !comprParams.windowLog)
comprParams.windowLog = ADAPT_WINDOWLOG_DEFAULT;
if (prefs->patchFromMode) {
comprParams.windowLog = FIO_highbit64((unsigned long long)maxSrcFileSize + PATCHFROM_WINDOWSIZE_EXTRA_BYTES);
}
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, 1) ); /* always enable content size when available (note: supposed to be default) */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_dictIDFlag, prefs->dictIDFlag) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, prefs->checksumFlag) );
@ -1515,7 +1542,7 @@ int FIO_compressFilename(FIO_prefs_t* const prefs, const char* dstFileName,
const char* srcFileName, const char* dictFileName,
int compressionLevel, ZSTD_compressionParameters comprParams)
{
cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams);
cRess_t const ress = FIO_createCResources(prefs, dictFileName, (size_t)UTIL_getFileSize(srcFileName), compressionLevel, comprParams);
int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
@ -1563,6 +1590,15 @@ FIO_determineCompressedName(const char* srcFileName, const char* outDirName, con
return dstFileNameBuffer;
}
static size_t FIO_getLargestFileSize(const char** inFileNames, unsigned nbFiles)
{
size_t i, fileSize, maxFileSize = 0;
for (i = 0; i < nbFiles; i++) {
fileSize = (size_t)UTIL_getFileSize(inFileNames[i]);
maxFileSize = fileSize > maxFileSize ? fileSize : maxFileSize;
}
return maxFileSize;
}
/* FIO_compressMultipleFilenames() :
* compress nbFiles files
@ -1578,7 +1614,9 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs,
ZSTD_compressionParameters comprParams)
{
int error = 0;
cRess_t ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams);
cRess_t ress = FIO_createCResources(prefs, dictFileName,
FIO_getLargestFileSize(inFileNamesTable, nbFiles),
compressionLevel, comprParams);
/* init */
assert(outFileName != NULL || suffix != NULL);
@ -1648,7 +1686,7 @@ static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFi
/* dictionary */
{ void* dictBuffer;
size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName);
size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName, prefs);
CHECK( ZSTD_initDStream_usingDict(ress.dctx, dictBuffer, dictBufferSize) );
free(dictBuffer);
}
@ -1793,19 +1831,6 @@ static int FIO_passThrough(const FIO_prefs_t* const prefs,
return 0;
}
/* FIO_highbit64() :
* gives position of highest bit.
* note : only works for v > 0 !
*/
static unsigned FIO_highbit64(unsigned long long v)
{
unsigned count = 0;
assert(v != 0);
v >>= 1;
while (v) { v >>= 1; count++; }
return count;
}
/* FIO_zstdErrorHelp() :
* detailed error message when requested window size is too large */
static void

View File

@ -94,6 +94,7 @@ void FIO_setLiteralCompressionMode(
void FIO_setNoProgress(unsigned noProgress);
void FIO_setNotificationLevel(int level);
void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles);
void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value);
/*-*************************************
* Single File functions

View File

@ -122,11 +122,20 @@ the last one takes effect.
Note: If `windowLog` is set to larger than 27, `--long=windowLog` or
`--memory=windowSize` needs to be passed to the decompressor.
* `--patch-from=FILE`:
Specify the file to be used as a reference point for zstd's diff engine.
This is effectively dictionary compression with some convenient parameter
selection, namely that windowSize > srcSize.
Note: cannot use both this and -D together
* `-M#`, `--memory=#`:
Set a memory usage limit for decompression. By default, Zstandard uses 128 MB
Set a memory usage limit. By default, Zstandard uses 128 MB for decompression
as the maximum amount of memory the decompressor is allowed to use, but you can
override this manually if need be in either direction (ie. you can increase or
decrease it).
This is also used during compression when using with --patch-from=. In this case,
this parameter overrides that maximum size allowed for a dictionary. (128 MB).
* `-T#`, `--threads=#`:
Compress using `#` working threads (default: 1).
If `#` is 0, attempt to detect and use the number of physical CPU cores.

View File

@ -597,6 +597,7 @@ int main(int const argCount, const char* argv[])
const char* outFileName = NULL;
const char* outDirName = NULL;
const char* dictFileName = NULL;
const char* patchFromDictFileName = NULL;
const char* suffix = ZSTD_EXTENSION;
unsigned maxDictSize = g_defaultMaxDictSize;
unsigned dictID = 0;
@ -618,7 +619,7 @@ int main(int const argCount, const char* argv[])
/* init */
(void)recursive; (void)cLevelLast; /* not used when ZSTD_NOBENCH set */
(void)memLimit; /* not used when ZSTD_NODECOMPRESS set */
(void)memLimit;
assert(argCount >= 1);
if ((filenames==NULL) || (file_of_names==NULL)) { DISPLAY("zstd: allocation error \n"); exit(1); }
programName = lastNameFromPath(programName);
@ -758,6 +759,7 @@ int main(int const argCount, const char* argv[])
if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--output-dir-flat=")) { outDirName = argument; continue; }
if (longCommandWArg(&argument, "--patch-from=")) { patchFromDictFileName = argument; continue; }
if (longCommandWArg(&argument, "--long")) {
unsigned ldmWindowLog = 0;
ldmFlag = 1;
@ -868,7 +870,7 @@ int main(int const argCount, const char* argv[])
/* destination file name */
case 'o': nextArgumentIsOutFileName=1; lastCommand=1; argument++; break;
/* limit decompression memory */
/* limit memory */
case 'M':
argument++;
memLimit = readU32FromChar(&argument);
@ -1167,12 +1169,28 @@ int main(int const argCount, const char* argv[])
} }
#endif
if (dictFileName != NULL && patchFromDictFileName != NULL) {
DISPLAY("error : can't use -D and --patch-from=# at the same time \n");
CLEAN_RETURN(1);
}
/* No status message in pipe mode (stdin - stdout) or multi-files mode */
if (!strcmp(filenames->fileNames[0], stdinmark) && outFileName && !strcmp(outFileName,stdoutmark) && (g_displayLevel==2)) g_displayLevel=1;
if ((filenames->tableSize > 1) & (g_displayLevel==2)) g_displayLevel=1;
/* IO Stream/File */
FIO_setNotificationLevel(g_displayLevel);
FIO_setPatchFromMode(prefs, patchFromDictFileName != NULL);
if (patchFromDictFileName != NULL) {
dictFileName = patchFromDictFileName;
}
if (memLimit == 0) {
if (compressionParams.windowLog == 0) {
memLimit = (U32)1 << g_defaultMaxWindowLog;
} else {
memLimit = (U32)1 << (compressionParams.windowLog & 31);
} }
FIO_setMemLimit(prefs, memLimit);
if (operation==zom_compress) {
#ifndef ZSTD_NOCOMPRESS
FIO_setNbWorkers(prefs, nbWorkers);
@ -1204,13 +1222,6 @@ int main(int const argCount, const char* argv[])
#endif
} else { /* decompression or test */
#ifndef ZSTD_NODECOMPRESS
if (memLimit == 0) {
if (compressionParams.windowLog == 0) {
memLimit = (U32)1 << g_defaultMaxWindowLog;
} else {
memLimit = (U32)1 << (compressionParams.windowLog & 31);
} }
FIO_setMemLimit(prefs, memLimit);
if (filenames->tableSize == 1 && outFileName) {
operationResult = FIO_decompressFilename(prefs, outFileName, filenames->fileNames[0], dictFileName);
} else {

View File

@ -1202,6 +1202,14 @@ then
$ZSTD -f -vv --rsyncable --single-thread tmp && die "--rsyncable must fail with --single-thread"
fi
println "\n===> patch-from tests"
./datagen -g1000 -P50 > tmp_dict
./datagen -g1000 -P10 > tmp_patch
$ZSTD --memory=10000 --patch-from=tmp_dict tmp_patch -o tmp_patch_diff
$ZSTD -d --memory=10000 --patch-from=tmp_dict tmp_patch_diff -o tmp_patch_recon
$DIFF -s tmp_patch_recon tmp_patch
rm -rf tmp_*
println "\n===> large files tests "