Adding new cli endpoint --patch-from= (#1940)
* Adding new cli endpoint --diff-from= * Appveyor conversion nit * Using bool set trick instead of direct set * Removing --diff-from and only leaving --diff-from=# * Throwing error when both dictFileName vars are set * Clean up syntax * Renaming diff-from to patch-from * Reverting comma separated syntax clean up * Updating playtests with patch-from * Uncommenting accidentally commented * Updating remaining docs and var names to be patch-from instead of diff-from * Constifying * Using existing log2 function and removing newly created one * Argument order (moving prefs to end) * Using comma separated syntax * Moving to outside #ifndef
This commit is contained in:
parent
d1cc9d2797
commit
f25a6e9f8f
@ -77,6 +77,7 @@
|
|||||||
|
|
||||||
#define FNSPACE 30
|
#define FNSPACE 30
|
||||||
|
|
||||||
|
#define PATCHFROM_WINDOWSIZE_EXTRA_BYTES 1 KB
|
||||||
|
|
||||||
/*-*************************************
|
/*-*************************************
|
||||||
* Macros
|
* Macros
|
||||||
@ -321,6 +322,7 @@ struct FIO_prefs_s {
|
|||||||
int nbWorkers;
|
int nbWorkers;
|
||||||
|
|
||||||
int excludeCompressedFiles;
|
int excludeCompressedFiles;
|
||||||
|
int patchFromMode;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -487,6 +489,10 @@ void FIO_setLdmHashRateLog(FIO_prefs_t* const prefs, int ldmHashRateLog) {
|
|||||||
prefs->ldmHashRateLog = ldmHashRateLog;
|
prefs->ldmHashRateLog = ldmHashRateLog;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* FIO_setPatchFromMode() :
 * records the patch-from switch in prefs.
 * The stored flag is normalized : any non-zero value is stored as 1,
 * zero is stored as 0.
 */
void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value)
{
    int const enabled = (value != 0);
    prefs->patchFromMode = enabled;
}
|
||||||
|
|
||||||
/*-*************************************
|
/*-*************************************
|
||||||
* Functions
|
* Functions
|
||||||
@ -624,7 +630,7 @@ FIO_openDstFile(FIO_prefs_t* const prefs,
|
|||||||
* @return : loaded size
|
* @return : loaded size
|
||||||
* if fileName==NULL, returns 0 and a NULL pointer
|
* if fileName==NULL, returns 0 and a NULL pointer
|
||||||
*/
|
*/
|
||||||
static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName)
|
static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName, FIO_prefs_t* const prefs)
|
||||||
{
|
{
|
||||||
FILE* fileHandle;
|
FILE* fileHandle;
|
||||||
U64 fileSize;
|
U64 fileSize;
|
||||||
@ -638,9 +644,12 @@ static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName)
|
|||||||
if (fileHandle==NULL) EXM_THROW(31, "%s: %s", fileName, strerror(errno));
|
if (fileHandle==NULL) EXM_THROW(31, "%s: %s", fileName, strerror(errno));
|
||||||
|
|
||||||
fileSize = UTIL_getFileSize(fileName);
|
fileSize = UTIL_getFileSize(fileName);
|
||||||
if (fileSize > DICTSIZE_MAX) {
|
{
|
||||||
EXM_THROW(32, "Dictionary file %s is too large (> %u MB)",
|
size_t const dictSizeMax = prefs->patchFromMode ? prefs->memLimit : DICTSIZE_MAX;
|
||||||
fileName, DICTSIZE_MAX >> 20); /* avoid extreme cases */
|
if (fileSize > dictSizeMax) {
|
||||||
|
EXM_THROW(32, "Dictionary file %s is too large (> %u bytes)",
|
||||||
|
fileName, (unsigned)dictSizeMax); /* avoid extreme cases */
|
||||||
|
}
|
||||||
}
|
}
|
||||||
*bufferPtr = malloc((size_t)fileSize);
|
*bufferPtr = malloc((size_t)fileSize);
|
||||||
if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno));
|
if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno));
|
||||||
@ -743,6 +752,20 @@ FIO_createFilename_fromOutDir(const char* path, const char* outDirName, const si
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* FIO_highbit64() :
 * @return : zero-based index of the most significant set bit of v
 *           (1 -> 0, 2 and 3 -> 1, 1024 -> 10).
 * note : v must be non-zero; this is checked with assert().
 */
static unsigned FIO_highbit64(unsigned long long v)
{
    unsigned pos;
    assert(v != 0);
    /* drop low bits one at a time until only the top bit is left */
    for (pos = 0; (v >> 1) != 0; pos++) {
        v >>= 1;
    }
    return pos;
}
|
||||||
|
|
||||||
|
|
||||||
#ifndef ZSTD_NOCOMPRESS
|
#ifndef ZSTD_NOCOMPRESS
|
||||||
|
|
||||||
/* **********************************************************************
|
/* **********************************************************************
|
||||||
@ -760,8 +783,8 @@ typedef struct {
|
|||||||
} cRess_t;
|
} cRess_t;
|
||||||
|
|
||||||
static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
|
static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
|
||||||
const char* dictFileName, int cLevel,
|
const char* dictFileName, const size_t maxSrcFileSize,
|
||||||
ZSTD_compressionParameters comprParams) {
|
int cLevel, ZSTD_compressionParameters comprParams) {
|
||||||
cRess_t ress;
|
cRess_t ress;
|
||||||
memset(&ress, 0, sizeof(ress));
|
memset(&ress, 0, sizeof(ress));
|
||||||
|
|
||||||
@ -779,7 +802,7 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
|
|||||||
|
|
||||||
/* Advanced parameters, including dictionary */
|
/* Advanced parameters, including dictionary */
|
||||||
{ void* dictBuffer;
|
{ void* dictBuffer;
|
||||||
size_t const dictBuffSize = FIO_createDictBuffer(&dictBuffer, dictFileName); /* works with dictFileName==NULL */
|
size_t const dictBuffSize = FIO_createDictBuffer(&dictBuffer, dictFileName, prefs); /* works with dictFileName==NULL */
|
||||||
if (dictFileName && (dictBuffer==NULL))
|
if (dictFileName && (dictBuffer==NULL))
|
||||||
EXM_THROW(32, "allocation error : can't create dictBuffer");
|
EXM_THROW(32, "allocation error : can't create dictBuffer");
|
||||||
ress.dictFileName = dictFileName;
|
ress.dictFileName = dictFileName;
|
||||||
@ -787,6 +810,10 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
|
|||||||
if (prefs->adaptiveMode && !prefs->ldmFlag && !comprParams.windowLog)
|
if (prefs->adaptiveMode && !prefs->ldmFlag && !comprParams.windowLog)
|
||||||
comprParams.windowLog = ADAPT_WINDOWLOG_DEFAULT;
|
comprParams.windowLog = ADAPT_WINDOWLOG_DEFAULT;
|
||||||
|
|
||||||
|
if (prefs->patchFromMode) {
|
||||||
|
comprParams.windowLog = FIO_highbit64((unsigned long long)maxSrcFileSize + PATCHFROM_WINDOWSIZE_EXTRA_BYTES);
|
||||||
|
}
|
||||||
|
|
||||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, 1) ); /* always enable content size when available (note: supposed to be default) */
|
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, 1) ); /* always enable content size when available (note: supposed to be default) */
|
||||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_dictIDFlag, prefs->dictIDFlag) );
|
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_dictIDFlag, prefs->dictIDFlag) );
|
||||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, prefs->checksumFlag) );
|
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, prefs->checksumFlag) );
|
||||||
@ -1515,7 +1542,7 @@ int FIO_compressFilename(FIO_prefs_t* const prefs, const char* dstFileName,
|
|||||||
const char* srcFileName, const char* dictFileName,
|
const char* srcFileName, const char* dictFileName,
|
||||||
int compressionLevel, ZSTD_compressionParameters comprParams)
|
int compressionLevel, ZSTD_compressionParameters comprParams)
|
||||||
{
|
{
|
||||||
cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams);
|
cRess_t const ress = FIO_createCResources(prefs, dictFileName, (size_t)UTIL_getFileSize(srcFileName), compressionLevel, comprParams);
|
||||||
int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
|
int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
|
||||||
|
|
||||||
|
|
||||||
@ -1563,6 +1590,15 @@ FIO_determineCompressedName(const char* srcFileName, const char* outDirName, con
|
|||||||
return dstFileNameBuffer;
|
return dstFileNameBuffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* FIO_getLargestFileSize() :
 * @return : the largest size reported by UTIL_getFileSize() among the
 *           nbFiles entries of inFileNames, or 0 when nbFiles == 0.
 * A size that does not fit in size_t is clamped to the largest size_t value
 * instead of being truncated by the cast : truncation could turn a huge file
 * into a tiny one and lead to a window far too small for it.
 * NOTE(review) : UTIL_getFileSize() presumably reports an "unknown size"
 * sentinel for inputs such as stdin -- confirm; if that sentinel is all-ones,
 * it ends up as the maximum here, as it did before this change.
 */
static size_t FIO_getLargestFileSize(const char** inFileNames, unsigned nbFiles)
{
    size_t const sizeMax = (size_t)-1;
    size_t maxFileSize = 0;
    unsigned i;
    for (i = 0; i < nbFiles; i++) {
        unsigned long long const rawSize = (unsigned long long)UTIL_getFileSize(inFileNames[i]);
        /* saturate rather than wrap when the size is wider than size_t */
        size_t const fileSize = (rawSize > sizeMax) ? sizeMax : (size_t)rawSize;
        if (fileSize > maxFileSize) maxFileSize = fileSize;
    }
    return maxFileSize;
}
|
||||||
|
|
||||||
/* FIO_compressMultipleFilenames() :
|
/* FIO_compressMultipleFilenames() :
|
||||||
* compress nbFiles files
|
* compress nbFiles files
|
||||||
@ -1578,7 +1614,9 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs,
|
|||||||
ZSTD_compressionParameters comprParams)
|
ZSTD_compressionParameters comprParams)
|
||||||
{
|
{
|
||||||
int error = 0;
|
int error = 0;
|
||||||
cRess_t ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams);
|
cRess_t ress = FIO_createCResources(prefs, dictFileName,
|
||||||
|
FIO_getLargestFileSize(inFileNamesTable, nbFiles),
|
||||||
|
compressionLevel, comprParams);
|
||||||
|
|
||||||
/* init */
|
/* init */
|
||||||
assert(outFileName != NULL || suffix != NULL);
|
assert(outFileName != NULL || suffix != NULL);
|
||||||
@ -1648,7 +1686,7 @@ static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFi
|
|||||||
|
|
||||||
/* dictionary */
|
/* dictionary */
|
||||||
{ void* dictBuffer;
|
{ void* dictBuffer;
|
||||||
size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName);
|
size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName, prefs);
|
||||||
CHECK( ZSTD_initDStream_usingDict(ress.dctx, dictBuffer, dictBufferSize) );
|
CHECK( ZSTD_initDStream_usingDict(ress.dctx, dictBuffer, dictBufferSize) );
|
||||||
free(dictBuffer);
|
free(dictBuffer);
|
||||||
}
|
}
|
||||||
@ -1793,19 +1831,6 @@ static int FIO_passThrough(const FIO_prefs_t* const prefs,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* FIO_highbit64() :
|
|
||||||
* gives position of highest bit.
|
|
||||||
* note : only works for v > 0 !
|
|
||||||
*/
|
|
||||||
static unsigned FIO_highbit64(unsigned long long v)
|
|
||||||
{
|
|
||||||
unsigned count = 0;
|
|
||||||
assert(v != 0);
|
|
||||||
v >>= 1;
|
|
||||||
while (v) { v >>= 1; count++; }
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* FIO_zstdErrorHelp() :
|
/* FIO_zstdErrorHelp() :
|
||||||
* detailed error message when requested window size is too large */
|
* detailed error message when requested window size is too large */
|
||||||
static void
|
static void
|
||||||
|
@ -94,6 +94,7 @@ void FIO_setLiteralCompressionMode(
|
|||||||
void FIO_setNoProgress(unsigned noProgress);
|
void FIO_setNoProgress(unsigned noProgress);
|
||||||
void FIO_setNotificationLevel(int level);
|
void FIO_setNotificationLevel(int level);
|
||||||
void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles);
|
void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles);
|
||||||
|
void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value);
|
||||||
|
|
||||||
/*-*************************************
|
/*-*************************************
|
||||||
* Single File functions
|
* Single File functions
|
||||||
|
@ -122,11 +122,20 @@ the last one takes effect.
|
|||||||
|
|
||||||
Note: If `windowLog` is set to larger than 27, `--long=windowLog` or
|
Note: If `windowLog` is set to larger than 27, `--long=windowLog` or
|
||||||
`--memory=windowSize` needs to be passed to the decompressor.
|
`--memory=windowSize` needs to be passed to the decompressor.
|
||||||
|
* `--patch-from=FILE`:
|
||||||
|
Specify the file to be used as a reference point for zstd's diff engine.
|
||||||
|
This is effectively dictionary compression with some convenient parameter
|
||||||
|
selection, namely that windowSize > srcSize.
|
||||||
|
|
||||||
|
Note: cannot use both this and -D together
|
||||||
* `-M#`, `--memory=#`:
|
* `-M#`, `--memory=#`:
|
||||||
Set a memory usage limit for decompression. By default, Zstandard uses 128 MB
|
Set a memory usage limit. By default, Zstandard uses 128 MB for decompression
|
||||||
as the maximum amount of memory the decompressor is allowed to use, but you can
|
as the maximum amount of memory the decompressor is allowed to use, but you can
|
||||||
override this manually if need be in either direction (ie. you can increase or
|
override this manually if need be in either direction (ie. you can increase or
|
||||||
decrease it).
|
decrease it).
|
||||||
|
|
||||||
|
This is also used during compression when used with --patch-from=. In this case,
|
||||||
|
this parameter overrides the maximum size allowed for a dictionary (128 MB).
|
||||||
* `-T#`, `--threads=#`:
|
* `-T#`, `--threads=#`:
|
||||||
Compress using `#` working threads (default: 1).
|
Compress using `#` working threads (default: 1).
|
||||||
If `#` is 0, attempt to detect and use the number of physical CPU cores.
|
If `#` is 0, attempt to detect and use the number of physical CPU cores.
|
||||||
|
@ -597,6 +597,7 @@ int main(int const argCount, const char* argv[])
|
|||||||
const char* outFileName = NULL;
|
const char* outFileName = NULL;
|
||||||
const char* outDirName = NULL;
|
const char* outDirName = NULL;
|
||||||
const char* dictFileName = NULL;
|
const char* dictFileName = NULL;
|
||||||
|
const char* patchFromDictFileName = NULL;
|
||||||
const char* suffix = ZSTD_EXTENSION;
|
const char* suffix = ZSTD_EXTENSION;
|
||||||
unsigned maxDictSize = g_defaultMaxDictSize;
|
unsigned maxDictSize = g_defaultMaxDictSize;
|
||||||
unsigned dictID = 0;
|
unsigned dictID = 0;
|
||||||
@ -618,7 +619,7 @@ int main(int const argCount, const char* argv[])
|
|||||||
|
|
||||||
/* init */
|
/* init */
|
||||||
(void)recursive; (void)cLevelLast; /* not used when ZSTD_NOBENCH set */
|
(void)recursive; (void)cLevelLast; /* not used when ZSTD_NOBENCH set */
|
||||||
(void)memLimit; /* not used when ZSTD_NODECOMPRESS set */
|
(void)memLimit;
|
||||||
assert(argCount >= 1);
|
assert(argCount >= 1);
|
||||||
if ((filenames==NULL) || (file_of_names==NULL)) { DISPLAY("zstd: allocation error \n"); exit(1); }
|
if ((filenames==NULL) || (file_of_names==NULL)) { DISPLAY("zstd: allocation error \n"); exit(1); }
|
||||||
programName = lastNameFromPath(programName);
|
programName = lastNameFromPath(programName);
|
||||||
@ -758,6 +759,7 @@ int main(int const argCount, const char* argv[])
|
|||||||
if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; }
|
if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; }
|
||||||
if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readU32FromChar(&argument); continue; }
|
if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readU32FromChar(&argument); continue; }
|
||||||
if (longCommandWArg(&argument, "--output-dir-flat=")) { outDirName = argument; continue; }
|
if (longCommandWArg(&argument, "--output-dir-flat=")) { outDirName = argument; continue; }
|
||||||
|
if (longCommandWArg(&argument, "--patch-from=")) { patchFromDictFileName = argument; continue; }
|
||||||
if (longCommandWArg(&argument, "--long")) {
|
if (longCommandWArg(&argument, "--long")) {
|
||||||
unsigned ldmWindowLog = 0;
|
unsigned ldmWindowLog = 0;
|
||||||
ldmFlag = 1;
|
ldmFlag = 1;
|
||||||
@ -868,7 +870,7 @@ int main(int const argCount, const char* argv[])
|
|||||||
/* destination file name */
|
/* destination file name */
|
||||||
case 'o': nextArgumentIsOutFileName=1; lastCommand=1; argument++; break;
|
case 'o': nextArgumentIsOutFileName=1; lastCommand=1; argument++; break;
|
||||||
|
|
||||||
/* limit decompression memory */
|
/* limit memory */
|
||||||
case 'M':
|
case 'M':
|
||||||
argument++;
|
argument++;
|
||||||
memLimit = readU32FromChar(&argument);
|
memLimit = readU32FromChar(&argument);
|
||||||
@ -1167,12 +1169,28 @@ int main(int const argCount, const char* argv[])
|
|||||||
} }
|
} }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
if (dictFileName != NULL && patchFromDictFileName != NULL) {
|
||||||
|
DISPLAY("error : can't use -D and --patch-from=# at the same time \n");
|
||||||
|
CLEAN_RETURN(1);
|
||||||
|
}
|
||||||
|
|
||||||
/* No status message in pipe mode (stdin - stdout) or multi-files mode */
|
/* No status message in pipe mode (stdin - stdout) or multi-files mode */
|
||||||
if (!strcmp(filenames->fileNames[0], stdinmark) && outFileName && !strcmp(outFileName,stdoutmark) && (g_displayLevel==2)) g_displayLevel=1;
|
if (!strcmp(filenames->fileNames[0], stdinmark) && outFileName && !strcmp(outFileName,stdoutmark) && (g_displayLevel==2)) g_displayLevel=1;
|
||||||
if ((filenames->tableSize > 1) & (g_displayLevel==2)) g_displayLevel=1;
|
if ((filenames->tableSize > 1) & (g_displayLevel==2)) g_displayLevel=1;
|
||||||
|
|
||||||
/* IO Stream/File */
|
/* IO Stream/File */
|
||||||
FIO_setNotificationLevel(g_displayLevel);
|
FIO_setNotificationLevel(g_displayLevel);
|
||||||
|
FIO_setPatchFromMode(prefs, patchFromDictFileName != NULL);
|
||||||
|
if (patchFromDictFileName != NULL) {
|
||||||
|
dictFileName = patchFromDictFileName;
|
||||||
|
}
|
||||||
|
if (memLimit == 0) {
|
||||||
|
if (compressionParams.windowLog == 0) {
|
||||||
|
memLimit = (U32)1 << g_defaultMaxWindowLog;
|
||||||
|
} else {
|
||||||
|
memLimit = (U32)1 << (compressionParams.windowLog & 31);
|
||||||
|
} }
|
||||||
|
FIO_setMemLimit(prefs, memLimit);
|
||||||
if (operation==zom_compress) {
|
if (operation==zom_compress) {
|
||||||
#ifndef ZSTD_NOCOMPRESS
|
#ifndef ZSTD_NOCOMPRESS
|
||||||
FIO_setNbWorkers(prefs, nbWorkers);
|
FIO_setNbWorkers(prefs, nbWorkers);
|
||||||
@ -1204,13 +1222,6 @@ int main(int const argCount, const char* argv[])
|
|||||||
#endif
|
#endif
|
||||||
} else { /* decompression or test */
|
} else { /* decompression or test */
|
||||||
#ifndef ZSTD_NODECOMPRESS
|
#ifndef ZSTD_NODECOMPRESS
|
||||||
if (memLimit == 0) {
|
|
||||||
if (compressionParams.windowLog == 0) {
|
|
||||||
memLimit = (U32)1 << g_defaultMaxWindowLog;
|
|
||||||
} else {
|
|
||||||
memLimit = (U32)1 << (compressionParams.windowLog & 31);
|
|
||||||
} }
|
|
||||||
FIO_setMemLimit(prefs, memLimit);
|
|
||||||
if (filenames->tableSize == 1 && outFileName) {
|
if (filenames->tableSize == 1 && outFileName) {
|
||||||
operationResult = FIO_decompressFilename(prefs, outFileName, filenames->fileNames[0], dictFileName);
|
operationResult = FIO_decompressFilename(prefs, outFileName, filenames->fileNames[0], dictFileName);
|
||||||
} else {
|
} else {
|
||||||
|
@ -1202,6 +1202,14 @@ then
|
|||||||
$ZSTD -f -vv --rsyncable --single-thread tmp && die "--rsyncable must fail with --single-thread"
|
$ZSTD -f -vv --rsyncable --single-thread tmp && die "--rsyncable must fail with --single-thread"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
println "\n===> patch-from tests"
|
||||||
|
|
||||||
|
./datagen -g1000 -P50 > tmp_dict
|
||||||
|
./datagen -g1000 -P10 > tmp_patch
|
||||||
|
$ZSTD --memory=10000 --patch-from=tmp_dict tmp_patch -o tmp_patch_diff
|
||||||
|
$ZSTD -d --memory=10000 --patch-from=tmp_dict tmp_patch_diff -o tmp_patch_recon
|
||||||
|
$DIFF -s tmp_patch_recon tmp_patch
|
||||||
|
rm -rf tmp_*
|
||||||
|
|
||||||
println "\n===> large files tests "
|
println "\n===> large files tests "
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user