diff --git a/programs/fileio.c b/programs/fileio.c
index 828878c6..c0d6494e 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -319,6 +319,8 @@ struct FIO_prefs_s {
     /* Computation resources preferences */
     unsigned memLimit;
     int nbWorkers;
+
+    int excludeCompressedFiles;
 };
 
 
@@ -359,6 +361,7 @@ FIO_prefs_t* FIO_createPreferences(void)
     ret->srcSizeHint = 0;
     ret->testMode = 0;
     ret->literalCompressionMode = ZSTD_lcm_auto;
+    ret->excludeCompressedFiles = 0;
     return ret;
 }
 
@@ -402,6 +405,8 @@ void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers) {
     prefs->nbWorkers = nbWorkers;
 }
 
+void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles) { prefs->excludeCompressedFiles = excludeCompressedFiles; }
+
 void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize) {
     if (blockSize && prefs->nbWorkers==0)
         DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n");
@@ -1425,6 +1430,21 @@ static int FIO_compressFilename_dstFile(FIO_prefs_t* const prefs,
     return result;
 }
 
+/* NULL-terminated list of file extensions treated as "already compressed"
+ * by the --exclude-compressed flag. Distinct from the suffixList : it only applies to compress operations.
+ */
+static const char *compressedFileExtensions[] = {
+    ZSTD_EXTENSION,
+    TZSTD_EXTENSION,
+    GZ_EXTENSION,
+    TGZ_EXTENSION,
+    LZMA_EXTENSION,
+    XZ_EXTENSION,
+    TXZ_EXTENSION,
+    LZ4_EXTENSION,
+    TLZ4_EXTENSION,
+    NULL
+};
 
 /*! FIO_compressFilename_srcFile() :
  * @return : 0 : compression completed correctly,
@@ -1451,6 +1471,15 @@ FIO_compressFilename_srcFile(FIO_prefs_t* const prefs,
         return 1;
     }
 
+    /* Only when --exclude-compressed is set : check the extension of srcFileName.
+     * Listed extension => the file is skipped and 0 (success) is returned.
+     * Any other name   => compression proceeds as usual.
+     */
+    if (prefs->excludeCompressedFiles && UTIL_isCompressedFile(srcFileName, compressedFileExtensions)) {
+        DISPLAYLEVEL(4, "File is already compressed : %s \n", srcFileName);
+        return 0;
+    }
+
     ress.srcFile = FIO_openSrcFile(srcFileName);
     if (ress.srcFile == NULL) return 1; /* srcFile could not be opened */
 
diff --git a/programs/fileio.h b/programs/fileio.h
index af2c5d9d..a7da089f 100644
--- a/programs/fileio.h
+++ b/programs/fileio.h
@@ -93,6 +93,7 @@ void FIO_setLiteralCompressionMode(
 
 void FIO_setNoProgress(unsigned noProgress);
 void FIO_setNotificationLevel(int level);
+void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles);
 
 /*-*************************************
 * Single File functions
diff --git a/programs/util.c b/programs/util.c
index 58705880..2143d178 100644
--- a/programs/util.c
+++ b/programs/util.c
@@ -326,6 +326,29 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char
 
 #endif /* #ifdef _WIN32 */
 
+/* UTIL_isCompressedFile() :
+ * @return : 1 if the extension of inputName is equal (exact, case-sensitive strcmp)
+ *           to an entry of the NULL-terminated extensionList, 0 otherwise. */
+int UTIL_isCompressedFile(const char *inputName, const char *extensionList[])
+{
+    const char* const ext = UTIL_getFileExtension(inputName);
+    while (*extensionList != NULL) {
+        if (strcmp(ext, *extensionList) == 0) return 1;
+        ++extensionList;
+    }
+    return 0;
+}
+
+/* UTIL_getFileExtension() :
+ * @return : pointer to the last '.' within infilename (the '.' is part of the result),
+ *           or "" when there is no '.' or when that '.' is the first character. */
+const char* UTIL_getFileExtension(const char* infilename)
+{
+    const char* const extension = strrchr(infilename, '.');
+    if (!extension || extension == infilename) return "";
+    return extension;
+}
+
 /*
  * UTIL_createFileList - takes a list of files and directories (params: inputNames, inputNamesNb), scans directories,
  * and returns a new list of files (params: return value, allocatedBuffer, allocatedNamesNb).
diff --git a/programs/util.h b/programs/util.h
index 71ba0d4f..0d0642cb 100644
--- a/programs/util.h
+++ b/programs/util.h
@@ -40,7 +40,6 @@ extern "C" {
 #include <time.h> /* clock_t, clock, CLOCKS_PER_SEC, nanosleep */
 #include "mem.h" /* U32, U64 */
 
-
 /*-************************************************************
 * Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
 ***************************************************************/
@@ -135,6 +134,8 @@ U32 UTIL_isDirectory(const char* infilename);
 int UTIL_getFileStat(const char* infilename, stat_t* statbuf);
 int UTIL_isSameFile(const char* file1, const char* file2);
 int UTIL_compareStr(const void *p1, const void *p2);
+int UTIL_isCompressedFile(const char* infilename, const char *extensionList[]);
+const char* UTIL_getFileExtension(const char* infilename);
 
 U32 UTIL_isFIFO(const char* infilename);
 U32 UTIL_isLink(const char* infilename);
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index 3d5c4280..5463c019 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -136,6 +136,7 @@ static int usage_advanced(const char* programName)
     DISPLAY( " -q : suppress warnings; specify twice to suppress errors too\n");
     DISPLAY( " -c : force write to standard output, even if it is the console\n");
     DISPLAY( " -l : print information about zstd compressed files \n");
+    DISPLAY( "--exclude-compressed: only compress files that are not previously compressed \n");
 #ifndef ZSTD_NOCOMPRESS
     DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel());
     DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
@@ -708,7 +709,7 @@ int main(int argCount, const char* argv[])
                 if (!strcmp(argument, "--compress-literals")) { literalCompressionMode = ZSTD_lcm_huffman; continue; }
                 if (!strcmp(argument, "--no-compress-literals")) { literalCompressionMode = ZSTD_lcm_uncompressed; continue; }
                 if (!strcmp(argument, "--no-progress")) { FIO_setNoProgress(1); continue; }
-
+                if (!strcmp(argument, "--exclude-compressed")) { FIO_setExcludeCompressedFile(prefs, 1); continue; }
                 /* long commands with arguments */
 #ifndef ZSTD_NODICT
                 if (longCommandWArg(&argument, "--train-cover")) {
diff --git a/tests/playTests.sh b/tests/playTests.sh
index c1da1650..4672ecaa 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -215,6 +215,37 @@ $ZSTD tmp -c --compress-literals -19 | $ZSTD -t
 $ZSTD -b --fast=1 -i0e1 tmp --compress-literals
 $ZSTD -b --fast=1 -i0e1 tmp --no-compress-literals
 
+println "test: --exclude-compressed flag"
+rm -rf precompressedFilterTestDir
+mkdir -p precompressedFilterTestDir
+./datagen $size > precompressedFilterTestDir/input.5
+./datagen $size > precompressedFilterTestDir/input.6
+$ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir
+./datagen $size > precompressedFilterTestDir/input.7
+./datagen $size > precompressedFilterTestDir/input.8
+$ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir
+test ! -f precompressedFilterTestDir/input.5.zst.zst
+test ! -f precompressedFilterTestDir/input.6.zst.zst
+file1timestamp=`date -r precompressedFilterTestDir/input.5.zst +%s`
+file2timestamp=`date -r precompressedFilterTestDir/input.7.zst +%s`
+if [ "$file2timestamp" -ge "$file1timestamp" ]; then
+    println "Test is successful. input.5.zst is precompressed and therefore not compressed/modified again."
+else
+    println "Test is not successful"
+    exit 1
+fi
+# File extension check : ".zstbar" is not a compressed-file extension
+./datagen $size > precompressedFilterTestDir/input.zstbar
+$ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir
+# zstd should compress input.zstbar
+test -f precompressedFilterTestDir/input.zstbar.zst
+# Check without the --exclude-compressed flag
+$ZSTD --long --rm -r precompressedFilterTestDir
+# Files should get compressed again without the --exclude-compressed flag.
+test -f precompressedFilterTestDir/input.5.zst.zst
+test -f precompressedFilterTestDir/input.6.zst.zst
+println "Test completed"
+
 println "test : file removal"
 $ZSTD -f --rm tmp
 test ! -f tmp # tmp should no longer be present