From 55ee7d56e4693057922cbd52b26b8ec1d4220bee Mon Sep 17 00:00:00 2001 From: Shashank Tavildar Date: Fri, 25 Oct 2019 15:49:11 -0700 Subject: [PATCH 01/11] Added --exclude-compressed flag feature that skips compression of precompressed files --- programs/fileio.c | 7 ++++++- programs/util.c | 21 +++++++++++++++++++++ programs/util.h | 4 ++++ programs/zstdcli.c | 3 ++- tests/playTests.sh | 19 +++++++++++++++++++ 5 files changed, 52 insertions(+), 2 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index 0365f144..5172aa54 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1453,7 +1453,12 @@ FIO_compressFilename_srcFile(FIO_prefs_t* const prefs, ress.srcFile = FIO_openSrcFile(srcFileName); if (ress.srcFile == NULL) return 1; /* srcFile could not be opened */ - + if (g_excludeCompressedFiles && !UTIL_isPrecompressedFile(srcFileName)) { /* precompressed file (--exclude-compressed). DO NOT COMPRESS */ + DISPLAYLEVEL(4, "Precompressed file: %s \n", srcFileName); + fclose(ress.srcFile); + ress.srcFile = NULL; + return 0; + } result = FIO_compressFilename_dstFile(prefs, ress, dstFileName, srcFileName, compressionLevel); fclose(ress.srcFile); diff --git a/programs/util.c b/programs/util.c index 58705880..830f2039 100644 --- a/programs/util.c +++ b/programs/util.c @@ -326,6 +326,27 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char #endif /* #ifdef _WIN32 */ +/* Check if the file is precompressed (.zst, .lz4, .gz, .xz). +YES => Skip the file (return 0) +NO => return 1 +*/ +int UTIL_isPrecompressedFile(const char *inputName) +{ + return compareExtensions(inputName,compressedFileExtensions); +} + +int compareExtensions(const char* infilename, const char extensionList[4][10]) +{ + int i=0; + //char* ext = strchr(infilename, '.'); + for(i=0;i<4;i++) + { + char* ext = strstr(infilename,extensionList[i]); + if(ext) + return 0; + } + return 1; +} /* * UTIL_createFileList - takes a list of files and directories (params: inputNames, inputNamesNb), scans directories, * and returns a new list of files (params: return value, allocatedBuffer, allocatedNamesNb). diff --git a/programs/util.h b/programs/util.h index 71ba0d4f..0da6255c 100644 --- a/programs/util.h +++ b/programs/util.h @@ -127,6 +127,8 @@ extern int g_utilDisplayLevel; typedef struct stat stat_t; #endif +int g_excludeCompressedFiles; +static const char compressedFileExtensions[4][10] = {".zst",".gz",".xz",".lz4"}; int UTIL_fileExist(const char* filename); int UTIL_isRegularFile(const char* infilename); @@ -135,6 +137,8 @@ U32 UTIL_isDirectory(const char* infilename); int UTIL_getFileStat(const char* infilename, stat_t* statbuf); int UTIL_isSameFile(const char* file1, const char* file2); int UTIL_compareStr(const void *p1, const void *p2); +int UTIL_isPrecompressedFile(const char* infilename); +int compareExtensions(const char* infilename, const char extensionList[4][10]); U32 UTIL_isFIFO(const char* infilename); U32 UTIL_isLink(const char* infilename); diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 3d5c4280..11116afc 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -118,6 +118,7 @@ static int usage(const char* programName) #endif DISPLAY( " -D file: use `file` as Dictionary \n"); DISPLAY( " -o file: result stored into `file` (only if 1 input file) \n"); + DISPLAY( "--exclude-compressed: only compress files that are not previously compressed \n"); DISPLAY( " -f : overwrite output without prompting and (de)compress links \n"); DISPLAY( "--rm : remove source file(s) after successful de/compression \n"); DISPLAY( " -k : preserve source file(s) (default) \n"); @@ -708,7 +709,7 @@ int main(int argCount, const char* argv[]) if (!strcmp(argument, "--compress-literals")) { literalCompressionMode = ZSTD_lcm_huffman; continue; } if (!strcmp(argument, "--no-compress-literals")) { literalCompressionMode = ZSTD_lcm_uncompressed; continue; } if (!strcmp(argument, "--no-progress")) { FIO_setNoProgress(1); continue; } - + if (!strcmp(argument, "--exclude-compressed")) { g_excludeCompressedFiles = 1; continue; } /* long commands with arguments */ #ifndef ZSTD_NODICT if (longCommandWArg(&argument, "--train-cover")) { diff --git a/tests/playTests.sh b/tests/playTests.sh index f68ee81a..0946c032 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -215,6 +215,25 @@ $ZSTD tmp -c --compress-literals -19 | $ZSTD -t $ZSTD -b --fast=1 -i0e1 tmp --compress-literals $ZSTD -b --fast=1 -i0e1 tmp --no-compress-literals +println "test: --exclude-compressed flag" +mkdir precompressedFilterTestDir +./datagen $size > precompressedFilterTestDir/input.5 +./datagen $size > precompressedFilterTestDir/input.6 +$ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir +sleep 5 +./datagen $size > precompressedFilterTestDir/input.7 +./datagen $size > precompressedFilterTestDir/input.8 +$ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir +file1timestamp=`date -r precompressedFilterTestDir/input.5.zst +%s` +file2timestamp=`date -r precompressedFilterTestDir/input.7.zst +%s` +if [[ $file2timestamp -ge $file1timestamp ]]; then + println "Test is successful. input.5.zst is not precompressed and therefore not compressed/modified again." +else + println "Test is not successful" +fi +println "Test completed" +sleep 5 + println "test : file removal" $ZSTD -f --rm tmp test ! -f tmp # tmp should no longer be present From 48f856640e802d7f15990d2973f3fb7562f18ce5 Mon Sep 17 00:00:00 2001 From: Shashank Tavildar Date: Fri, 25 Oct 2019 15:49:11 -0700 Subject: [PATCH 02/11] Added --exclude-compressed flag feature that skips compression of precompressed files --- programs/fileio.c | 7 ++++++- programs/util.c | 21 +++++++++++++++++++++ programs/util.h | 4 ++++ programs/zstdcli.c | 3 ++- tests/playTests.sh | 19 +++++++++++++++++++ 5 files changed, 52 insertions(+), 2 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index 828878c6..ff3401b5 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1453,7 +1453,12 @@ FIO_compressFilename_srcFile(FIO_prefs_t* const prefs, ress.srcFile = FIO_openSrcFile(srcFileName); if (ress.srcFile == NULL) return 1; /* srcFile could not be opened */ - + if (g_excludeCompressedFiles && !UTIL_isPrecompressedFile(srcFileName)) { /* precompressed file (--exclude-compressed). DO NOT COMPRESS */ + DISPLAYLEVEL(4, "Precompressed file: %s \n", srcFileName); + fclose(ress.srcFile); + ress.srcFile = NULL; + return 0; + } result = FIO_compressFilename_dstFile(prefs, ress, dstFileName, srcFileName, compressionLevel); fclose(ress.srcFile); diff --git a/programs/util.c b/programs/util.c index 58705880..830f2039 100644 --- a/programs/util.c +++ b/programs/util.c @@ -326,6 +326,27 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char #endif /* #ifdef _WIN32 */ +/* Check if the file is precompressed (.zst, .lz4, .gz, .xz). +YES => Skip the file (return 0) +NO => return 1 +*/ +int UTIL_isPrecompressedFile(const char *inputName) +{ + return compareExtensions(inputName,compressedFileExtensions); +} + +int compareExtensions(const char* infilename, const char extensionList[4][10]) +{ + int i=0; + //char* ext = strchr(infilename, '.'); + for(i=0;i<4;i++) + { + char* ext = strstr(infilename,extensionList[i]); + if(ext) + return 0; + } + return 1; +} /* * UTIL_createFileList - takes a list of files and directories (params: inputNames, inputNamesNb), scans directories, * and returns a new list of files (params: return value, allocatedBuffer, allocatedNamesNb). diff --git a/programs/util.h b/programs/util.h index 71ba0d4f..0da6255c 100644 --- a/programs/util.h +++ b/programs/util.h @@ -127,6 +127,8 @@ extern int g_utilDisplayLevel; typedef struct stat stat_t; #endif +int g_excludeCompressedFiles; +static const char compressedFileExtensions[4][10] = {".zst",".gz",".xz",".lz4"}; int UTIL_fileExist(const char* filename); int UTIL_isRegularFile(const char* infilename); @@ -135,6 +137,8 @@ U32 UTIL_isDirectory(const char* infilename); int UTIL_getFileStat(const char* infilename, stat_t* statbuf); int UTIL_isSameFile(const char* file1, const char* file2); int UTIL_compareStr(const void *p1, const void *p2); +int UTIL_isPrecompressedFile(const char* infilename); +int compareExtensions(const char* infilename, const char extensionList[4][10]); U32 UTIL_isFIFO(const char* infilename); U32 UTIL_isLink(const char* infilename); diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 3d5c4280..11116afc 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -118,6 +118,7 @@ static int usage(const char* programName) #endif DISPLAY( " -D file: use `file` as Dictionary \n"); DISPLAY( " -o file: result stored into `file` (only if 1 input file) \n"); + DISPLAY( "--exclude-compressed: only compress files that are not previously compressed \n"); DISPLAY( " -f : overwrite output without prompting and (de)compress links \n"); DISPLAY( "--rm : remove source file(s) after successful de/compression \n"); DISPLAY( " -k : preserve source file(s) (default) \n"); @@ -708,7 +709,7 @@ int main(int argCount, const char* argv[]) if (!strcmp(argument, "--compress-literals")) { literalCompressionMode = ZSTD_lcm_huffman; continue; } if (!strcmp(argument, "--no-compress-literals")) { literalCompressionMode = ZSTD_lcm_uncompressed; continue; } if (!strcmp(argument, "--no-progress")) { FIO_setNoProgress(1); continue; } - + if (!strcmp(argument, "--exclude-compressed")) { g_excludeCompressedFiles = 1; continue; } /* long commands with arguments */ #ifndef ZSTD_NODICT if (longCommandWArg(&argument, "--train-cover")) { diff --git a/tests/playTests.sh b/tests/playTests.sh index c1da1650..9294bf81 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -215,6 +215,25 @@ $ZSTD tmp -c --compress-literals -19 | $ZSTD -t $ZSTD -b --fast=1 -i0e1 tmp --compress-literals $ZSTD -b --fast=1 -i0e1 tmp --no-compress-literals +println "test: --exclude-compressed flag" +mkdir precompressedFilterTestDir +./datagen $size > precompressedFilterTestDir/input.5 +./datagen $size > precompressedFilterTestDir/input.6 +$ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir +sleep 5 +./datagen $size > precompressedFilterTestDir/input.7 +./datagen $size > precompressedFilterTestDir/input.8 +$ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir +file1timestamp=`date -r precompressedFilterTestDir/input.5.zst +%s` +file2timestamp=`date -r precompressedFilterTestDir/input.7.zst +%s` +if [[ $file2timestamp -ge $file1timestamp ]]; then + println "Test is successful. input.5.zst is not precompressed and therefore not compressed/modified again." +else + println "Test is not successful" +fi +println "Test completed" +sleep 5 + println "test : file removal" $ZSTD -f --rm tmp test ! -f tmp # tmp should no longer be present From 02433e0b1556c8c9d769471226b12209dd6512c8 Mon Sep 17 00:00:00 2001 From: Shashank Tavildar Date: Mon, 28 Oct 2019 14:54:54 -0700 Subject: [PATCH 03/11] Addressing comments: -Created a list of extensions defined in fileio.h, -Updated test --- programs/fileio.c | 9 +++++++-- programs/util.c | 17 ++++++++--------- programs/util.h | 19 +++++++++++++++---- programs/zstdcli.c | 2 +- tests/playTests.sh | 1 - 5 files changed, 31 insertions(+), 17 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index ff3401b5..7cbf0280 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1453,8 +1453,13 @@ FIO_compressFilename_srcFile(FIO_prefs_t* const prefs, ress.srcFile = FIO_openSrcFile(srcFileName); if (ress.srcFile == NULL) return 1; /* srcFile could not be opened */ - if (g_excludeCompressedFiles && !UTIL_isPrecompressedFile(srcFileName)) { /* precompressed file (--exclude-compressed). DO NOT COMPRESS */ - DISPLAYLEVEL(4, "Precompressed file: %s \n", srcFileName); + + /* Check if "srcFile" is compressed. Only done if --exclude-compressed flag is used + * YES => ZSTD will not compress the file. + * NO => ZSTD will resume with compress operation. + */ + if (g_excludeCompressedFiles && UTIL_isCompressedFile(srcFileName)) { /* precompressed file (--exclude-compressed). DO NOT COMPRESS */ + DISPLAYLEVEL(4, "File is already compressed : %s \n", srcFileName); fclose(ress.srcFile); ress.srcFile = NULL; return 0; diff --git a/programs/util.c b/programs/util.c index 830f2039..63e9ef9e 100644 --- a/programs/util.c +++ b/programs/util.c @@ -330,22 +330,21 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char YES => Skip the file (return 0) NO => return 1 */ -int UTIL_isPrecompressedFile(const char *inputName) +int UTIL_isCompressedFile(const char *inputName) { - return compareExtensions(inputName,compressedFileExtensions); + return compareExtensions(inputName,g_compressedFileExtensions); } -int compareExtensions(const char* infilename, const char extensionList[4][10]) +int compareExtensions(const char* infilename, const char* extensionList[]) { - int i=0; - //char* ext = strchr(infilename, '.'); - for(i=0;i<4;i++) + while(*extensionList != NULL) { - char* ext = strstr(infilename,extensionList[i]); + const char* ext = strstr(infilename,extensionList[i]); if(ext) - return 0; + return 1; + ++extensionList; } - return 1; + return 0; } /* * UTIL_createFileList - takes a list of files and directories (params: inputNames, inputNamesNb), scans directories, diff --git a/programs/util.h b/programs/util.h index 0da6255c..deb70786 100644 --- a/programs/util.h +++ b/programs/util.h @@ -39,7 +39,7 @@ extern "C" { #endif #include /* clock_t, clock, CLOCKS_PER_SEC, nanosleep */ #include "mem.h" /* U32, U64 */ - +#include "fileio.h" /*-************************************************************ * Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW @@ -128,7 +128,18 @@ extern int g_utilDisplayLevel; #endif int g_excludeCompressedFiles; -static const char compressedFileExtensions[4][10] = {".zst",".gz",".xz",".lz4"}; +static const char *g_compressedFileExtensions[] = { + ZSTD_EXTENSION, + TZSTD_EXTENSION, + GZ_EXTENSION, + TGZ_EXTENSION, + LZMA_EXTENSION, + XZ_EXTENSION, + TXZ_EXTENSION, + LZ4_EXTENSION, + TLZ4_EXTENSION, + NULL +}; int UTIL_fileExist(const char* filename); int UTIL_isRegularFile(const char* infilename); @@ -137,8 +148,8 @@ U32 UTIL_isDirectory(const char* infilename); int UTIL_getFileStat(const char* infilename, stat_t* statbuf); int UTIL_isSameFile(const char* file1, const char* file2); int UTIL_compareStr(const void *p1, const void *p2); -int UTIL_isPrecompressedFile(const char* infilename); -int compareExtensions(const char* infilename, const char extensionList[4][10]); +int UTIL_isCompressedFile(const char* infilename); +int compareExtensions(const char* infilename, const char *extensionList[]); U32 UTIL_isFIFO(const char* infilename); U32 UTIL_isLink(const char* infilename); diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 11116afc..a704a1ab 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -118,7 +118,6 @@ static int usage(const char* programName) #endif DISPLAY( " -D file: use `file` as Dictionary \n"); DISPLAY( " -o file: result stored into `file` (only if 1 input file) \n"); - DISPLAY( "--exclude-compressed: only compress files that are not previously compressed \n"); DISPLAY( " -f : overwrite output without prompting and (de)compress links \n"); DISPLAY( "--rm : remove source file(s) after successful de/compression \n"); DISPLAY( " -k : preserve source file(s) (default) \n"); @@ -137,6 +136,7 @@ static int usage_advanced(const char* programName) DISPLAY( " -q : suppress warnings; specify twice to suppress errors too\n"); DISPLAY( " -c : force write to standard output, even if it is the console\n"); DISPLAY( " -l : print information about zstd compressed files \n"); + DISPLAY( "--exclude-compressed: only compress files that are not previously compressed \n"); #ifndef ZSTD_NOCOMPRESS DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel()); DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog); diff --git a/tests/playTests.sh b/tests/playTests.sh index 9294bf81..ca286071 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -232,7 +232,6 @@ else println "Test is not successful" fi println "Test completed" -sleep 5 println "test : file removal" $ZSTD -f --rm tmp From 0e9a37daeb4a9ac0153ccf7cc144732fe5ee42bc Mon Sep 17 00:00:00 2001 From: Shashank Tavildar Date: Mon, 28 Oct 2019 15:22:26 -0700 Subject: [PATCH 04/11] Fixing tests and moving flag to advanced usage --- programs/zstdcli.c | 1 - tests/playTests.sh | 1 - 2 files changed, 2 deletions(-) diff --git a/programs/zstdcli.c b/programs/zstdcli.c index a5a3f30d..a704a1ab 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -118,7 +118,6 @@ static int usage(const char* programName) #endif DISPLAY( " -D file: use `file` as Dictionary \n"); DISPLAY( " -o file: result stored into `file` (only if 1 input file) \n"); - DISPLAY( "--exclude-compressed: only compress files that are not previously compressed \n"); DISPLAY( " -f : overwrite output without prompting and (de)compress links \n"); DISPLAY( "--rm : remove source file(s) after successful de/compression \n"); DISPLAY( " -k : preserve source file(s) (default) \n"); diff --git a/tests/playTests.sh b/tests/playTests.sh index 9294bf81..ca286071 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -232,7 +232,6 @@ else println "Test is not successful" fi println "Test completed" -sleep 5 println "test : file removal" $ZSTD -f --rm tmp From 0f2bff2faf0cb60c4fe5346f56a5b42558a8e1a9 Mon Sep 17 00:00:00 2001 From: Shashank Tavildar Date: Mon, 28 Oct 2019 18:21:47 -0700 Subject: [PATCH 05/11] Addressing comments, removing cyclic dependency with header file, updating tests --- programs/fileio.c | 36 ++++++++++++++++++++++++++---------- programs/fileio.h | 1 + programs/util.c | 14 ++++---------- programs/util.h | 18 +----------------- programs/zstdcli.c | 2 +- tests/playTests.sh | 3 +++ 6 files changed, 36 insertions(+), 38 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index 7cbf0280..e59fb80f 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -319,6 +319,8 @@ struct FIO_prefs_s { /* Computation resources preferences */ unsigned memLimit; int nbWorkers; + + int excludeCompressedFiles; }; @@ -359,6 +361,7 @@ FIO_prefs_t* FIO_createPreferences(void) ret->srcSizeHint = 0; ret->testMode = 0; ret->literalCompressionMode = ZSTD_lcm_auto; + ret->excludeCompressedFiles = 0; return ret; } @@ -402,6 +405,8 @@ void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers) { prefs->nbWorkers = nbWorkers; } +void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles) { prefs->excludeCompressedFiles = excludeCompressedFiles; } + void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize) { if (blockSize && prefs->nbWorkers==0) DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n"); @@ -1425,6 +1430,18 @@ static int FIO_compressFilename_dstFile(FIO_prefs_t* const prefs, return result; } +static const char *compressedFileExtensions[] = { + ZSTD_EXTENSION, + TZSTD_EXTENSION, + GZ_EXTENSION, + TGZ_EXTENSION, + LZMA_EXTENSION, + XZ_EXTENSION, + TXZ_EXTENSION, + LZ4_EXTENSION, + TLZ4_EXTENSION, + NULL +}; /*! FIO_compressFilename_srcFile() : * @return : 0 : compression completed correctly, @@ -1451,19 +1468,18 @@ FIO_compressFilename_srcFile(FIO_prefs_t* const prefs, return 1; } + /* Check if "srcFile" is compressed. Only done if --exclude-compressed flag is used + * YES => ZSTD will skip compression of the file and will return 0. + * NO => ZSTD will resume with compress operation. + */ + if (prefs->excludeCompressedFiles == 1 && UTIL_isCompressedFile(srcFileName, compressedFileExtensions)) { + DISPLAYLEVEL(4, "File is already compressed : %s \n", srcFileName); + return 0; + } + ress.srcFile = FIO_openSrcFile(srcFileName); if (ress.srcFile == NULL) return 1; /* srcFile could not be opened */ - /* Check if "srcFile" is compressed. Only done if --exclude-compressed flag is used - * YES => ZSTD will not compress the file. - * NO => ZSTD will resume with compress operation. - */ - if (g_excludeCompressedFiles && UTIL_isCompressedFile(srcFileName)) { /* precompressed file (--exclude-compressed). DO NOT COMPRESS */ - DISPLAYLEVEL(4, "File is already compressed : %s \n", srcFileName); - fclose(ress.srcFile); - ress.srcFile = NULL; - return 0; - } result = FIO_compressFilename_dstFile(prefs, ress, dstFileName, srcFileName, compressionLevel); fclose(ress.srcFile); diff --git a/programs/fileio.h b/programs/fileio.h index af2c5d9d..a7da089f 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -93,6 +93,7 @@ void FIO_setLiteralCompressionMode( void FIO_setNoProgress(unsigned noProgress); void FIO_setNotificationLevel(int level); +void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles); /*-************************************* * Single File functions diff --git a/programs/util.c b/programs/util.c index f8207955..7212f7b8 100644 --- a/programs/util.c +++ b/programs/util.c @@ -331,24 +331,18 @@ YES => Skip the file (return 0) NO => return 1 */ -int UTIL_isCompressedFile(const char *inputName) +int UTIL_isCompressedFile(const char *inputName, const char *extensionList[]) { - return compareExtensions(inputName,g_compressedFileExtensions); -} - -int compareExtensions(const char* infilename, const char* extensionList[]) -{ - int i=0; - while(*extensionList != NULL) + while(*extensionList!=NULL) { - const char* ext = strstr(infilename,extensionList[i]); + const char* ext = strstr(inputName,*extensionList); if(ext) return 1; ++extensionList; - i++; } return 0; } + /* * UTIL_createFileList - takes a list of files and directories (params: inputNames, inputNamesNb), scans directories, * and returns a new list of files (params: return value, allocatedBuffer, allocatedNamesNb). diff --git a/programs/util.h b/programs/util.h index deb70786..deaea032 100644 --- a/programs/util.h +++ b/programs/util.h @@ -39,7 +39,6 @@ extern "C" { #endif #include /* clock_t, clock, CLOCKS_PER_SEC, nanosleep */ #include "mem.h" /* U32, U64 */ -#include "fileio.h" /*-************************************************************ * Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW @@ -127,19 +126,6 @@ extern int g_utilDisplayLevel; typedef struct stat stat_t; #endif -int g_excludeCompressedFiles; -static const char *g_compressedFileExtensions[] = { - ZSTD_EXTENSION, - TZSTD_EXTENSION, - GZ_EXTENSION, - TGZ_EXTENSION, - LZMA_EXTENSION, - XZ_EXTENSION, - TXZ_EXTENSION, - LZ4_EXTENSION, - TLZ4_EXTENSION, - NULL -}; int UTIL_fileExist(const char* filename); int UTIL_isRegularFile(const char* infilename); @@ -148,9 +134,7 @@ U32 UTIL_isDirectory(const char* infilename); int UTIL_getFileStat(const char* infilename, stat_t* statbuf); int UTIL_isSameFile(const char* file1, const char* file2); int UTIL_compareStr(const void *p1, const void *p2); -int UTIL_isCompressedFile(const char* infilename); -int compareExtensions(const char* infilename, const char *extensionList[]); - +int UTIL_isCompressedFile(const char* infilename, const char *extensionList[]); U32 UTIL_isFIFO(const char* infilename); U32 UTIL_isLink(const char* infilename); #define UTIL_FILESIZE_UNKNOWN ((U64)(-1)) diff --git a/programs/zstdcli.c b/programs/zstdcli.c index a704a1ab..5463c019 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -709,7 +709,7 @@ int main(int argCount, const char* argv[]) if (!strcmp(argument, "--compress-literals")) { literalCompressionMode = ZSTD_lcm_huffman; continue; } if (!strcmp(argument, "--no-compress-literals")) { literalCompressionMode = ZSTD_lcm_uncompressed; continue; } if (!strcmp(argument, "--no-progress")) { FIO_setNoProgress(1); continue; } - if (!strcmp(argument, "--exclude-compressed")) { g_excludeCompressedFiles = 1; continue; } + if (!strcmp(argument, "--exclude-compressed")) { FIO_setExcludeCompressedFile(prefs, 1); continue; } /* long commands with arguments */ #ifndef ZSTD_NODICT if (longCommandWArg(&argument, "--train-cover")) { diff --git a/tests/playTests.sh b/tests/playTests.sh index ca286071..cb703227 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -224,6 +224,8 @@ sleep 5 ./datagen $size > precompressedFilterTestDir/input.7 ./datagen $size > precompressedFilterTestDir/input.8 $ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir +test ! -f input.5.zst.zst +test ! -f input.6.zst.zst file1timestamp=`date -r precompressedFilterTestDir/input.5.zst +%s` file2timestamp=`date -r precompressedFilterTestDir/input.7.zst +%s` if [[ $file2timestamp -ge $file1timestamp ]]; then @@ -232,6 +234,7 @@ else println "Test is not successful" fi println "Test completed" +sleep 5 println "test : file removal" $ZSTD -f --rm tmp From 850ba66139001f2caee0694efbc0e2e53bbbfc5f Mon Sep 17 00:00:00 2001 From: Shashank Tavildar Date: Mon, 28 Oct 2019 22:24:01 -0700 Subject: [PATCH 06/11] Minor fixes in test --- tests/playTests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/playTests.sh b/tests/playTests.sh index cb703227..23931a4a 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -216,7 +216,7 @@ $ZSTD -b --fast=1 -i0e1 tmp --compress-literals $ZSTD -b --fast=1 -i0e1 tmp --no-compress-literals println "test: --exclude-compressed flag" -mkdir precompressedFilterTestDir +mkdir -p precompressedFilterTestDir ./datagen $size > precompressedFilterTestDir/input.5 ./datagen $size > precompressedFilterTestDir/input.6 $ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir From 5e6dbad6c1c038d34c2be67edc500a012e9efb1a Mon Sep 17 00:00:00 2001 From: Shashank Tavildar Date: Tue, 29 Oct 2019 09:54:54 -0700 Subject: [PATCH 07/11] Deleting test directory before creation to pass travis-ci test --- tests/playTests.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/playTests.sh b/tests/playTests.sh index 23931a4a..d36a9d91 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -216,6 +216,7 @@ $ZSTD -b --fast=1 -i0e1 tmp --compress-literals $ZSTD -b --fast=1 -i0e1 tmp --no-compress-literals println "test: --exclude-compressed flag" +rm -rf precompressedFilterTestDir mkdir -p precompressedFilterTestDir ./datagen $size > precompressedFilterTestDir/input.5 ./datagen $size > precompressedFilterTestDir/input.6 From 9ab6a747d40144c804d8a23000c58389931423ff Mon Sep 17 00:00:00 2001 From: Shashank Tavildar Date: Tue, 29 Oct 2019 12:27:54 -0700 Subject: [PATCH 08/11] Created utility function to extract extension from filename, fixed tests --- programs/fileio.c | 3 +++ programs/util.c | 19 ++++++++++++++----- programs/util.h | 2 ++ tests/playTests.sh | 3 +-- 4 files changed, 20 insertions(+), 7 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index e59fb80f..c0d6494e 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1430,6 +1430,9 @@ static int FIO_compressFilename_dstFile(FIO_prefs_t* const prefs, return result; } +/* List used to compare file extensions (used with --exclude-compressed flag) +* Different from the suffixList and should only apply to ZSTD compress operationResult +*/ static const char *compressedFileExtensions[] = { ZSTD_EXTENSION, TZSTD_EXTENSION, diff --git a/programs/util.c b/programs/util.c index 7212f7b8..f6933bc8 100644 --- a/programs/util.c +++ b/programs/util.c @@ -326,23 +326,32 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char #endif /* #ifdef _WIN32 */ -/* Check if the file is precompressed (.zst, .lz4, .gz, .xz). -YES => Skip the file (return 0) -NO => return 1 +/* Check if the file is Compressed by comparing it with compressFileExtension list. +YES => Skip the file (return 1) +NO => return 0 */ int UTIL_isCompressedFile(const char *inputName, const char *extensionList[]) { + const char* ext = UTIL_getFileExtension(inputName); while(*extensionList!=NULL) { - const char* ext = strstr(inputName,*extensionList); - if(ext) + const char* isCompressedExtension = strstr(ext,*extensionList); + if(isCompressedExtension) return 1; ++extensionList; } return 0; } +/*Utility function to get file extension from file */ +const char* UTIL_getFileExtension(const char* infilename) +{ + const char* extension = strrchr(infilename, '.'); + if(!extension || extension==infilename) return ""; + return extension; +} + /* * UTIL_createFileList - takes a list of files and directories (params: inputNames, inputNamesNb), scans directories, * and returns a new list of files (params: return value, allocatedBuffer, allocatedNamesNb). diff --git a/programs/util.h b/programs/util.h index deaea032..0d0642cb 100644 --- a/programs/util.h +++ b/programs/util.h @@ -135,6 +135,8 @@ int UTIL_getFileStat(const char* infilename, stat_t* statbuf); int UTIL_isSameFile(const char* file1, const char* file2); int UTIL_compareStr(const void *p1, const void *p2); int UTIL_isCompressedFile(const char* infilename, const char *extensionList[]); +const char* UTIL_getFileExtension(const char* infilename); + U32 UTIL_isFIFO(const char* infilename); U32 UTIL_isLink(const char* infilename); #define UTIL_FILESIZE_UNKNOWN ((U64)(-1)) diff --git a/tests/playTests.sh b/tests/playTests.sh index d36a9d91..6161983e 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -230,12 +230,11 @@ test ! -f input.6.zst.zst file1timestamp=`date -r precompressedFilterTestDir/input.5.zst +%s` file2timestamp=`date -r precompressedFilterTestDir/input.7.zst +%s` if [[ $file2timestamp -ge $file1timestamp ]]; then - println "Test is successful. input.5.zst is not precompressed and therefore not compressed/modified again." + println "Test is successful. input.5.zst is precompressed and therefore not compressed/modified again." else println "Test is not successful" fi println "Test completed" -sleep 5 println "test : file removal" $ZSTD -f --rm tmp From c5060997e9c8ccc57e2fcc3c8bc0246cb3b93749 Mon Sep 17 00:00:00 2001 From: Shashank Tavildar Date: Tue, 29 Oct 2019 12:56:04 -0700 Subject: [PATCH 09/11] Added check to perform comparison only if extension is present --- programs/util.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/programs/util.c b/programs/util.c index f6933bc8..c93382c6 100644 --- a/programs/util.c +++ b/programs/util.c @@ -334,13 +334,16 @@ NO => return 0 int UTIL_isCompressedFile(const char *inputName, const char *extensionList[]) { const char* ext = UTIL_getFileExtension(inputName); - while(*extensionList!=NULL) - { - const char* isCompressedExtension = strstr(ext,*extensionList); - if(isCompressedExtension) - return 1; - ++extensionList; - } + if(strcmp(ext,"")) + { + while(*extensionList!=NULL) + { + const char* isCompressedExtension = strstr(ext,*extensionList); + if(isCompressedExtension) + return 1; + ++extensionList; + } + } return 0; } From 6d0b7bd6ce797c27f6c977f645f946c97dda4d1a Mon Sep 17 00:00:00 2001 From: Shashank Tavildar Date: Tue, 29 Oct 2019 13:36:09 -0700 Subject: [PATCH 10/11] Changed extension comparision logic, added new test cases --- programs/util.c | 9 ++------- tests/playTests.sh | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/programs/util.c b/programs/util.c index c93382c6..9afb5a86 100644 --- a/programs/util.c +++ b/programs/util.c @@ -326,11 +326,6 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char #endif /* #ifdef _WIN32 */ -/* Check if the file is Compressed by comparing it with compressFileExtension list. -YES => Skip the file (return 1) -NO => return 0 -*/ - int UTIL_isCompressedFile(const char *inputName, const char *extensionList[]) { const char* ext = UTIL_getFileExtension(inputName); @@ -338,8 +333,8 @@ int UTIL_isCompressedFile(const char *inputName, const char *extensionList[]) { while(*extensionList!=NULL) { - const char* isCompressedExtension = strstr(ext,*extensionList); - if(isCompressedExtension) + const int isCompressedExtension = strcmp(ext,*extensionList); + if(isCompressedExtension==0) return 1; ++extensionList; } diff --git a/tests/playTests.sh b/tests/playTests.sh index 6161983e..4672ecaa 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -225,8 +225,8 @@ sleep 5 ./datagen $size > precompressedFilterTestDir/input.7 ./datagen $size > precompressedFilterTestDir/input.8 $ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir -test ! -f input.5.zst.zst -test ! -f input.6.zst.zst +test ! -f precompressedFilterTestDir/input.5.zst.zst +test ! -f precompressedFilterTestDir/input.6.zst.zst file1timestamp=`date -r precompressedFilterTestDir/input.5.zst +%s` file2timestamp=`date -r precompressedFilterTestDir/input.7.zst +%s` if [[ $file2timestamp -ge $file1timestamp ]]; then @@ -234,6 +234,16 @@ if [[ $file2timestamp -ge $file1timestamp ]]; then else println "Test is not successful" fi +#File Extension check. +./datagen $size > precompressedFilterTestDir/input.zstbar +$ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir +#ZSTD should compress input.zstbar +test -f precompressedFilterTestDir/input.zstbar.zst +#Check without the --exclude-compressed flag +$ZSTD --long --rm -r precompressedFilterTestDir +#Files should get compressed again without the --exclude-compressed flag. +test -f precompressedFilterTestDir/input.5.zst.zst +test -f precompressedFilterTestDir/input.6.zst.zst println "Test completed" println "test : file removal" From 3c1649f1395b93a55f66cb307989c556cc5418f8 Mon Sep 17 00:00:00 2001 From: Shashank Tavildar Date: Tue, 29 Oct 2019 15:59:20 -0700 Subject: [PATCH 11/11] Removed the optimization check --- programs/util.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/programs/util.c b/programs/util.c index 9afb5a86..2143d178 100644 --- a/programs/util.c +++ b/programs/util.c @@ -329,15 +329,12 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char int UTIL_isCompressedFile(const char *inputName, const char *extensionList[]) { const char* ext = UTIL_getFileExtension(inputName); - if(strcmp(ext,"")) + while(*extensionList!=NULL) { - while(*extensionList!=NULL) - { - const int isCompressedExtension = strcmp(ext,*extensionList); - if(isCompressedExtension==0) - return 1; - ++extensionList; - } + const int isCompressedExtension = strcmp(ext,*extensionList); + if(isCompressedExtension==0) + return 1; + ++extensionList; } return 0; }