diff --git a/programs/fileio.c b/programs/fileio.c index 5e9d622f..b8183d89 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -624,7 +624,6 @@ FIO_openDstFile(FIO_prefs_t* const prefs, } } - /*! FIO_createDictBuffer() : * creates a buffer, pointed by `*bufferPtr`, * loads `filename` content into it, up to DICTSIZE_MAX bytes. @@ -669,15 +668,9 @@ static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName, FIO_p * Checks for and warns if there are any files that would have the same output path */ int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles) { - const char **filenameTableSorted, *c, *prevElem, *filename; + const char **filenameTableSorted, *prevElem, *filename; unsigned u; - #if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */ - c = "\\"; - #else - c = "/"; - #endif - filenameTableSorted = (const char**) malloc(sizeof(char*) * nbFiles); if (!filenameTableSorted) { DISPLAY("Unable to malloc new str array, not checking for name collisions\n"); @@ -685,7 +678,7 @@ int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles) { } for (u = 0; u < nbFiles; ++u) { - filename = strrchr(filenameTable[u], c[0]); + filename = strrchr(filenameTable[u], PATH_SEP); if (filename == NULL) { filenameTableSorted[u] = filenameTable[u]; } else { @@ -1599,6 +1592,7 @@ int FIO_compressFilename(FIO_prefs_t* const prefs, const char* dstFileName, cRess_t const ress = FIO_createCResources(prefs, dictFileName, UTIL_getFileSize(srcFileName), compressionLevel, comprParams); int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel); +#define DISPLAY_LEVEL_DEFAULT 2 FIO_freeCResources(ress); return result; @@ -1663,6 +1657,7 @@ static unsigned long long FIO_getLargestFileSize(const char** inFileNames, unsig */ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, const char** inFileNamesTable, unsigned nbFiles, + const char* 
outMirroredRootDirName, const char* outDirName, const char* outFileName, const char* suffix, const char* dictFileName, int compressionLevel, @@ -1689,12 +1684,30 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, ress.dstFile = NULL; } } else { - unsigned u; + unsigned int u=0; + if (outMirroredRootDirName) + UTIL_mirrorSourceFilesDirectories(inFileNamesTable, nbFiles, outMirroredRootDirName); + for (u=0; u /* strerror, memcpy */ #endif /* #ifdef _WIN32 */ - /*-**************************************** * Internal Macros ******************************************/ @@ -130,6 +129,18 @@ int UTIL_getFileStat(const char* infilename, stat_t *statbuf) return 1; } +int UTIL_getDirectoryStat(const char* infilename, stat_t *statbuf) +{ +#if defined(_MSC_VER) + int const r = _stat64(infilename, statbuf); + if (!r && (statbuf->st_mode & _S_IFDIR)) return 1; +#else + int const r = stat(infilename, statbuf); + if (!r && S_ISDIR(statbuf->st_mode)) return 1; +#endif + return 0; +} + /* like chmod, but avoid changing permission of /dev/null */ int UTIL_chmod(char const* filename, mode_t permissions) { @@ -178,14 +189,7 @@ int UTIL_setFileStat(const char *filename, stat_t *statbuf) int UTIL_isDirectory(const char* infilename) { stat_t statbuf; -#if defined(_MSC_VER) - int const r = _stat64(infilename, &statbuf); - if (!r && (statbuf.st_mode & _S_IFDIR)) return 1; -#else - int const r = stat(infilename, &statbuf); - if (!r && S_ISDIR(statbuf.st_mode)) return 1; -#endif - return 0; + return UTIL_getDirectoryStat(infilename, &statbuf); } int UTIL_compareStr(const void *p1, const void *p2) { @@ -633,6 +637,287 @@ const char* UTIL_getFileExtension(const char* infilename) return extension; } +static int pathnameHas2Dots(const char *pathname) +{ + return NULL != strstr(pathname, ".."); +} + +static int isFileNameValidForMirroredOutput(const char *filename) +{ + return !pathnameHas2Dots(filename); +} + + +#define DIR_DEFAULT_MODE 0755 +static mode_t getDirMode(const char 
*dirName) +{ + stat_t st; + int ret = UTIL_getDirectoryStat(dirName, &st); + if (!ret) { + UTIL_DISPLAY("zstd: failed to get DIR stats %s: %s\n", dirName, strerror(errno)); + return DIR_DEFAULT_MODE; + } + return st.st_mode; +} + +static int makeDir(const char *dir, mode_t mode) +{ +#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) + int ret = _mkdir(dir); + (void) mode; +#else + int ret = mkdir(dir, mode); +#endif + if (ret != 0) { + if (errno == EEXIST) + return 0; + UTIL_DISPLAY("zstd: failed to create DIR %s: %s\n", dir, strerror(errno)); + } + return ret; +} + +/* this function requires a mutable input string */ +static void convertPathnameToDirName(char *pathname) +{ + size_t len = 0; + char* pos = NULL; + /* get dir name from pathname similar to 'dirname()' */ + assert(pathname != NULL); + + /* remove trailing '/' chars */ + len = strlen(pathname); + assert(len > 0); + while (pathname[len] == PATH_SEP) { + pathname[len] = '\0'; + len--; + } + if (len == 0) return; + + /* if input is a single file, return '.' instead. i.e. + * "xyz/abc/file.txt" => "xyz/abc" + "./file.txt" => "." + "file.txt" => "." 
+ */ + pos = strrchr(pathname, PATH_SEP); + if (pos == NULL) { + pathname[0] = '.'; + pathname[1] = '\0'; + } else { + *pos = '\0'; + } +} + +/* pathname must be valid */ +static const char* trimLeadingRootChar(const char *pathname) +{ + assert(pathname != NULL); + if (pathname[0] == PATH_SEP) + return pathname + 1; + return pathname; +} + +/* pathname must be valid */ +static const char* trimLeadingCurrentDirConst(const char *pathname) +{ + assert(pathname != NULL); + if ((pathname[0] == '.') && (pathname[1] == PATH_SEP)) + return pathname + 2; + return pathname; +} + +static char* +trimLeadingCurrentDir(char *pathname) +{ + /* 'union charunion' can do const-cast without compiler warning */ + union charunion { + char *chr; + const char* cchr; + } ptr; + ptr.cchr = trimLeadingCurrentDirConst(pathname); + return ptr.chr; +} + +/* remove leading './' or '/' chars here */ +static const char * trimPath(const char *pathname) +{ + return trimLeadingRootChar( + trimLeadingCurrentDirConst(pathname)); +} + +static char* mallocAndJoin2Dir(const char *dir1, const char *dir2) +{ + const size_t dir1Size = strlen(dir1); + const size_t dir2Size = strlen(dir2); + char *outDirBuffer, *buffer, trailingChar; + + assert(dir1 != NULL && dir2 != NULL); + outDirBuffer = (char *) malloc(dir1Size + dir2Size + 2); + CONTROL(outDirBuffer != NULL); + + strncpy(outDirBuffer, dir1, dir1Size); + outDirBuffer[dir1Size] = '\0'; + + if (dir2[0] == '.') + return outDirBuffer; + + buffer = outDirBuffer + dir1Size; + trailingChar = *(buffer - 1); + if (trailingChar != PATH_SEP) { + *buffer = PATH_SEP; + buffer++; + } + strncpy(buffer, dir2, dir2Size); + buffer[dir2Size] = '\0'; + + return outDirBuffer; +} + +/* this function will return NULL if input srcFileName is not valid name for mirrored output path */ +char* UTIL_createMirroredDestDirName(const char* srcFileName, const char* outDirRootName) +{ + char* pathname = NULL; + if (!isFileNameValidForMirroredOutput(srcFileName)) + return NULL; + + 
pathname = mallocAndJoin2Dir(outDirRootName, trimPath(srcFileName)); + + convertPathnameToDirName(pathname); + return pathname; +} + +static int +mirrorSrcDir(char* srcDirName, const char* outDirName) +{ + mode_t srcMode; + int status = 0; + char* newDir = mallocAndJoin2Dir(outDirName, trimPath(srcDirName)); + if (!newDir) + return -ENOMEM; + + srcMode = getDirMode(srcDirName); + status = makeDir(newDir, srcMode); + free(newDir); + return status; +} + +static int +mirrorSrcDirRecursive(char* srcDirName, const char* outDirName) +{ + int status = 0; + char* pp = trimLeadingCurrentDir(srcDirName); + char* sp = NULL; + + while ((sp = strchr(pp, PATH_SEP)) != NULL) { + if (sp != pp) { + *sp = '\0'; + status = mirrorSrcDir(srcDirName, outDirName); + if (status != 0) + return status; + *sp = PATH_SEP; + } + pp = sp + 1; + } + status = mirrorSrcDir(srcDirName, outDirName); + return status; +} + +static void +makeMirroredDestDirsWithSameSrcDirMode(char** srcDirNames, unsigned nbFile, const char* outDirName) +{ + unsigned int i = 0; + for (i = 0; i < nbFile; i++) + mirrorSrcDirRecursive(srcDirNames[i], outDirName); +} + +static int +firstIsParentOrSameDirOfSecond(const char* firstDir, const char* secondDir) +{ + size_t firstDirLen = strlen(firstDir), + secondDirLen = strlen(secondDir); + return firstDirLen <= secondDirLen && + (secondDir[firstDirLen] == PATH_SEP || secondDir[firstDirLen] == '\0') && + 0 == strncmp(firstDir, secondDir, firstDirLen); +} + +static int compareDir(const void* pathname1, const void* pathname2) { + /* sort it after remove the leading '/' or './'*/ + const char* s1 = trimPath(*(char * const *) pathname1); + const char* s2 = trimPath(*(char * const *) pathname2); + return strcmp(s1, s2); +} + +static void +makeUniqueMirroredDestDirs(char** srcDirNames, unsigned nbFile, const char* outDirName) +{ + unsigned int i = 0, uniqueDirNr = 0; + char** uniqueDirNames = NULL; + + if (nbFile == 0) + return; + + uniqueDirNames = (char** ) malloc(nbFile * sizeof 
(char *)); + CONTROL(uniqueDirNames != NULL); + + /* if dirs is "a/b/c" and "a/b/c/d", we only need call: + * we just need "a/b/c/d" */ + qsort((void *)srcDirNames, nbFile, sizeof(char*), compareDir); + + uniqueDirNr = 1; + uniqueDirNames[uniqueDirNr - 1] = srcDirNames[0]; + for (i = 1; i < nbFile; i++) { + char* prevDirName = srcDirNames[i - 1]; + char* currDirName = srcDirNames[i]; + + /* note: we alwasy compare trimmed path, i.e.: + * src dir of "./foo" and "/foo" will be both saved into: + * "outDirName/foo/" */ + if (!firstIsParentOrSameDirOfSecond(trimPath(prevDirName), + trimPath(currDirName))) + uniqueDirNr++; + + /* we need maintain original src dir name instead of trimmed + * dir, so we can retrive the original src dir's mode_t */ + uniqueDirNames[uniqueDirNr - 1] = currDirName; + } + + makeMirroredDestDirsWithSameSrcDirMode(uniqueDirNames, uniqueDirNr, outDirName); + + free(uniqueDirNames); +} + +static void +makeMirroredDestDirs(char** srcFileNames, unsigned nbFile, const char* outDirName) +{ + unsigned int i = 0; + for (i = 0; i < nbFile; ++i) + convertPathnameToDirName(srcFileNames[i]); + makeUniqueMirroredDestDirs(srcFileNames, nbFile, outDirName); +} + +void UTIL_mirrorSourceFilesDirectories(const char** inFileNames, unsigned int nbFile, const char* outDirName) +{ + unsigned int i = 0, validFilenamesNr = 0; + char** srcFileNames = (char **) malloc(nbFile * sizeof (char *)); + CONTROL(srcFileNames != NULL); + + /* check input filenames is valid */ + for (i = 0; i < nbFile; ++i) { + if (isFileNameValidForMirroredOutput(inFileNames[i])) { + char* fname = STRDUP(inFileNames[i]); + CONTROL(fname != NULL); + srcFileNames[validFilenamesNr++] = fname; + } + } + + if (validFilenamesNr > 0) { + makeDir(outDirName, DIR_DEFAULT_MODE); + makeMirroredDestDirs(srcFileNames, validFilenamesNr, outDirName); + } + + for (i = 0; i < validFilenamesNr; i++) + free(srcFileNames[i]); + free(srcFileNames); +} FileNamesTable* UTIL_createExpandedFNT(const char** inputNames, 
size_t nbIfns, int followLinks) diff --git a/programs/util.h b/programs/util.h index 8e187e4f..580266e7 100644 --- a/programs/util.h +++ b/programs/util.h @@ -104,6 +104,14 @@ extern int g_utilDisplayLevel; typedef struct stat stat_t; #endif +#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */ +#define PATH_SEP '\\' +#define STRDUP(s) _strdup(s) +#else +#define PATH_SEP '/' +#include +#define STRDUP(s) strdup(s) +#endif int UTIL_fileExist(const char* filename); int UTIL_isRegularFile(const char* infilename); @@ -118,9 +126,13 @@ U64 UTIL_getFileSize(const char* infilename); U64 UTIL_getTotalFileSize(const char* const * fileNamesTable, unsigned nbFiles); int UTIL_getFileStat(const char* infilename, stat_t* statbuf); int UTIL_setFileStat(const char* filename, stat_t* statbuf); +int UTIL_getDirectoryStat(const char* infilename, stat_t* statbuf); int UTIL_chmod(char const* filename, mode_t permissions); /*< like chmod, but avoid changing permission of /dev/null */ int UTIL_compareStr(const void *p1, const void *p2); const char* UTIL_getFileExtension(const char* infilename); +void UTIL_mirrorSourceFilesDirectories(const char** fileNamesTable, unsigned int nbFiles, const char *outDirName); +char* UTIL_createMirroredDestDirName(const char* srcFileName, const char* outDirRootName); + /*-**************************************** @@ -207,6 +219,7 @@ void UTIL_refFilename(FileNamesTable* fnt, const char* filename); # define UTIL_HAS_CREATEFILELIST #elif defined(__linux__) || (PLATFORM_POSIX_VERSION >= 200112L) /* opendir, readdir require POSIX.1-2001 */ # define UTIL_HAS_CREATEFILELIST +# define UTIL_HAS_MIRRORFILELIST #else /* do not define UTIL_HAS_CREATEFILELIST */ #endif diff --git a/programs/zstd.1.md b/programs/zstd.1.md index 176a856c..377e2faf 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -227,6 +227,15 @@ the last one takes effect. if multiple files, from different directories, end up having the same name. 
Collision resolution ensures first file with a given name will be present in `dir`, while in combination with `-f`, the last file will be present instead. +* `--output-dir-mirror[=dir]`: + similar to `--output-dir-flat`, the output files are stored underneath the target + `dir` directory, but this option preserves the input directory structure inside the output `dir`. + + If an input directory path contains "..", the files in that directory will be ignored. If + the input directory is an absolute path (e.g. "/var/tmp/abc"), its files will be + stored under "output-dir/var/tmp/abc". + If there are multiple input files or directories, collision resolution is the same as for + `--output-dir-flat`. * `--format=FORMAT`: compress and decompress in other formats. If compiled with support, zstd can compress to or decompress from other compression algorithm diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 6b795bb6..06a8a446 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -150,6 +150,10 @@ static void usage_advanced(const char* programName) DISPLAYOUT( "--output-dir-flat=DIR : all resulting files are stored into DIR \n"); #endif +#ifdef UTIL_HAS_MIRRORFILELIST + DISPLAYOUT( "--output-dir-mirror==DIR : all resulting files are stored into DIR in the original directory structure \n"); +#endif + DISPLAYOUT( "-- : All arguments after \"--\" are treated as files \n"); #ifndef ZSTD_NOCOMPRESS @@ -645,6 +649,7 @@ int main(int const argCount, const char* argv[]) rsyncable = 0, nextArgumentIsOutFileName = 0, nextArgumentIsOutDirName = 0, + nextArgumentIsMirroredOutDirName = 0, nextArgumentIsMaxDict = 0, nextArgumentIsDictID = 0, nextArgumentsAreFiles = 0, @@ -672,6 +677,7 @@ int main(int const argCount, const char* argv[]) const char* programName = argv[0]; const char* outFileName = NULL; const char* outDirName = NULL; + const char* outMirroredDirName = NULL; const char* dictFileName = NULL; const char* patchFromDictFileName = NULL; const char* suffix = ZSTD_EXTENSION; @@ -768,6
+774,9 @@ int main(int const argCount, const char* argv[]) if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(prefs, 1); continue; } if (!strcmp(argument, "--priority=rt")) { setRealTimePrio = 1; continue; } if (!strcmp(argument, "--output-dir-flat")) {nextArgumentIsOutDirName=1; lastCommand=1; continue; } +#ifdef UTIL_HAS_MIRRORFILELIST + if (!strcmp(argument, "--output-dir-mirror")) {nextArgumentIsMirroredOutDirName=1; lastCommand=1; continue; } +#endif if (!strcmp(argument, "--show-default-cparams")) { showDefaultCParams = 1; continue; } if (!strcmp(argument, "--content-size")) { contentSize = 1; continue; } if (!strcmp(argument, "--no-content-size")) { contentSize = 0; continue; } @@ -838,6 +847,7 @@ int main(int const argCount, const char* argv[]) if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readSizeTFromChar(&argument); continue; } if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readSizeTFromChar(&argument); continue; } if (longCommandWArg(&argument, "--output-dir-flat=")) { outDirName = argument; continue; } + if (longCommandWArg(&argument, "--output-dir-mirror=")) { outMirroredDirName = argument; continue; } if (longCommandWArg(&argument, "--patch-from=")) { patchFromDictFileName = argument; continue; } if (longCommandWArg(&argument, "--long")) { unsigned ldmWindowLog = 0; @@ -1066,6 +1076,13 @@ int main(int const argCount, const char* argv[]) continue; } + if (nextArgumentIsMirroredOutDirName) { + nextArgumentIsMirroredOutDirName = 0; + lastCommand = 0; + outMirroredDirName = argument; + continue; + } + /* none of the above : add filename to list */ UTIL_refFilename(filenames, argument); } @@ -1330,7 +1347,7 @@ int main(int const argCount, const char* argv[]) if ((filenames->tableSize==1) && outFileName) operationResult = FIO_compressFilename(prefs, outFileName, filenames->fileNames[0], dictFileName, cLevel, compressionParams); else - operationResult = FIO_compressMultipleFilenames(prefs, 
filenames->fileNames, (unsigned)filenames->tableSize, outDirName, outFileName, suffix, dictFileName, cLevel, compressionParams); + operationResult = FIO_compressMultipleFilenames(prefs, filenames->fileNames, (unsigned)filenames->tableSize, outMirroredDirName, outDirName, outFileName, suffix, dictFileName, cLevel, compressionParams); #else (void)contentSize; (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; (void)ZSTD_strategyMap; /* not used when ZSTD_NOCOMPRESS set */ DISPLAY("Compression not supported \n"); @@ -1340,7 +1357,7 @@ int main(int const argCount, const char* argv[]) if (filenames->tableSize == 1 && outFileName) { operationResult = FIO_decompressFilename(prefs, outFileName, filenames->fileNames[0], dictFileName); } else { - operationResult = FIO_decompressMultipleFilenames(prefs, filenames->fileNames, (unsigned)filenames->tableSize, outDirName, outFileName, dictFileName); + operationResult = FIO_decompressMultipleFilenames(prefs, filenames->fileNames, (unsigned)filenames->tableSize, outMirroredDirName, outDirName, outFileName, dictFileName); } #else DISPLAY("Decompression not supported \n"); diff --git a/tests/playTests.sh b/tests/playTests.sh index c3168d53..35978875 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -445,6 +445,31 @@ test -f tmpOutDirDecomp/tmp2 test -f tmpOutDirDecomp/tmp1 rm -rf tmp* +if [ "$isWindows" = false ] ; then + println "\n===> compress multiple files into an output directory and mirror input folder, --output-dir-mirror" + println "test --output-dir-mirror" > tmp1 + mkdir -p tmpInputTestDir/we/must/go/deeper + println cool > tmpInputTestDir/we/must/go/deeper/tmp2 + zstd tmp1 -r tmpInputTestDir --output-dir-mirror tmpOutDir + test -f tmpOutDir/tmp1.zst + test -f tmpOutDir/tmpInputTestDir/we/must/go/deeper/tmp2.zst + + println "test: compress input dir will be ignored if it has '..'" + 
zstd -r tmpInputTestDir/we/must/../must --output-dir-mirror non-exist && die "input cannot contain '..'" + test ! -d non-exist + + println "test : decompress multiple files into an output directory, --output-dir-mirror" + zstd tmpOutDir -r -d --output-dir-mirror tmpOutDirDecomp + test -f tmpOutDirDecomp/tmpOutDir/tmp1 + test -f tmpOutDirDecomp/tmpOutDir/tmpInputTestDir/we/must/go/deeper/tmp2 + + println "test: decompress input dir will be ignored if it has '..'" + zstd -r tmpOutDir/tmpInputTestDir/we/must/../must --output-dir-mirror non-exist && die "input cannot contain '..'" + test ! -d non-exist + + rm -rf tmp* +fi + println "test : compress multiple files reading them from a file, --filelist=FILE" println "Hello world!, file1" > tmp1