diff --git a/programs/util.c b/programs/util.c
index 58705880..2abb1be3 100644
--- a/programs/util.c
+++ b/programs/util.c
@@ -187,6 +187,233 @@ U64 UTIL_getTotalFileSize(const char* const * const fileNamesTable, unsigned nbF
     return error ? UTIL_FILESIZE_UNKNOWN : total;
 }
 
+
+/* UTIL_readLineFromFile() :
+ * Reads the next line of `file` into `buf` (at most len-1 characters), the way fgets() does :
+ * the terminating '\n', when present, is kept in `buf`.
+ * @return : length of the line, not counting its terminating '\n',
+ *           or -1 if nothing could be read (invalid argument, end of file, read error).
+ *           Reaching the end of file is a normal event, so it is not reported as an error. */
+int UTIL_readLineFromFile(char* buf, size_t len, FILE* file)
+{
+    size_t lineLen;
+
+    if (buf == NULL || file == NULL || len < 2) return -1;   /* no room for a single character */
+    if (len > ((size_t)1 << 30)) len = (size_t)1 << 30;   /* fgets() takes an int */
+
+    if (fgets(buf, (int) len, file) == NULL) {
+        /* `buf` may not hold a valid string after a failure : do not display it */
+        if (ferror(file)) {
+            UTIL_DISPLAYLEVEL(1, "[ERROR][UTIL_readLineFromFile] can't read from file\n");
+        }
+        return -1;
+    }
+
+    lineLen = strlen(buf);
+    /* '\n' is missing when the line is the last one of the file, or when it did not fit into `buf` */
+    if (lineLen > 0 && buf[lineLen-1] == '\n') lineLen--;
+    return (int) lineLen;
+}
+
+/* readFromFile() :
+ * Loads every non-empty line of `inputFileName` into `buf`, as a sequence of
+ * consecutive '\0'-terminated strings (the '\n' ending a line is replaced by '\0').
+ * `buf` must be able to hold `inputFileSize` bytes.
+ * @return : nb of lines stored into `buf`, or -1 on error */
+static int readFromFile(char* buf, size_t inputFileSize, const char* inputFileName)
+{
+    FILE* inputFile;
+    size_t pos = 0;
+    int nbFiles = 0;
+    int readError;
+
+    if (!buf) {
+        UTIL_DISPLAYLEVEL(1, "[ERROR][UTIL_createFileNamesTable_fromFileName] Can't create buffer.\n");
+        return -1;
+    }
+
+    inputFile = fopen(inputFileName, "r");   /* opened after the argument check, which would otherwise leak it */
+    if (!inputFile) {
+        UTIL_DISPLAYLEVEL(1, "[ERROR][UTIL_createFileNamesTable_fromFileName] Can't open file to read input file names.\n");
+        return -1;
+    }
+
+    /* storing a line requires at least 2 bytes : one character and its terminator */
+    while (pos + 1 < inputFileSize) {
+        /* only offer the remaining capacity, so that fgets() can't write beyond the end of `buf` */
+        int const lineLen = UTIL_readLineFromFile(buf+pos, inputFileSize-pos, inputFile);
+        if (lineLen < 0) break;   /* end of file or read error : stop, instead of looping forever */
+        if (lineLen == 0) continue;   /* empty line */
+        buf[pos + (size_t)lineLen] = '\0';   /* replaces '\n' ; no-op for a last line without '\n' */
+        pos += (size_t)lineLen + 1;
+        ++nbFiles;
+    }
+
+    readError = ferror(inputFile);
+    fclose(inputFile);
+
+    return readError ? -1 : nbFiles;
+}
+
+/* UTIL_createFileNamesTable_fromFileName() :
+ * Creates a table listing the file names stored in `inputFileName`, one per line.
+ * @return : the new table, to be released with UTIL_freeFileNamesTable(),
+ *           or NULL on failure (not a regular file, file too large or empty, allocation or read error). */
+FileNamesTable*
+UTIL_createFileNamesTable_fromFileName(const char* inputFileName)
+{
+    U64 inputFileSize;
+    size_t bufSize;
+    char* buf;
+    const char** fileNames;
+    FileNamesTable* filesTable;
+    int nbFiles;
+    size_t i, pos = 0;
+
+    if (!UTIL_fileExist(inputFileName) || !UTIL_isRegularFile(inputFileName))
+        return NULL;
+
+    inputFileSize = UTIL_getFileSize(inputFileName);
+    /* UTIL_FILESIZE_UNKNOWN must be rejected before adding 1, which would make it wrap around to 0 */
+    if (inputFileSize == UTIL_FILESIZE_UNKNOWN || inputFileSize >= MAX_FILE_OF_FILE_NAMES_SIZE)
+        return NULL;
+    bufSize = (size_t)inputFileSize + 1;   /* (+1) to add '\0' at the end of last filename */
+
+    buf = (char*) malloc(bufSize);
+    if (!buf) {
+        UTIL_DISPLAYLEVEL(1, "[ERROR][UTIL_createFileNamesTable_fromFileName] Can't create buffer.\n");
+        return NULL;
+    }
+
+    nbFiles = readFromFile(buf, bufSize, inputFileName);
+    if (nbFiles <= 0) {
+        free(buf);
+        return NULL;
+    }
+
+    fileNames = (const char**) malloc(((size_t)nbFiles + 1) * sizeof(*fileNames));
+    if (!fileNames) {
+        free(buf);
+        UTIL_DISPLAYLEVEL(1, "[ERROR][UTIL_createFileNamesTable_fromFileName] Can't create table for files.\n");
+        return NULL;
+    }
+
+    /* readFromFile() stored exactly nbFiles consecutive strings into buf */
+    for (i = 0; i < (size_t)nbFiles; ++i) {
+        fileNames[i] = buf+pos;
+        pos += strlen(buf+pos) + 1;
+    }
+    fileNames[nbFiles] = NULL;   /* end of table marker */
+
+    filesTable = UTIL_createFileNamesTable(fileNames, buf, (size_t)nbFiles);
+    if (!filesTable) {
+        free((void*)fileNames);
+        free(buf);
+        UTIL_DISPLAYLEVEL(1, "[ERROR][UTIL_createFileNamesTable_fromFileName] Can't create table for files.\n");
+        return NULL;
+    }
+
+    return filesTable;
+}
+
+/* UTIL_createFileNamesTable() :
+ * Wraps `filenames` and `buf` into a new FileNamesTable, which becomes their owner.
+ * @return : the new table, or NULL on allocation error (arguments are then left untouched) */
+FileNamesTable*
+UTIL_createFileNamesTable(const char** filenames, char* buf, size_t tableSize)
+{
+    FileNamesTable* const table = (FileNamesTable*) malloc(sizeof(*table));
+    if (!table) return NULL;
+    table->fileNames = filenames;
+    table->buf = buf;
+    table->tableSize = tableSize;
+    return table;
+}
+
+/* UTIL_freeFileNamesTable() :
+ * Releases `table`, as well as the `fileNames` array and the `buf` it references.
+ * Accepts NULL, in which case it does nothing. */
+void UTIL_freeFileNamesTable(FileNamesTable* table)
+{
+    if (table == NULL) return;
+    free((void*)table->fileNames);   /* free(NULL) is a no-op */
+    free(table->buf);
+    free(table);
+}
+
+/* getTotalTableSize() :
+ * @return : nb of bytes required to store all file names of `table`, terminators included.
+ *           Scan stops at the first NULL entry. */
+static size_t getTotalTableSize(const FileNamesTable* table)
+{
+    size_t i, totalSize = 0;
+    for (i = 0; i < table->tableSize && table->fileNames[i]; ++i) {
+        totalSize += strlen(table->fileNames[i]) + 1;   /* +1 to add '\0' at the end of each fileName */
+    }
+    return totalSize;
+}
+
+/* copyFileNames() :
+ * Copies the file names of `table` into `buf`, starting at position `pos`,
+ * and references each copy into `fileNames`, starting at index `*idxPtr` (updated).
+ * Scan stops at the first NULL entry, like getTotalTableSize(), which gives the capacity required from `buf`.
+ * @return : position in `buf` following the last copied name */
+static size_t copyFileNames(const char** fileNames, size_t* idxPtr, char* buf, size_t pos, const FileNamesTable* table)
+{
+    size_t n;
+    for (n = 0; n < table->tableSize && table->fileNames[n]; ++n) {
+        size_t const nameSize = strlen(table->fileNames[n]) + 1;   /* '\0' included */
+        memcpy(buf+pos, table->fileNames[n], nameSize);
+        fileNames[*idxPtr] = buf+pos;
+        (*idxPtr)++;
+        pos += nameSize;
+    }
+    return pos;
+}
+
+/* UTIL_concatenateTwoTables() :
+ * Creates a new table holding a copy of the file names of `table1`, followed by those of `table2`.
+ * Its capacity is the sum of both capacities ; unused entries are set to NULL.
+ * @return : the new table ; `table1` and `table2` are then freed, and must no longer be used.
+ *           NULL on failure ; `table1` and `table2` are then left untouched, and still belong to the caller. */
+FileNamesTable*
+UTIL_concatenateTwoTables(FileNamesTable* table1, FileNamesTable* table2)
+{
+    FileNamesTable* newTable;
+    const char** fileNames;
+    char* buf;
+    size_t newTableSize, newTotalTableSize;
+    size_t nbNames = 0, pos, i;
+
+    /* identical tables would get freed twice */
+    if (table1 == NULL || table2 == NULL || table1 == table2) return NULL;
+
+    newTableSize = table1->tableSize + table2->tableSize;
+    newTotalTableSize = getTotalTableSize(table1) + getTotalTableSize(table2);
+
+    /* +1 : never request 0 bytes, for which malloc() is allowed to return NULL */
+    buf = (char*) malloc(newTotalTableSize + 1);
+    fileNames = (const char**) malloc((newTableSize + 1) * sizeof(*fileNames));
+    newTable = UTIL_createFileNamesTable(fileNames, buf, newTableSize);
+    if (!buf || !fileNames || !newTable) {
+        UTIL_DISPLAYLEVEL(1, "[ERROR][UTIL_concatenateTwoTables] Can't create new table for concatenation output.\n");
+        free(newTable);   /* only the wrapper : its content is released right below */
+        free((void*)fileNames);
+        free(buf);
+        return NULL;
+    }
+
+    pos = copyFileNames(fileNames, &nbNames, buf, 0, table1);
+    pos = copyFileNames(fileNames, &nbNames, buf, pos, table2);
+    buf[pos] = '\0';   /* pos == newTotalTableSize : gives a defined value to the spare byte */
+    for (i = nbNames; i <= newTableSize; ++i) fileNames[i] = NULL;   /* unused entries, plus end of table marker */
+
+    UTIL_freeFileNamesTable(table1);
+    UTIL_freeFileNamesTable(table2);
+
+    return newTable;
+}
+
 #ifdef _WIN32
 int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
 {
diff --git a/programs/util.h b/programs/util.h
index 71ba0d4f..c5f42324 100644
--- a/programs/util.h
+++ b/programs/util.h
@@ -90,7 +90,7 @@ extern "C" {
 * Constants
 ***************************************/
 #define LIST_SIZE_INCREASE (8*1024)
-
+#define MAX_FILE_OF_FILE_NAMES_SIZE ((1<<20)*50)   /* 50 MB ; fully parenthesized, to be safe within any expression */
 
 /*-****************************************
 * Compiler specifics
@@ -142,6 +142,54 @@ U32 UTIL_isLink(const char* infilename);
 U64 UTIL_getFileSize(const char* infilename);
 
 U64 UTIL_getTotalFileSize(const char* const * const fileNamesTable, unsigned nbFiles);
+/*! UTIL_readLineFromFile() :
+ * Reads the next line of `file` into `buf` (at most len-1 characters), the way fgets() does :
+ * the terminating '\n', when present, is kept in `buf`.
+ * Also modifies `*file`, advancing it to the position where it stopped reading.
+ * @return : length of the line, not counting its terminating '\n',
+ *           or -1 if nothing could be read (invalid argument, end of file, read error).
+ */
+int UTIL_readLineFromFile(char* buf, size_t len, FILE* file);
+
+/*! FileNamesTable :
+ * fileNames : array of `tableSize` entries ; names are stored from index 0, a NULL entry marks their end.
+ * buf : storage of the file names referenced by `fileNames` ; can be NULL when names are stored elsewhere.
+ * tableSize : total capacity of the table, which can be larger than the nb of file names it holds.
+ */
+typedef struct
+{
+    const char** fileNames;
+    char* buf;
+    size_t tableSize;
+} FileNamesTable;
+
+/*! UTIL_createFileNamesTable_fromFileName() :
+ * Reads a table of file names from `inputFileName`, which must list one file name per line.
+ * Note : size of `inputFileName` must be less than MAX_FILE_OF_FILE_NAMES_SIZE (50 MB).
+ * @return : the new table, to be released with UTIL_freeFileNamesTable(),
+ *           or NULL on failure (not a regular file, file too large or empty, allocation or read error).
+ */
+FileNamesTable* UTIL_createFileNamesTable_fromFileName(const char* inputFileName);
+
+/*! UTIL_createFileNamesTable() :
+ * Wraps an array of file names, the buffer storing them (can be NULL) and the array capacity into a new table.
+ * The table becomes the owner of `filenames` and `buf` : UTIL_freeFileNamesTable() frees them both.
+ * @return : the new table, or NULL on allocation error.
+ */
+FileNamesTable* UTIL_createFileNamesTable(const char** filenames, char* buf, size_t tableSize);
+
+/*! UTIL_freeFileNamesTable() :
+ * Frees `table`, including its `fileNames` array and its `buf`. Does nothing when `table` is NULL.
+ */
+void UTIL_freeFileNamesTable(FileNamesTable* table);
+
+/*! UTIL_concatenateTwoTables() :
+ * Creates a table holding the file names of `table1` followed by those of `table2`.
+ * Capacity of the new table is the sum of both capacities.
+ * @return : the concatenation of the two tables ; `table1` and `table2` are then freed.
+ *           NULL on failure ; `table1` and `table2` are then left untouched, and still belong to the caller.
+ */
+FileNamesTable* UTIL_concatenateTwoTables(FileNamesTable* table1, FileNamesTable* table2);
 
 /*
  * A modified version of realloc().
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index 3d5c4280..e9960070 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -584,7 +584,9 @@ int main(int argCount, const char* argv[])
     int cLevelLast = -1000000000;
     unsigned recursive = 0;
     unsigned memLimit = 0;
-    const char** filenameTable = (const char**)malloc((size_t)argCount * sizeof(const char*)); /* argCount >= 1 */
+    size_t filenameTableSize = (size_t)argCount;   /* capacity of filenameTable ; argCount >= 1 */
+    const char** filenameTable = (const char**)malloc(filenameTableSize * sizeof(const char*));
+    char* tableBuf = NULL;   /* storage of the file names loaded through --file=FILE */
     unsigned filenameIdx = 0;
     const char* programName = argv[0];
     const char* outFileName = NULL;
@@ -796,6 +798,51 @@ int main(int argCount, const char* argv[])
                     continue;
                 }
 #endif
+
+                if (longCommandWArg(&argument, "--file=")) {
+                    FileNamesTable* extendedTable;
+                    FileNamesTable* curTable;
+                    FileNamesTable* concatenatedTable;
+                    size_t nbNewFiles;
+
+                    if (!UTIL_fileExist(argument) || !UTIL_isRegularFile(argument)) {
+                        DISPLAYLEVEL(1, "[ERROR] wrong fileName: %s\n", argument);
+                        CLEAN_RETURN(badusage(programName));
+                    }
+
+                    extendedTable = UTIL_createFileNamesTable_fromFileName(argument);
+                    if (!extendedTable) {
+                        CLEAN_RETURN(badusage(programName));
+                    }
+                    /* saved now, because a successful concatenation frees extendedTable */
+                    nbNewFiles = extendedTable->tableSize;
+
+                    filenameTable[filenameIdx] = NULL;   /* marking end of table */
+
+                    curTable = UTIL_createFileNamesTable(filenameTable, tableBuf, filenameTableSize);
+                    if (!curTable) {
+                        UTIL_freeFileNamesTable(extendedTable);
+                        CLEAN_RETURN(badusage(programName));
+                    }
+
+                    concatenatedTable = UTIL_concatenateTwoTables(curTable, extendedTable);
+                    if (!concatenatedTable) {
+                        /* curTable merely wraps filenameTable and tableBuf, which main() still owns */
+                        free(curTable);
+                        UTIL_freeFileNamesTable(extendedTable);
+                        CLEAN_RETURN(badusage(programName));
+                    }
+
+                    /* curTable and extendedTable no longer exist, nor do previous filenameTable and tableBuf :
+                     * take ownership of the new table's content, and only free its wrapper */
+                    filenameTable = concatenatedTable->fileNames;
+                    filenameTableSize = concatenatedTable->tableSize;
+                    tableBuf = concatenatedTable->buf;
+                    free(concatenatedTable);
+
+                    filenameIdx += (unsigned)nbNewFiles;
+                    continue;
+                }
                 /* fall-through, will trigger bad_usage() later on */
             }
 
@@ -1205,6 +1252,8 @@ int main(int argCount, const char* argv[])
 _end:
     FIO_freePreferences(prefs);
 
+    free(tableBuf);   /* free(NULL) is a no-op ; the filenameTable array is left to the cleanup code already present in main() */
+
     if (main_pause) waitEnter();
 #ifdef UTIL_HAS_CREATEFILELIST
     if (extendedFileList)
diff --git a/tests/playTests.sh b/tests/playTests.sh
index f68ee81a..4e71c678 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -293,6 +293,60 @@ test -f tmpOutDirDecomp/tmp2
 test -f tmpOutDirDecomp/tmp1
 rm -rf tmp*
 
+println "test : compress multiple files reading them from a file, --file=FILE"
+mkdir tmpInputTestDir
+println "Hello world!, file1" > tmpInputTestDir/file1
+println "Hello world!, file2" > tmpInputTestDir/file2
+println tmpInputTestDir/file1 > tmp
+println tmpInputTestDir/file2 >> tmp
+$ZSTD -f --file=tmp
+test -f tmpInputTestDir/file2.zst
+test -f tmpInputTestDir/file1.zst
+rm tmpInputTestDir/*.zst
+
+println "test : compress multiple files reading them from a file without final newline, --file=FILE"
+printf "tmpInputTestDir/file1\ntmpInputTestDir/file2" > tmp
+$ZSTD -f --file=tmp
+test -f tmpInputTestDir/file2.zst
+test -f tmpInputTestDir/file1.zst
+rm tmpInputTestDir/*.zst
+
+println "test : compress multiple files reading them from multiple files, --file=FILE"
+println "Hello world!, file3" > tmpInputTestDir/file3
+println "Hello world!, file4" > tmpInputTestDir/file4
+println tmpInputTestDir/file3 > tmp1
+println tmpInputTestDir/file4 >> tmp1
+$ZSTD -f --file=tmp --file=tmp1
+test -f tmpInputTestDir/file1.zst
+test -f tmpInputTestDir/file2.zst
+test -f tmpInputTestDir/file3.zst
+test -f tmpInputTestDir/file4.zst
+
+println "test : decompress multiple files reading them from a file, --file=FILE"
+rm tmpInputTestDir/file1
+rm tmpInputTestDir/file2
+println tmpInputTestDir/file1.zst > tmpZst
+println tmpInputTestDir/file2.zst >> tmpZst
+$ZSTD -d -f --file=tmpZst
+test -f tmpInputTestDir/file2
+test -f tmpInputTestDir/file1
+
+println "test : decompress multiple files reading them from multiple files, --file=FILE"
+rm tmpInputTestDir/file1
+rm tmpInputTestDir/file2
+rm tmpInputTestDir/file3
+rm tmpInputTestDir/file4
+println tmpInputTestDir/file3.zst > tmpZst1
+println tmpInputTestDir/file4.zst >> tmpZst1
+$ZSTD -d -f --file=tmpZst --file=tmpZst1
+test -f tmpInputTestDir/file1
+test -f tmpInputTestDir/file2
+test -f tmpInputTestDir/file3
+test -f tmpInputTestDir/file4
+
+rm -rf tmp*
+
+
 println "\n===> Advanced compression parameters "
 println "Hello world!" | $ZSTD --zstd=windowLog=21, - -o tmp.zst && die "wrong parameters not detected!"
 println "Hello world!" | $ZSTD --zstd=windowLo=21 - -o tmp.zst && die "wrong parameters not detected!"