diff --git a/CHANGELOG b/CHANGELOG index 44600267..3b882d4c 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -11,6 +11,7 @@ cli: Named pipes support, by @bimbashrestha cli: short tar's extension support, by @stokito cli: command --output-dir-flat= , generates target files into requested directory, by @senhuang42 cli: commands --stream-size=# and --size-hint=#, by @nmagerko +cli: command --exclude-compressed, by @shashank0791 cli: faster `-t` test mode cli: improved some error messages, by @vangyzen cli: rare deadlock condition within dictionary builder, by @terrelln diff --git a/README.md b/README.md index 9c5f9201..c94c7882 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ on the [Silesia compression corpus]. | Compressor name | Ratio | Compression| Decompress.| | --------------- | ------| -----------| ---------- | -| **zstd 1.4.0 -1** | 2.884 | 530 MB/s | 1360 MB/s | +| **zstd 1.4.4 -1** | 2.884 | 520 MB/s | 1600 MB/s | | zlib 1.2.11 -1 | 2.743 | 110 MB/s | 440 MB/s | | brotli 1.0.7 -0 | 2.701 | 430 MB/s | 470 MB/s | | quicklz 1.5.0 -1 | 2.238 | 600 MB/s | 800 MB/s | diff --git a/appveyor.yml b/appveyor.yml index 87fa5c12..dd2c02ac 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -173,8 +173,6 @@ sh -e playTests.sh --test-large-data && fullbench.exe -i1 && fullbench.exe -i1 -P0 && - fuzzer_VS2008_%PLATFORM%_Release.exe %FUZZERTEST% && - fuzzer_VS2010_%PLATFORM%_Release.exe %FUZZERTEST% && fuzzer_VS2012_%PLATFORM%_Release.exe %FUZZERTEST% && fuzzer_VS2013_%PLATFORM%_Release.exe %FUZZERTEST% && fuzzer_VS2015_%PLATFORM%_Release.exe %FUZZERTEST% diff --git a/build/cmake/CMakeLists.txt b/build/cmake/CMakeLists.txt index 33c05aee..9d0e7fb0 100644 --- a/build/cmake/CMakeLists.txt +++ b/build/cmake/CMakeLists.txt @@ -124,9 +124,13 @@ endif () #----------------------------------------------------------------------------- add_subdirectory(lib) +option(ZSTD_PROGRAMS_LINK_SHARED "PROGRAMS LINK SHARED" OFF) + if (ZSTD_BUILD_PROGRAMS) - if (NOT ZSTD_BUILD_STATIC) + if (NOT ZSTD_BUILD_STATIC AND NOT ZSTD_PROGRAMS_LINK_SHARED) message(SEND_ERROR "You need to build static library to build zstd CLI") + elseif(NOT ZSTD_BUILD_SHARED AND ZSTD_PROGRAMS_LINK_SHARED) + message(SEND_ERROR "You need to build shared library to build zstd CLI") endif () add_subdirectory(programs) diff --git a/build/cmake/programs/CMakeLists.txt b/build/cmake/programs/CMakeLists.txt index 8afd8350..b26e97d0 100644 --- a/build/cmake/programs/CMakeLists.txt +++ b/build/cmake/programs/CMakeLists.txt @@ -21,13 +21,19 @@ if (ZSTD_LEGACY_SUPPORT) include_directories(${PROGRAMS_LEGACY_DIR} ${LIBRARY_DIR}/legacy) endif () +if (ZSTD_PROGRAMS_LINK_SHARED) + set(PROGRAMS_ZSTD_LINK_TARGET libzstd_shared) +else () + set(PROGRAMS_ZSTD_LINK_TARGET libzstd_static) +endif () + if (MSVC) set(MSVC_RESOURCE_DIR ${ZSTD_SOURCE_DIR}/build/VS2010/zstd) set(PlatformDependResources ${MSVC_RESOURCE_DIR}/zstd.rc) endif () add_executable(zstd ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/fileio.c ${PROGRAMS_DIR}/benchfn.c ${PROGRAMS_DIR}/benchzstd.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/dibio.c ${PlatformDependResources}) -target_link_libraries(zstd libzstd_static) +target_link_libraries(zstd ${PROGRAMS_ZSTD_LINK_TARGET}) if (CMAKE_SYSTEM_NAME MATCHES "(Solaris|SunOS)") target_link_libraries(zstd rt) endif () @@ -68,7 +74,7 @@ if (UNIX) DESTINATION "${MAN_INSTALL_DIR}") add_executable(zstd-frugal ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${PROGRAMS_DIR}/fileio.c) - target_link_libraries(zstd-frugal libzstd_static) + target_link_libraries(zstd-frugal ${PROGRAMS_ZSTD_LINK_TARGET}) set_property(TARGET zstd-frugal APPEND PROPERTY COMPILE_DEFINITIONS "ZSTD_NOBENCH;ZSTD_NODICT") endif () diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 07df9f04..35346b92 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2771,7 +2771,7 @@ static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSym /*! ZSTD_loadZstdDictionary() : * @return : dictID, or an error code * assumptions : magic number supposed already checked - * dictSize supposed > 8 + * dictSize supposed >= 8 */ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms, @@ -2788,7 +2788,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, size_t dictID; ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1< 8); + assert(dictSize >= 8); assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); dictPtr += 4; /* skip magic number */ @@ -2890,7 +2890,10 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, void* workspace) { DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); - if ((dict==NULL) || (dictSize<=8)) return 0; + if ((dict==NULL) || (dictSize<8)) { + RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong); + return 0; + } ZSTD_reset_compressedBlockState(bs); @@ -2934,6 +2937,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, && (cdict->dictContentSize > 0) && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN || cdict->compressionLevel == 0) && (params->attachDictPref != ZSTD_dictForceLoad) ) { return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); @@ -2941,7 +2945,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize, ZSTDcrp_makeClean, zbuff) ); - { size_t const dictID = cdict ? + { size_t const dictID = cdict ? ZSTD_compress_insertDictionary( cctx->blockState.prevCBlock, &cctx->blockState.matchState, &cctx->workspace, params, cdict->dictContent, cdict->dictContentSize, @@ -3218,7 +3222,7 @@ static size_t ZSTD_initCDict_internal( ZSTDirp_reset, ZSTD_resetTarget_CDict)); /* (Maybe) load the dictionary - * Skips loading the dictionary if it is <= 8 bytes. + * Skips loading the dictionary if it is < 8 bytes. */ { ZSTD_CCtx_params params; memset(¶ms, 0, sizeof(params)); @@ -3381,6 +3385,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced( { ZSTD_CCtx_params params = cctx->requestedParams; params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN || cdict->compressionLevel == 0 ) && (params.attachDictPref != ZSTD_dictForceLoad) ? ZSTD_getCParamsFromCDict(cdict) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index ca47a667..dd4591b7 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1096,7 +1096,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); for (i=0; i<3; i++) { U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; - RETURN_ERROR_IF(rep==0 || rep >= dictContentSize, + RETURN_ERROR_IF(rep==0 || rep > dictContentSize, dictionary_corrupted); entropy->rep[i] = rep; } } @@ -1265,7 +1265,7 @@ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, { RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); ZSTD_clearDict(dctx); - if (dict && dictSize >= 8) { + if (dict && dictSize != 0) { dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation); dctx->ddict = dctx->ddictLocal; diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index cbb66c8d..767e5f9a 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -617,7 +617,7 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_ ptrdiff_t const diff = op - ip; BYTE* const oend = op + length; - assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8)) || + assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) || (ovtype == ZSTD_overlap_src_before_dst && diff >= 0)); if (length < 8) { diff --git a/programs/fileio.c b/programs/fileio.c index 828878c6..98337672 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -319,6 +319,8 @@ struct FIO_prefs_s { /* Computation resources preferences */ unsigned memLimit; int nbWorkers; + + int excludeCompressedFiles; }; @@ -359,6 +361,7 @@ FIO_prefs_t* FIO_createPreferences(void) ret->srcSizeHint = 0; ret->testMode = 0; ret->literalCompressionMode = ZSTD_lcm_auto; + ret->excludeCompressedFiles = 0; return ret; } @@ -402,6 +405,8 @@ void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers) { prefs->nbWorkers = nbWorkers; } +void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles) { prefs->excludeCompressedFiles = excludeCompressedFiles; } + void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize) { if (blockSize && prefs->nbWorkers==0) DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n"); @@ -520,7 +525,11 @@ static FILE* FIO_openSrcFile(const char* srcFileName) return NULL; } - if (!UTIL_isRegularFile(srcFileName) && !UTIL_isFIFO(srcFileName)) { + if (!UTIL_isRegularFile(srcFileName) +#ifndef _MSC_VER + && !UTIL_isFIFO(srcFileName) +#endif /* _MSC_VER */ + ) { DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n", srcFileName); return NULL; @@ -1425,6 +1434,21 @@ static int FIO_compressFilename_dstFile(FIO_prefs_t* const prefs, return result; } +/* List used to compare file extensions (used with --exclude-compressed flag) +* Different from the suffixList and should only apply to ZSTD compress operationResult +*/ +static const char *compressedFileExtensions[] = { + ZSTD_EXTENSION, + TZSTD_EXTENSION, + GZ_EXTENSION, + TGZ_EXTENSION, + LZMA_EXTENSION, + XZ_EXTENSION, + TXZ_EXTENSION, + LZ4_EXTENSION, + TLZ4_EXTENSION, + NULL +}; /*! FIO_compressFilename_srcFile() : * @return : 0 : compression completed correctly, @@ -1451,6 +1475,15 @@ FIO_compressFilename_srcFile(FIO_prefs_t* const prefs, return 1; } + /* Check if "srcFile" is compressed. Only done if --exclude-compressed flag is used + * YES => ZSTD will skip compression of the file and will return 0. + * NO => ZSTD will resume with compress operation. + */ + if (prefs->excludeCompressedFiles == 1 && UTIL_isCompressedFile(srcFileName, compressedFileExtensions)) { + DISPLAYLEVEL(4, "File is already compressed : %s \n", srcFileName); + return 0; + } + ress.srcFile = FIO_openSrcFile(srcFileName); if (ress.srcFile == NULL) return 1; /* srcFile could not be opened */ diff --git a/programs/fileio.h b/programs/fileio.h index af2c5d9d..a7da089f 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -93,6 +93,7 @@ void FIO_setLiteralCompressionMode( void FIO_setNoProgress(unsigned noProgress); void FIO_setNotificationLevel(int level); +void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles); /*-************************************* * Single File functions diff --git a/programs/util.c b/programs/util.c index c09461ef..c9503e7f 100644 --- a/programs/util.c +++ b/programs/util.c @@ -152,6 +152,8 @@ int UTIL_isSameFile(const char* fName1, const char* fName2) #endif } +#ifndef _MSC_VER +/* Using this to distinguish named pipes */ U32 UTIL_isFIFO(const char* infilename) { /* macro guards, as defined in : https://linux.die.net/man/2/lstat */ @@ -163,7 +165,7 @@ U32 UTIL_isFIFO(const char* infilename) (void)infilename; return 0; } - +#endif U32 UTIL_isLink(const char* infilename) { @@ -525,6 +527,27 @@ int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char #endif /* #ifdef _WIN32 */ +int UTIL_isCompressedFile(const char *inputName, const char *extensionList[]) +{ + const char* ext = UTIL_getFileExtension(inputName); + while(*extensionList!=NULL) + { + const int isCompressedExtension = strcmp(ext,*extensionList); + if(isCompressedExtension==0) + return 1; + ++extensionList; + } + return 0; +} + +/*Utility function to get file extension from file */ +const char* UTIL_getFileExtension(const char* infilename) +{ + const char* extension = strrchr(infilename, '.'); + if(!extension || extension==infilename) return ""; + return extension; +} + /* * UTIL_createFileList - takes a list of files and directories (params: inputNames, inputNamesNb), scans directories, * and returns a new list of files (params: return value, allocatedBuffer, allocatedNamesNb). diff --git a/programs/util.h b/programs/util.h index d83dd187..d6c2ecf0 100644 --- a/programs/util.h +++ b/programs/util.h @@ -40,7 +40,6 @@ extern "C" { #include /* clock_t, clock, CLOCKS_PER_SEC, nanosleep */ #include "mem.h" /* U32, U64 */ - /*-************************************************************ * Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW ***************************************************************/ @@ -135,8 +134,12 @@ U32 UTIL_isDirectory(const char* infilename); int UTIL_getFileStat(const char* infilename, stat_t* statbuf); int UTIL_isSameFile(const char* file1, const char* file2); int UTIL_compareStr(const void *p1, const void *p2); +int UTIL_isCompressedFile(const char* infilename, const char *extensionList[]); +const char* UTIL_getFileExtension(const char* infilename); +#ifndef _MSC_VER U32 UTIL_isFIFO(const char* infilename); +#endif U32 UTIL_isLink(const char* infilename); #define UTIL_FILESIZE_UNKNOWN ((U64)(-1)) U64 UTIL_getFileSize(const char* infilename); diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 831143f1..a244b6c4 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -135,6 +135,7 @@ static int usage_advanced(const char* programName) DISPLAY( " -q : suppress warnings; specify twice to suppress errors too\n"); DISPLAY( " -c : force write to standard output, even if it is the console\n"); DISPLAY( " -l : print information about zstd compressed files \n"); + DISPLAY( "--exclude-compressed: only compress files that are not previously compressed \n"); #ifndef ZSTD_NOCOMPRESS DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel()); DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog); @@ -710,7 +711,7 @@ int main(int argCount, const char* argv[]) if (!strcmp(argument, "--compress-literals")) { literalCompressionMode = ZSTD_lcm_huffman; continue; } if (!strcmp(argument, "--no-compress-literals")) { literalCompressionMode = ZSTD_lcm_uncompressed; continue; } if (!strcmp(argument, "--no-progress")) { FIO_setNoProgress(1); continue; } - + if (!strcmp(argument, "--exclude-compressed")) { FIO_setExcludeCompressedFile(prefs, 1); continue; } /* long commands with arguments */ #ifndef ZSTD_NODICT if (longCommandWArg(&argument, "--train-cover")) { @@ -1057,7 +1058,11 @@ int main(int argCount, const char* argv[]) if (!followLinks) { unsigned u; for (u=0, fileNamesNb=0; u +#include +#include +#include +#include "fuzz_helpers.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" + +/** + * Compresses the data and returns the compressed size or an error. + */ +static size_t compress(void* compressed, size_t compressedCapacity, + void const* source, size_t sourceSize, + void const* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, dictLoadMethod, dictContentType)); + size_t const compressedSize = ZSTD_compress2( + cctx, compressed, compressedCapacity, source, sourceSize); + ZSTD_freeCCtx(cctx); + return compressedSize; +} + +static size_t decompress(void* result, size_t resultCapacity, + void const* compressed, size_t compressedSize, + void const* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary_advanced( + dctx, dict, dictSize, dictLoadMethod, dictContentType)); + size_t const resultSize = ZSTD_decompressDCtx( + dctx, result, resultCapacity, compressed, compressedSize); + FUZZ_ZASSERT(resultSize); + ZSTD_freeDCtx(dctx); + return resultSize; +} + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) +{ + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + ZSTD_dictLoadMethod_e const dlm = + size = FUZZ_dataProducer_uint32Range(producer, 0, 1); + ZSTD_dictContentType_e const dct = + FUZZ_dataProducer_uint32Range(producer, 0, 2); + size = FUZZ_dataProducer_remainingBytes(producer); + + DEBUGLOG(2, "Dict load method %d", dlm); + DEBUGLOG(2, "Dict content type %d", dct); + DEBUGLOG(2, "Dict size %u", (unsigned)size); + + void* const rBuf = malloc(size); + FUZZ_ASSERT(rBuf); + size_t const cBufSize = ZSTD_compressBound(size); + void* const cBuf = malloc(cBufSize); + FUZZ_ASSERT(cBuf); + + size_t const cSize = + compress(cBuf, cBufSize, src, size, src, size, dlm, dct); + /* compression failing is okay */ + if (ZSTD_isError(cSize)) { + FUZZ_ASSERT_MSG(dct != ZSTD_dct_rawContent, "Raw must always succeed!"); + goto out; + } + size_t const rSize = + decompress(rBuf, size, cBuf, cSize, src, size, dlm, dct); + FUZZ_ASSERT_MSG(rSize == size, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!memcmp(src, rBuf, size), "Corruption!"); + +out: + free(cBuf); + free(rBuf); + FUZZ_dataProducer_free(producer); + return 0; +} diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py index 9df68df0..87f115af 100755 --- a/tests/fuzz/fuzz.py +++ b/tests/fuzz/fuzz.py @@ -27,6 +27,7 @@ def abs_join(a, *p): class InputType(object): RAW_DATA = 1 COMPRESSED_DATA = 2 + DICTIONARY_DATA = 3 class FrameType(object): @@ -54,6 +55,7 @@ TARGET_INFO = { 'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA), 'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA), 'simple_compress': TargetInfo(InputType.RAW_DATA), + 'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA), } TARGETS = list(TARGET_INFO.keys()) ALL_TARGETS = TARGETS + ['all'] @@ -73,6 +75,7 @@ LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a') AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz') DECODECORPUS = os.environ.get('DECODECORPUS', abs_join(FUZZ_DIR, '..', 'decodecorpus')) +ZSTD = os.environ.get('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd')) # Sanitizer environment variables MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '') @@ -673,6 +676,11 @@ def gen_parser(args): default=DECODECORPUS, help="decodecorpus binary (default: $DECODECORPUS='{}')".format( DECODECORPUS)) + parser.add_argument( + '--zstd', + type=str, + default=ZSTD, + help="zstd binary (default: $ZSTD='{}')".format(ZSTD)) parser.add_argument( '--fuzz-rng-seed-size', type=int, @@ -707,46 +715,66 @@ def gen(args): return 1 seed = create(args.seed) - with tmpdir() as compressed: - with tmpdir() as decompressed: - cmd = [ - args.decodecorpus, - '-n{}'.format(args.number), - '-p{}/'.format(compressed), - '-o{}'.format(decompressed), + with tmpdir() as compressed, tmpdir() as decompressed, tmpdir() as dict: + info = TARGET_INFO[args.TARGET] + + if info.input_type == InputType.DICTIONARY_DATA: + number = max(args.number, 1000) + else: + number = args.number + cmd = [ + args.decodecorpus, + '-n{}'.format(args.number), + '-p{}/'.format(compressed), + '-o{}'.format(decompressed), + ] + + if info.frame_type == FrameType.BLOCK: + cmd += [ + '--gen-blocks', + '--max-block-size-log={}'.format(min(args.max_size_log, 17)) ] + else: + cmd += ['--max-content-size-log={}'.format(args.max_size_log)] - info = TARGET_INFO[args.TARGET] - if info.frame_type == FrameType.BLOCK: - cmd += [ - '--gen-blocks', - '--max-block-size-log={}'.format(min(args.max_size_log, 17)) + print(' '.join(cmd)) + subprocess.check_call(cmd) + + if info.input_type == InputType.RAW_DATA: + print('using decompressed data in {}'.format(decompressed)) + samples = decompressed + elif info.input_type == InputType.COMPRESSED_DATA: + print('using compressed data in {}'.format(compressed)) + samples = compressed + else: + assert info.input_type == InputType.DICTIONARY_DATA + print('making dictionary data from {}'.format(decompressed)) + samples = dict + min_dict_size_log = 9 + max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log) + for dict_size_log in range(min_dict_size_log, max_dict_size_log): + dict_size = 1 << dict_size_log + cmd = [ + args.zstd, + '--train', + '-r', decompressed, + '--maxdict={}'.format(dict_size), + '-o', abs_join(dict, '{}.zstd-dict'.format(dict_size)) ] - else: - cmd += ['--max-content-size-log={}'.format(args.max_size_log)] + print(' '.join(cmd)) + subprocess.check_call(cmd) - print(' '.join(cmd)) - subprocess.check_call(cmd) - - if info.input_type == InputType.RAW_DATA: - print('using decompressed data in {}'.format(decompressed)) - samples = decompressed - else: - assert info.input_type == InputType.COMPRESSED_DATA - print('using compressed data in {}'.format(compressed)) - samples = compressed - - # Copy the samples over and prepend the RNG seeds - for name in os.listdir(samples): - samplename = abs_join(samples, name) - outname = abs_join(seed, name) - with open(samplename, 'rb') as sample: - with open(outname, 'wb') as out: - CHUNK_SIZE = 131072 + # Copy the samples over and prepend the RNG seeds + for name in os.listdir(samples): + samplename = abs_join(samples, name) + outname = abs_join(seed, name) + with open(samplename, 'rb') as sample: + with open(outname, 'wb') as out: + CHUNK_SIZE = 131072 + chunk = sample.read(CHUNK_SIZE) + while len(chunk) > 0: + out.write(chunk) chunk = sample.read(CHUNK_SIZE) - while len(chunk) > 0: - out.write(chunk) - chunk = sample.read(CHUNK_SIZE) return 0 diff --git a/tests/playTests.sh b/tests/playTests.sh index 5f23ac55..05951ab2 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -215,6 +215,37 @@ $ZSTD tmp -c --compress-literals -19 | $ZSTD -t $ZSTD -b --fast=1 -i0e1 tmp --compress-literals $ZSTD -b --fast=1 -i0e1 tmp --no-compress-literals +println "test: --exclude-compressed flag" +rm -rf precompressedFilterTestDir +mkdir -p precompressedFilterTestDir +./datagen $size > precompressedFilterTestDir/input.5 +./datagen $size > precompressedFilterTestDir/input.6 +$ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir +sleep 5 +./datagen $size > precompressedFilterTestDir/input.7 +./datagen $size > precompressedFilterTestDir/input.8 +$ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir +test ! -f precompressedFilterTestDir/input.5.zst.zst +test ! -f precompressedFilterTestDir/input.6.zst.zst +file1timestamp=`date -r precompressedFilterTestDir/input.5.zst +%s` +file2timestamp=`date -r precompressedFilterTestDir/input.7.zst +%s` +if [[ $file2timestamp -ge $file1timestamp ]]; then + println "Test is successful. input.5.zst is precompressed and therefore not compressed/modified again." +else + println "Test is not successful" +fi +#File Extension check. +./datagen $size > precompressedFilterTestDir/input.zstbar +$ZSTD --exclude-compressed --long --rm -r precompressedFilterTestDir +#ZSTD should compress input.zstbar +test -f precompressedFilterTestDir/input.zstbar.zst +#Check without the --exclude-compressed flag +$ZSTD --long --rm -r precompressedFilterTestDir +#Files should get compressed again without the --exclude-compressed flag. +test -f precompressedFilterTestDir/input.5.zst.zst +test -f precompressedFilterTestDir/input.6.zst.zst +println "Test completed" + println "test : file removal" $ZSTD -f --rm tmp test ! -f tmp # tmp should no longer be present @@ -1207,6 +1238,8 @@ test -f dictionary rm -f tmp* dictionary +if [ "$isWindows" = false ] ; then + println "\n===> zstd fifo named pipe test " head -c 10 /dev/zero > tmp_original mkfifo named_pipe @@ -1217,4 +1250,6 @@ $DIFF -s tmp_original tmp_decompressed rm -rf tmp* rm -rf named_pipe +fi + rm -f tmp* diff --git a/zlibWrapper/Makefile b/zlibWrapper/Makefile index f63291c9..6addb743 100644 --- a/zlibWrapper/Makefile +++ b/zlibWrapper/Makefile @@ -103,6 +103,7 @@ zstd_zlibwrapper.o: $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.c $(ZLIBWRAPPER_PATH)/z zstdTurnedOn_zlibwrapper.o: CPPFLAGS += -DZWRAP_USE_ZSTD=1 zstdTurnedOn_zlibwrapper.o: $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.c $(ZLIBWRAPPER_PATH)/zstd_zlibwrapper.h + $(CC) $(CPPFLAGS) $(CFLAGS) $< -c -o $@ $(ZSTDLIBDIR)/libzstd.a: $(MAKE) -C $(ZSTDLIBDIR) libzstd.a