zstd/programs/fileio.c

2537 lines
99 KiB
C
Raw Normal View History

/*
2016-08-30 10:04:33 -07:00
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
2016-08-30 10:04:33 -07:00
*/
2015-01-23 16:58:16 -08:00
2015-10-22 08:55:40 -07:00
/* *************************************
2015-01-23 16:58:16 -08:00
* Compiler Options
2015-10-22 08:55:40 -07:00
***************************************/
2016-08-13 16:19:12 -07:00
#ifdef _MSC_VER /* Visual */
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
2016-08-13 16:19:12 -07:00
# pragma warning(disable : 4204) /* non-constant aggregate initializer */
#endif
2016-12-21 04:23:34 -08:00
#if defined(__MINGW32__) && !defined(_POSIX_SOURCE)
# define _POSIX_SOURCE 1 /* disable %llu warnings with MinGW on Windows */
#endif
2015-01-23 16:58:16 -08:00
2016-02-12 11:19:48 -08:00
/*-*************************************
2015-01-23 16:58:16 -08:00
* Includes
2015-10-22 08:55:40 -07:00
***************************************/
#include "platform.h" /* Large Files support, SET_BINARY_MODE */
#include "util.h" /* UTIL_getFileSize, UTIL_isRegularFile */
2015-11-09 08:42:17 -08:00
#include <stdio.h> /* fprintf, fopen, fread, _fileno, stdin, stdout */
#include <stdlib.h> /* malloc, free */
#include <string.h> /* strcmp, strlen */
#include <assert.h>
2015-11-09 08:42:17 -08:00
#include <errno.h> /* errno */
#include <signal.h>
2016-04-28 05:40:45 -07:00
2017-04-06 12:56:40 -07:00
#if defined (_MSC_VER)
# include <sys/stat.h>
# include <io.h>
#endif
#include "mem.h" /* U32, U64 */
2015-01-23 16:58:16 -08:00
#include "fileio.h"
2018-02-01 16:13:04 -08:00
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */
#include "zstd.h"
2018-02-01 16:13:04 -08:00
#include "zstd_errors.h" /* ZSTD_error_frameParameter_windowTooLarge */
2017-02-08 07:54:23 -08:00
#if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS)
# include <zlib.h>
# if !defined(z_const)
# define z_const
# endif
2016-11-30 06:05:54 -08:00
#endif
2018-02-01 16:13:04 -08:00
#if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS)
# include <lzma.h>
#endif
2015-01-23 16:58:16 -08:00
#define LZ4_MAGICNUMBER 0x184D2204
#if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS)
# define LZ4F_ENABLE_OBSOLETE_ENUMS
# include <lz4frame.h>
# include <lz4.h>
#endif
2015-01-23 16:58:16 -08:00
2016-02-12 11:19:48 -08:00
/*-*************************************
2015-01-23 16:58:16 -08:00
* Constants
2015-10-22 08:55:40 -07:00
***************************************/
2016-07-02 16:10:53 -07:00
#define KB *(1<<10)
#define MB *(1<<20)
#define GB *(1U<<30)
#define ADAPT_WINDOWLOG_DEFAULT 23 /* 8 MB */
#define DICTSIZE_MAX (32 MB) /* protection against large input (attack scenario) */
2016-02-12 11:19:48 -08:00
#define FNSPACE 30
2015-01-23 16:58:16 -08:00
/*-*************************************
2015-01-23 16:58:16 -08:00
* Macros
2015-10-22 08:55:40 -07:00
***************************************/
struct FIO_display_prefs_s {
int displayLevel; /* 0 : no display; 1: errors; 2: + result + interaction + warnings; 3: + progression; 4: + information */
U32 noProgress;
};
static FIO_display_prefs_t g_display_prefs = {2, 0};
2015-01-23 16:58:16 -08:00
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
2017-06-15 12:27:32 -07:00
#define DISPLAYOUT(...) fprintf(stdout, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) { if (g_display_prefs.displayLevel>=l) { DISPLAY(__VA_ARGS__); } }
2015-01-23 16:58:16 -08:00
2017-11-29 19:11:12 -08:00
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
#define READY_FOR_UPDATE() (!g_display_prefs.noProgress && UTIL_clockSpanMicro(g_displayClock) > g_refreshRate)
#define DELAY_NEXT_UPDATE() { g_displayClock = UTIL_getTime(); }
#define DISPLAYUPDATE(l, ...) { \
if (g_display_prefs.displayLevel>=l && !g_display_prefs.noProgress) { \
if (READY_FOR_UPDATE() || (g_display_prefs.displayLevel>=4)) { \
DELAY_NEXT_UPDATE(); \
DISPLAY(__VA_ARGS__); \
if (g_display_prefs.displayLevel>=4) fflush(stderr); \
} } }
2015-01-23 16:58:16 -08:00
#undef MIN /* in case it would be already defined */
2017-04-13 15:35:05 -07:00
#define MIN(a,b) ((a) < (b) ? (a) : (b))
2016-07-02 16:10:53 -07:00
#define EXM_THROW(error, ...) \
{ \
DISPLAYLEVEL(1, "zstd: "); \
DISPLAYLEVEL(5, "Error defined at %s, line %i : \n", __FILE__, __LINE__); \
DISPLAYLEVEL(1, "error %i : ", error); \
DISPLAYLEVEL(1, __VA_ARGS__); \
DISPLAYLEVEL(1, " \n"); \
exit(error); \
}
#define CHECK_V(v, f) \
v = f; \
if (ZSTD_isError(v)) { \
DISPLAYLEVEL(5, "%s \n", #f); \
EXM_THROW(11, "%s", ZSTD_getErrorName(v)); \
}
#define CHECK(f) { size_t err; CHECK_V(err, f); }
/*-************************************
* Signal (Ctrl-C trapping)
**************************************/
static const char* g_artefact = NULL;
static void INThandler(int sig)
{
assert(sig==SIGINT); (void)sig;
#if !defined(_MSC_VER)
signal(sig, SIG_IGN); /* this invocation generates a buggy warning in Visual Studio */
#endif
if (g_artefact) {
assert(UTIL_isRegularFile(g_artefact));
remove(g_artefact);
}
DISPLAY("\n");
exit(2);
}
static void addHandler(char const* dstFileName)
{
if (UTIL_isRegularFile(dstFileName)) {
g_artefact = dstFileName;
signal(SIGINT, INThandler);
} else {
g_artefact = NULL;
}
}
/* Idempotent */
static void clearHandler(void)
{
if (g_artefact) signal(SIGINT, SIG_DFL);
g_artefact = NULL;
}
/*-*********************************************************
* Termination signal trapping (Print debug stack trace)
***********************************************************/
#if defined(__has_feature) && !defined(BACKTRACE_ENABLE) /* Clang compiler */
# if (__has_feature(address_sanitizer))
# define BACKTRACE_ENABLE 0
# endif /* __has_feature(address_sanitizer) */
#elif defined(__SANITIZE_ADDRESS__) && !defined(BACKTRACE_ENABLE) /* GCC compiler */
# define BACKTRACE_ENABLE 0
#endif
#if !defined(BACKTRACE_ENABLE)
/* automatic detector : backtrace enabled by default on linux+glibc and osx */
# if (defined(__linux__) && defined(__GLIBC__)) \
|| (defined(__APPLE__) && defined(__MACH__))
# define BACKTRACE_ENABLE 1
# else
# define BACKTRACE_ENABLE 0
# endif
#endif
/* note : after this point, BACKTRACE_ENABLE is necessarily defined */
#if BACKTRACE_ENABLE
#include <execinfo.h> /* backtrace, backtrace_symbols */
#define MAX_STACK_FRAMES 50
static void ABRThandler(int sig) {
const char* name;
void* addrlist[MAX_STACK_FRAMES];
char** symbollist;
int addrlen, i;
switch (sig) {
case SIGABRT: name = "SIGABRT"; break;
case SIGFPE: name = "SIGFPE"; break;
case SIGILL: name = "SIGILL"; break;
case SIGINT: name = "SIGINT"; break;
case SIGSEGV: name = "SIGSEGV"; break;
default: name = "UNKNOWN";
}
DISPLAY("Caught %s signal, printing stack:\n", name);
/* Retrieve current stack addresses. */
addrlen = backtrace(addrlist, MAX_STACK_FRAMES);
if (addrlen == 0) {
DISPLAY("\n");
return;
}
/* Create readable strings to each frame. */
symbollist = backtrace_symbols(addrlist, addrlen);
/* Print the stack trace, excluding calls handling the signal. */
for (i = ZSTD_START_SYMBOLLIST_FRAME; i < addrlen; i++) {
DISPLAY("%s\n", symbollist[i]);
}
free(symbollist);
/* Reset and raise the signal so default handler runs. */
signal(sig, SIG_DFL);
raise(sig);
}
#endif
void FIO_addAbortHandler()
{
#if BACKTRACE_ENABLE
signal(SIGABRT, ABRThandler);
signal(SIGFPE, ABRThandler);
signal(SIGILL, ABRThandler);
signal(SIGSEGV, ABRThandler);
signal(SIGBUS, ABRThandler);
#endif
}
/*-************************************************************
2018-06-09 12:31:17 -07:00
* Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
***************************************************************/
#if defined(_MSC_VER) && _MSC_VER >= 1400
# define LONG_SEEK _fseeki64
# define LONG_TELL _ftelli64
#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
2017-02-14 00:52:52 -08:00
# define LONG_SEEK fseeko
# define LONG_TELL ftello
#elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__)
# define LONG_SEEK fseeko64
# define LONG_TELL ftello64
#elif defined(_WIN32) && !defined(__DJGPP__)
# include <windows.h>
static int LONG_SEEK(FILE* file, __int64 offset, int origin) {
LARGE_INTEGER off;
DWORD method;
off.QuadPart = offset;
if (origin == SEEK_END)
method = FILE_END;
else if (origin == SEEK_CUR)
method = FILE_CURRENT;
else
method = FILE_BEGIN;
if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method))
2017-02-13 03:00:59 -08:00
return 0;
else
2017-02-13 03:00:59 -08:00
return -1;
}
#else
# define LONG_SEEK fseek
# define LONG_TELL ftell
#endif
/*-*************************************
* Parameters: Typedefs
2015-10-22 08:55:40 -07:00
***************************************/
struct FIO_prefs_s {
/* Algorithm preferences */
FIO_compressionType_t compressionType;
U32 sparseFileSupport; /* 0: no sparse allowed; 1: auto (file yes, stdout no); 2: force sparse */
int dictIDFlag;
int checksumFlag;
int blockSize;
int overlapLog;
U32 adaptiveMode;
int rsyncable;
int minAdaptLevel;
int maxAdaptLevel;
int ldmFlag;
int ldmHashLog;
int ldmMinMatch;
int ldmBucketSizeLog;
int ldmHashRateLog;
/* IO preferences */
U32 removeSrcFile;
U32 overwrite;
/* Computation resources preferences */
unsigned memLimit;
int nbWorkers;
};
/*-*************************************
* Parameters: Initialization
***************************************/
#define FIO_OVERLAP_LOG_NOTSET 9999
#define FIO_LDM_PARAM_NOTSET 9999
FIO_prefs_t* FIO_createPreferences(void)
{
FIO_prefs_t* const ret = (FIO_prefs_t*)malloc(sizeof(FIO_prefs_t));
if (!ret) EXM_THROW(21, "Allocation error : not enough memory");
ret->compressionType = FIO_zstdCompression;
ret->overwrite = 0;
ret->sparseFileSupport = ZSTD_SPARSE_DEFAULT;
ret->dictIDFlag = 1;
ret->checksumFlag = 1;
ret->removeSrcFile = 0;
ret->memLimit = 0;
ret->nbWorkers = 1;
ret->blockSize = 0;
ret->overlapLog = FIO_OVERLAP_LOG_NOTSET;
ret->adaptiveMode = 0;
ret->rsyncable = 0;
ret->minAdaptLevel = -50; /* initializing this value requires a constant, so ZSTD_minCLevel() doesn't work */
ret->maxAdaptLevel = 22; /* initializing this value requires a constant, so ZSTD_maxCLevel() doesn't work */
ret->ldmFlag = 0;
ret->ldmHashLog = 0;
ret->ldmMinMatch = 0;
ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
return ret;
}
void FIO_freePreferences(FIO_prefs_t* const prefs)
{
free(prefs);
}
/*-*************************************
* Parameters: Display Options
***************************************/
void FIO_setNotificationLevel(int level) { g_display_prefs.displayLevel=level; }
void FIO_setNoProgress(unsigned noProgress) { g_display_prefs.noProgress = noProgress; }
/*-*************************************
* Parameters: Setters
***************************************/
void FIO_setCompressionType(FIO_prefs_t* const prefs, FIO_compressionType_t compressionType) { prefs->compressionType = compressionType; }
void FIO_overwriteMode(FIO_prefs_t* const prefs) { prefs->overwrite = 1; }
void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse) { prefs->sparseFileSupport = sparse; }
void FIO_setDictIDFlag(FIO_prefs_t* const prefs, int dictIDFlag) { prefs->dictIDFlag = dictIDFlag; }
void FIO_setChecksumFlag(FIO_prefs_t* const prefs, int checksumFlag) { prefs->checksumFlag = checksumFlag; }
void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag) { prefs->removeSrcFile = (flag>0); }
void FIO_setMemLimit(FIO_prefs_t* const prefs, unsigned memLimit) { prefs->memLimit = memLimit; }
void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers) {
#ifndef ZSTD_MULTITHREAD
if (nbWorkers > 0) DISPLAYLEVEL(2, "Note : multi-threading is disabled \n");
#endif
prefs->nbWorkers = nbWorkers;
}
void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize) {
if (blockSize && prefs->nbWorkers==0)
DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n");
prefs->blockSize = blockSize;
}
void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog){
if (overlapLog && prefs->nbWorkers==0)
DISPLAYLEVEL(2, "Setting overlapLog is useless in single-thread mode \n");
prefs->overlapLog = overlapLog;
}
void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, unsigned adapt) {
if ((adapt>0) && (prefs->nbWorkers==0))
EXM_THROW(1, "Adaptive mode is not compatible with single thread mode \n");
prefs->adaptiveMode = adapt;
}
void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) {
if ((rsyncable>0) && (prefs->nbWorkers==0))
2018-11-12 19:59:42 -08:00
EXM_THROW(1, "Rsyncable mode is not compatible with single thread mode \n");
prefs->rsyncable = rsyncable;
2018-11-12 19:59:42 -08:00
}
void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel)
{
#ifndef ZSTD_NOCOMPRESS
assert(minCLevel >= ZSTD_minCLevel());
#endif
prefs->minAdaptLevel = minCLevel;
}
void FIO_setAdaptMax(FIO_prefs_t* const prefs, int maxCLevel)
{
prefs->maxAdaptLevel = maxCLevel;
}
void FIO_setLdmFlag(FIO_prefs_t* const prefs, unsigned ldmFlag) {
prefs->ldmFlag = (ldmFlag>0);
}
2017-09-02 21:10:36 -07:00
void FIO_setLdmHashLog(FIO_prefs_t* const prefs, int ldmHashLog) {
prefs->ldmHashLog = ldmHashLog;
2017-09-02 21:10:36 -07:00
}
void FIO_setLdmMinMatch(FIO_prefs_t* const prefs, int ldmMinMatch) {
prefs->ldmMinMatch = ldmMinMatch;
}
void FIO_setLdmBucketSizeLog(FIO_prefs_t* const prefs, int ldmBucketSizeLog) {
prefs->ldmBucketSizeLog = ldmBucketSizeLog;
}
2015-01-23 16:58:16 -08:00
void FIO_setLdmHashRateLog(FIO_prefs_t* const prefs, int ldmHashRateLog) {
prefs->ldmHashRateLog = ldmHashRateLog;
}
2015-01-23 16:58:16 -08:00
/*-*************************************
2015-01-23 16:58:16 -08:00
* Functions
2015-10-22 08:55:40 -07:00
***************************************/
2017-04-06 12:56:40 -07:00
/** FIO_remove() :
* @result : Unlink `fileName`, even if it's read-only */
static int FIO_remove(const char* path)
{
if (!UTIL_isRegularFile(path)) {
DISPLAYLEVEL(2, "zstd: Refusing to remove non-regular file %s \n", path);
return 0;
}
2017-04-06 12:56:40 -07:00
#if defined(_WIN32) || defined(WIN32)
/* windows doesn't allow remove read-only files,
* so try to make it writable first */
2017-04-06 12:56:40 -07:00
chmod(path, _S_IWRITE);
#endif
return remove(path);
}
2016-11-11 17:26:54 -08:00
/** FIO_openSrcFile() :
* condition : `srcFileName` must be non-NULL.
* @result : FILE* to `srcFileName`, or NULL if it fails */
static FILE* FIO_openSrcFile(const char* srcFileName)
{
assert(srcFileName != NULL);
2016-02-02 05:36:49 -08:00
if (!strcmp (srcFileName, stdinmark)) {
DISPLAYLEVEL(4,"Using stdin for input \n");
SET_BINARY_MODE(stdin);
return stdin;
}
if (!UTIL_fileExist(srcFileName)) {
DISPLAYLEVEL(1, "zstd: can't stat %s : %s -- ignored \n",
srcFileName, strerror(errno));
return NULL;
}
if (!UTIL_isRegularFile(srcFileName)) {
DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n",
srcFileName);
return NULL;
}
{ FILE* const f = fopen(srcFileName, "rb");
if (f == NULL)
DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));
return f;
}
}
2016-09-15 06:38:44 -07:00
/** FIO_openDstFile() :
* condition : `dstFileName` must be non-NULL.
2016-09-15 06:38:44 -07:00
* @result : FILE* to `dstFileName`, or NULL if it fails */
static FILE* FIO_openDstFile(FIO_prefs_t* const prefs, const char* srcFileName, const char* dstFileName)
{
assert(dstFileName != NULL);
2016-02-02 05:36:49 -08:00
if (!strcmp (dstFileName, stdoutmark)) {
DISPLAYLEVEL(4,"Using stdout for output \n");
SET_BINARY_MODE(stdout);
if (prefs->sparseFileSupport == 1) {
prefs->sparseFileSupport = 0;
DISPLAYLEVEL(4, "Sparse File Support is automatically disabled on stdout ; try --sparse \n");
}
return stdout;
}
/* ensure dst is not the same file as src */
if (srcFileName != NULL) {
#ifdef _MSC_VER
/* note : Visual does not support file identification by inode.
* The following work-around is limited to detecting exact name repetition only,
* aka `filename` is considered different from `subdir/../filename` */
if (!strcmp(srcFileName, dstFileName)) {
DISPLAYLEVEL(1, "zstd: Refusing to open a output file which will overwrite the input file \n");
return NULL;
}
#else
stat_t srcStat;
stat_t dstStat;
if (UTIL_getFileStat(srcFileName, &srcStat)
&& UTIL_getFileStat(dstFileName, &dstStat)) {
if (srcStat.st_dev == dstStat.st_dev
&& srcStat.st_ino == dstStat.st_ino) {
DISPLAYLEVEL(1, "zstd: Refusing to open a output file which will overwrite the input file \n");
return NULL;
}
}
#endif
}
if (prefs->sparseFileSupport == 1) {
prefs->sparseFileSupport = ZSTD_SPARSE_DEFAULT;
}
if (UTIL_isRegularFile(dstFileName)) {
/* Check if destination file already exists */
FILE* const fCheck = fopen( dstFileName, "rb" );
if (!strcmp(dstFileName, nulmark)) {
EXM_THROW(40, "%s is unexpectedly categorized as a regular file",
dstFileName);
}
if (fCheck != NULL) { /* dst file exists, authorization prompt */
fclose(fCheck);
if (!prefs->overwrite) {
if (g_display_prefs.displayLevel <= 1) {
/* No interaction possible */
DISPLAY("zstd: %s already exists; not overwritten \n",
dstFileName);
return NULL;
}
DISPLAY("zstd: %s already exists; overwrite (y/N) ? ",
dstFileName);
{ int ch = getchar();
if ((ch!='Y') && (ch!='y')) {
DISPLAY(" not overwritten \n");
2016-07-01 16:05:31 -07:00
return NULL;
2015-12-29 02:57:15 -08:00
}
/* flush rest of input line */
while ((ch!=EOF) && (ch!='\n')) ch = getchar();
} }
/* need to unlink */
FIO_remove(dstFileName);
} }
{ FILE* const f = fopen( dstFileName, "wb" );
if (f == NULL)
DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
return f;
}
}
/*! FIO_createDictBuffer() :
* creates a buffer, pointed by `*bufferPtr`,
* loads `filename` content into it, up to DICTSIZE_MAX bytes.
* @return : loaded size
* if fileName==NULL, returns 0 and a NULL pointer
*/
static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName)
2015-12-17 11:30:14 -08:00
{
FILE* fileHandle;
U64 fileSize;
assert(bufferPtr != NULL);
2015-12-17 11:30:14 -08:00
*bufferPtr = NULL;
2016-02-02 05:36:49 -08:00
if (fileName == NULL) return 0;
2015-12-17 11:30:14 -08:00
DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
fileHandle = fopen(fileName, "rb");
if (fileHandle==NULL) EXM_THROW(31, "%s: %s", fileName, strerror(errno));
2016-04-28 03:23:33 -07:00
fileSize = UTIL_getFileSize(fileName);
if (fileSize > DICTSIZE_MAX) {
EXM_THROW(32, "Dictionary file %s is too large (> %u MB)",
fileName, DICTSIZE_MAX >> 20); /* avoid extreme cases */
}
2016-07-01 16:05:31 -07:00
*bufferPtr = malloc((size_t)fileSize);
if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno));
{ size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle);
if (readSize != fileSize)
EXM_THROW(35, "Error reading dictionary file %s : %s",
fileName, strerror(errno));
}
2015-12-17 11:30:14 -08:00
fclose(fileHandle);
return (size_t)fileSize;
}
#ifndef ZSTD_NOCOMPRESS
/* **********************************************************************
* Compression
************************************************************************/
typedef struct {
FILE* srcFile;
FILE* dstFile;
void* srcBuffer;
size_t srcBufferSize;
void* dstBuffer;
size_t dstBufferSize;
2016-08-13 14:45:45 -07:00
ZSTD_CStream* cctx;
} cRess_t;
static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
const char* dictFileName, int cLevel,
U64 srcSize,
ZSTD_compressionParameters comprParams) {
cRess_t ress;
2016-07-13 08:38:39 -07:00
memset(&ress, 0, sizeof(ress));
DISPLAYLEVEL(6, "FIO_createCResources \n");
ress.cctx = ZSTD_createCCtx();
if (ress.cctx == NULL)
EXM_THROW(30, "allocation error (%s): can't create ZSTD_CCtx",
strerror(errno));
2016-08-13 14:45:45 -07:00
ress.srcBufferSize = ZSTD_CStreamInSize();
ress.srcBuffer = malloc(ress.srcBufferSize);
2016-08-13 14:45:45 -07:00
ress.dstBufferSize = ZSTD_CStreamOutSize();
ress.dstBuffer = malloc(ress.dstBufferSize);
if (!ress.srcBuffer || !ress.dstBuffer)
EXM_THROW(31, "allocation error : not enough memory");
2018-02-01 17:07:27 -08:00
/* Advanced parameters, including dictionary */
2016-09-15 06:38:44 -07:00
{ void* dictBuffer;
size_t const dictBuffSize = FIO_createDictBuffer(&dictBuffer, dictFileName); /* works with dictFileName==NULL */
if (dictFileName && (dictBuffer==NULL))
EXM_THROW(32, "allocation error : can't create dictBuffer");
2017-06-11 18:39:46 -07:00
if (prefs->adaptiveMode && !prefs->ldmFlag && !comprParams.windowLog)
comprParams.windowLog = ADAPT_WINDOWLOG_DEFAULT;
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, 1) ); /* always enable content size when available (note: supposed to be default) */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_dictIDFlag, prefs->dictIDFlag) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, prefs->checksumFlag) );
/* compression level */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) );
/* long distance matching */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmMinMatch, prefs->ldmMinMatch) );
if (prefs->ldmBucketSizeLog != FIO_LDM_PARAM_NOTSET) {
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmBucketSizeLog, prefs->ldmBucketSizeLog) );
}
if (prefs->ldmHashRateLog != FIO_LDM_PARAM_NOTSET) {
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashRateLog, prefs->ldmHashRateLog) );
}
/* compression parameters */
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_windowLog, (int)comprParams.windowLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_chainLog, (int)comprParams.chainLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_hashLog, (int)comprParams.hashLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_searchLog, (int)comprParams.searchLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_minMatch, (int)comprParams.minMatch) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, comprParams.strategy) );
/* multi-threading */
2018-02-01 17:07:27 -08:00
#ifdef ZSTD_MULTITHREAD
DISPLAYLEVEL(5,"set nb workers = %u \n", prefs->nbWorkers);
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_nbWorkers, prefs->nbWorkers) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_jobSize, prefs->blockSize) );
if (prefs->overlapLog != FIO_OVERLAP_LOG_NOTSET) {
DISPLAYLEVEL(3,"set overlapLog = %u \n", prefs->overlapLog);
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_overlapLog, prefs->overlapLog) );
}
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) );
#endif
/* dictionary */
2018-02-01 17:07:27 -08:00
CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) ); /* set the value temporarily for dictionary loading, to adapt compression parameters */
CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) );
CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, ZSTD_CONTENTSIZE_UNKNOWN) ); /* reset */
2016-09-15 06:38:44 -07:00
free(dictBuffer);
}
return ress;
}
static void FIO_freeCResources(cRess_t ress)
{
free(ress.srcBuffer);
free(ress.dstBuffer);
2016-09-15 06:38:44 -07:00
ZSTD_freeCStream(ress.cctx); /* never fails */
}
2017-02-08 07:54:23 -08:00
#ifdef ZSTD_GZCOMPRESS
static unsigned long long
FIO_compressGzFrame(cRess_t* ress,
const char* srcFileName, U64 const srcFileSize,
int compressionLevel, U64* readsize)
2017-02-08 07:54:23 -08:00
{
unsigned long long inFileSize = 0, outFileSize = 0;
z_stream strm;
2017-02-08 08:37:14 -08:00
int ret;
2017-02-08 07:54:23 -08:00
if (compressionLevel > Z_BEST_COMPRESSION)
compressionLevel = Z_BEST_COMPRESSION;
2017-02-08 07:54:23 -08:00
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
2017-02-08 08:37:14 -08:00
ret = deflateInit2(&strm, compressionLevel, Z_DEFLATED,
15 /* maxWindowLogSize */ + 16 /* gzip only */,
8, Z_DEFAULT_STRATEGY); /* see http://www.zlib.net/manual.html */
if (ret != Z_OK)
EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret);
2017-02-08 07:54:23 -08:00
2017-02-08 08:37:14 -08:00
strm.next_in = 0;
2017-02-27 04:21:05 -08:00
strm.avail_in = 0;
2017-02-08 07:54:23 -08:00
strm.next_out = (Bytef*)ress->dstBuffer;
strm.avail_out = (uInt)ress->dstBufferSize;
while (1) {
if (strm.avail_in == 0) {
size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile);
if (inSize == 0) break;
inFileSize += inSize;
strm.next_in = (z_const unsigned char*)ress->srcBuffer;
strm.avail_in = (uInt)inSize;
}
ret = deflate(&strm, Z_NO_FLUSH);
if (ret != Z_OK)
EXM_THROW(72, "zstd: %s: deflate error %d \n", srcFileName, ret);
2017-02-08 07:54:23 -08:00
{ size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
if (decompBytes) {
if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes)
EXM_THROW(73, "Write error : cannot write to output file");
2017-02-08 07:54:23 -08:00
outFileSize += decompBytes;
strm.next_out = (Bytef*)ress->dstBuffer;
strm.avail_out = (uInt)ress->dstBufferSize;
}
}
if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
(unsigned)(inFileSize>>20),
(double)outFileSize/inFileSize*100)
else
DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%",
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
(unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
(double)outFileSize/inFileSize*100);
2017-02-08 07:54:23 -08:00
}
while (1) {
2017-02-08 08:37:14 -08:00
ret = deflate(&strm, Z_FINISH);
2017-02-08 07:54:23 -08:00
{ size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
if (decompBytes) {
if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes)
EXM_THROW(75, "Write error : %s", strerror(errno));
2017-02-08 07:54:23 -08:00
outFileSize += decompBytes;
strm.next_out = (Bytef*)ress->dstBuffer;
strm.avail_out = (uInt)ress->dstBufferSize;
} }
2017-02-08 07:54:23 -08:00
if (ret == Z_STREAM_END) break;
if (ret != Z_BUF_ERROR)
EXM_THROW(77, "zstd: %s: deflate error %d \n", srcFileName, ret);
2017-02-08 07:54:23 -08:00
}
2017-02-08 08:37:14 -08:00
ret = deflateEnd(&strm);
if (ret != Z_OK)
EXM_THROW(79, "zstd: %s: deflateEnd error %d \n", srcFileName, ret);
2017-02-08 07:54:23 -08:00
*readsize = inFileSize;
return outFileSize;
}
#endif
#ifdef ZSTD_LZMACOMPRESS
static unsigned long long
FIO_compressLzmaFrame(cRess_t* ress,
const char* srcFileName, U64 const srcFileSize,
int compressionLevel, U64* readsize, int plain_lzma)
{
unsigned long long inFileSize = 0, outFileSize = 0;
lzma_stream strm = LZMA_STREAM_INIT;
lzma_action action = LZMA_RUN;
lzma_ret ret;
if (compressionLevel < 0) compressionLevel = 0;
if (compressionLevel > 9) compressionLevel = 9;
if (plain_lzma) {
lzma_options_lzma opt_lzma;
if (lzma_lzma_preset(&opt_lzma, compressionLevel))
EXM_THROW(71, "zstd: %s: lzma_lzma_preset error", srcFileName);
ret = lzma_alone_encoder(&strm, &opt_lzma); /* LZMA */
if (ret != LZMA_OK)
EXM_THROW(71, "zstd: %s: lzma_alone_encoder error %d", srcFileName, ret);
} else {
ret = lzma_easy_encoder(&strm, compressionLevel, LZMA_CHECK_CRC64); /* XZ */
if (ret != LZMA_OK)
EXM_THROW(71, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret);
}
strm.next_in = 0;
strm.avail_in = 0;
strm.next_out = (BYTE*)ress->dstBuffer;
strm.avail_out = ress->dstBufferSize;
while (1) {
if (strm.avail_in == 0) {
size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile);
if (inSize == 0) action = LZMA_FINISH;
inFileSize += inSize;
strm.next_in = (BYTE const*)ress->srcBuffer;
strm.avail_in = inSize;
}
ret = lzma_code(&strm, action);
if (ret != LZMA_OK && ret != LZMA_STREAM_END)
EXM_THROW(72, "zstd: %s: lzma_code encoding error %d", srcFileName, ret);
{ size_t const compBytes = ress->dstBufferSize - strm.avail_out;
if (compBytes) {
if (fwrite(ress->dstBuffer, 1, compBytes, ress->dstFile) != compBytes)
EXM_THROW(73, "Write error : %s", strerror(errno));
outFileSize += compBytes;
strm.next_out = (BYTE*)ress->dstBuffer;
strm.avail_out = ress->dstBufferSize;
} }
if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
(unsigned)(inFileSize>>20),
(double)outFileSize/inFileSize*100)
else
DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%",
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
(unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
(double)outFileSize/inFileSize*100);
if (ret == LZMA_STREAM_END) break;
}
lzma_end(&strm);
*readsize = inFileSize;
return outFileSize;
}
#endif
#ifdef ZSTD_LZ4COMPRESS
#if LZ4_VERSION_NUMBER <= 10600
#define LZ4F_blockLinked blockLinked
#define LZ4F_max64KB max64KB
#endif
static int FIO_LZ4_GetBlockSize_FromBlockId (int id) { return (1 << (8 + (2 * id))); }
static unsigned long long
FIO_compressLz4Frame(cRess_t* ress,
const char* srcFileName, U64 const srcFileSize,
int compressionLevel, int checksumFlag,
U64* readsize)
{
const size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max64KB);
unsigned long long inFileSize = 0, outFileSize = 0;
LZ4F_preferences_t prefs;
LZ4F_compressionContext_t ctx;
LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
if (LZ4F_isError(errorCode))
EXM_THROW(31, "zstd: failed to create lz4 compression context");
memset(&prefs, 0, sizeof(prefs));
assert(blockSize <= ress->srcBufferSize);
2017-04-26 10:17:38 -07:00
prefs.autoFlush = 1;
prefs.compressionLevel = compressionLevel;
prefs.frameInfo.blockMode = LZ4F_blockLinked;
prefs.frameInfo.blockSizeID = LZ4F_max64KB;
prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)checksumFlag;
#if LZ4_VERSION_NUMBER >= 10600
prefs.frameInfo.contentSize = (srcFileSize==UTIL_FILESIZE_UNKNOWN) ? 0 : srcFileSize;
#endif
assert(LZ4F_compressBound(blockSize, &prefs) <= ress->dstBufferSize);
{
size_t readSize;
size_t headerSize = LZ4F_compressBegin(ctx, ress->dstBuffer, ress->dstBufferSize, &prefs);
if (LZ4F_isError(headerSize))
EXM_THROW(33, "File header generation failed : %s",
LZ4F_getErrorName(headerSize));
if (fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile) != headerSize)
EXM_THROW(34, "Write error : %s (cannot write header)", strerror(errno));
outFileSize += headerSize;
/* Read first block */
readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile);
inFileSize += readSize;
/* Main Loop */
while (readSize>0) {
size_t const outSize = LZ4F_compressUpdate(ctx,
ress->dstBuffer, ress->dstBufferSize,
ress->srcBuffer, readSize, NULL);
if (LZ4F_isError(outSize))
EXM_THROW(35, "zstd: %s: lz4 compression failed : %s",
srcFileName, LZ4F_getErrorName(outSize));
outFileSize += outSize;
if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
(unsigned)(inFileSize>>20),
(double)outFileSize/inFileSize*100)
} else {
DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%",
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
(unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
(double)outFileSize/inFileSize*100);
}
/* Write Block */
{ size_t const sizeCheck = fwrite(ress->dstBuffer, 1, outSize, ress->dstFile);
if (sizeCheck != outSize)
EXM_THROW(36, "Write error : %s", strerror(errno));
}
/* Read next block */
readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile);
inFileSize += readSize;
}
if (ferror(ress->srcFile)) EXM_THROW(37, "Error reading %s ", srcFileName);
/* End of Stream mark */
headerSize = LZ4F_compressEnd(ctx, ress->dstBuffer, ress->dstBufferSize, NULL);
if (LZ4F_isError(headerSize))
EXM_THROW(38, "zstd: %s: lz4 end of file generation failed : %s",
srcFileName, LZ4F_getErrorName(headerSize));
{ size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile);
if (sizeCheck != headerSize)
EXM_THROW(39, "Write error : %s (cannot write end of stream)",
strerror(errno));
}
outFileSize += headerSize;
}
*readsize = inFileSize;
LZ4F_freeCompressionContext(ctx);
return outFileSize;
}
#endif
static unsigned long long
FIO_compressZstdFrame(FIO_prefs_t* const prefs,
const cRess_t* ressPtr,
const char* srcFileName, U64 fileSize,
int compressionLevel, U64* readsize)
{
cRess_t const ress = *ressPtr;
FILE* const srcFile = ress.srcFile;
FILE* const dstFile = ress.dstFile;
U64 compressedfilesize = 0;
ZSTD_EndDirective directive = ZSTD_e_continue;
2018-08-11 20:48:06 -07:00
/* stats */
ZSTD_frameProgression previous_zfp_update = { 0, 0, 0, 0, 0, 0 };
ZSTD_frameProgression previous_zfp_correction = { 0, 0, 0, 0, 0, 0 };
typedef enum { noChange, slower, faster } speedChange_e;
speedChange_e speedChange = noChange;
unsigned flushWaiting = 0;
2018-08-11 20:48:06 -07:00
unsigned inputPresented = 0;
unsigned inputBlocked = 0;
unsigned lastJobID = 0;
DISPLAYLEVEL(6, "compression using zstd format \n");
2017-02-08 07:54:23 -08:00
/* init */
if (fileSize != UTIL_FILESIZE_UNKNOWN) {
CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize));
}
(void)srcFileName;
/* Main compression loop */
do {
size_t stillToFlush;
/* Fill input Buffer */
size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile);
ZSTD_inBuffer inBuff = { ress.srcBuffer, inSize, 0 };
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
DISPLAYLEVEL(6, "fread %u bytes from source \n", (unsigned)inSize);
*readsize += inSize;
if ((inSize == 0) || (*readsize == fileSize))
directive = ZSTD_e_end;
stillToFlush = 1;
2018-08-09 15:16:31 -07:00
while ((inBuff.pos != inBuff.size) /* input buffer must be entirely ingested */
|| (directive == ZSTD_e_end && stillToFlush != 0) ) {
size_t const oldIPos = inBuff.pos;
ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 };
size_t const toFlushNow = ZSTD_toFlushNow(ress.cctx);
CHECK_V(stillToFlush, ZSTD_compressStream2(ress.cctx, &outBuff, &inBuff, directive));
/* count stats */
2018-08-11 20:48:06 -07:00
inputPresented++;
if (oldIPos == inBuff.pos) inputBlocked++; /* input buffer is full and can't take any more : input speed is faster than consumption rate */
if (!toFlushNow) flushWaiting = 1;
/* Write compressed stream */
DISPLAYLEVEL(6, "ZSTD_compress_generic(end:%u) => input pos(%u)<=(%u)size ; output generated %u bytes \n",
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
(unsigned)directive, (unsigned)inBuff.pos, (unsigned)inBuff.size, (unsigned)outBuff.pos);
if (outBuff.pos) {
size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile);
if (sizeCheck != outBuff.pos)
EXM_THROW(25, "Write error : %s (cannot write compressed block)",
strerror(errno));
compressedfilesize += outBuff.pos;
}
/* display notification; and adapt compression level */
if (READY_FOR_UPDATE()) {
ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx);
double const cShare = (double)zfp.produced / (zfp.consumed + !zfp.consumed/*avoid div0*/) * 100;
/* display progress notifications */
if (g_display_prefs.displayLevel >= 3) {
DISPLAYUPDATE(3, "\r(L%i) Buffered :%4u MB - Consumed :%4u MB - Compressed :%4u MB => %.2f%% ",
compressionLevel,
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
(unsigned)((zfp.ingested - zfp.consumed) >> 20),
(unsigned)(zfp.consumed >> 20),
(unsigned)(zfp.produced >> 20),
cShare );
} else { /* summarized notifications if == 2; */
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
DISPLAYLEVEL(2, "\rRead : %u ", (unsigned)(zfp.consumed >> 20));
if (fileSize != UTIL_FILESIZE_UNKNOWN)
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
DISPLAYLEVEL(2, "/ %u ", (unsigned)(fileSize >> 20));
DISPLAYLEVEL(2, "MB ==> %2.f%% ", cShare);
DELAY_NEXT_UPDATE();
}
/* adaptive mode : statistics measurement and speed correction */
if (prefs->adaptiveMode) {
/* check output speed */
if (zfp.currentJobID > 1) { /* only possible if nbWorkers >= 1 */
unsigned long long newlyProduced = zfp.produced - previous_zfp_update.produced;
unsigned long long newlyFlushed = zfp.flushed - previous_zfp_update.flushed;
assert(zfp.produced >= previous_zfp_update.produced);
assert(prefs->nbWorkers >= 1);
/* test if compression is blocked
* either because output is slow and all buffers are full
* or because input is slow and no job can start while waiting for at least one buffer to be filled.
* note : excluse starting part, since currentJobID > 1 */
if ( (zfp.consumed == previous_zfp_update.consumed) /* no data compressed : no data available, or no more buffer to compress to, OR compression is really slow (compression of a single block is slower than update rate)*/
&& (zfp.nbActiveWorkers == 0) /* confirmed : no compression ongoing */
) {
DISPLAYLEVEL(6, "all buffers full : compression stopped => slow down \n")
speedChange = slower;
}
previous_zfp_update = zfp;
if ( (newlyProduced > (newlyFlushed * 9 / 8)) /* compression produces more data than output can flush (though production can be spiky, due to work unit : (N==4)*block sizes) */
&& (flushWaiting == 0) /* flush speed was never slowed by lack of production, so it's operating at max capacity */
) {
DISPLAYLEVEL(6, "compression faster than flush (%llu > %llu), and flushed was never slowed down by lack of production => slow down \n", newlyProduced, newlyFlushed);
speedChange = slower;
}
flushWaiting = 0;
}
/* course correct only if there is at least one new job completed */
if (zfp.currentJobID > lastJobID) {
DISPLAYLEVEL(6, "compression level adaptation check \n")
/* check input speed */
if (zfp.currentJobID > (unsigned)(prefs->nbWorkers+1)) { /* warm up period, to fill all workers */
if (inputBlocked <= 0) {
DISPLAYLEVEL(6, "input is never blocked => input is slower than ingestion \n");
speedChange = slower;
} else if (speedChange == noChange) {
unsigned long long newlyIngested = zfp.ingested - previous_zfp_correction.ingested;
unsigned long long newlyConsumed = zfp.consumed - previous_zfp_correction.consumed;
unsigned long long newlyProduced = zfp.produced - previous_zfp_correction.produced;
unsigned long long newlyFlushed = zfp.flushed - previous_zfp_correction.flushed;
previous_zfp_correction = zfp;
assert(inputPresented > 0);
DISPLAYLEVEL(6, "input blocked %u/%u(%.2f) - ingested:%u vs %u:consumed - flushed:%u vs %u:produced \n",
inputBlocked, inputPresented, (double)inputBlocked/inputPresented*100,
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
(unsigned)newlyIngested, (unsigned)newlyConsumed,
(unsigned)newlyFlushed, (unsigned)newlyProduced);
if ( (inputBlocked > inputPresented / 8) /* input is waiting often, because input buffers is full : compression or output too slow */
&& (newlyFlushed * 33 / 32 > newlyProduced) /* flush everything that is produced */
&& (newlyIngested * 33 / 32 > newlyConsumed) /* input speed as fast or faster than compression speed */
) {
DISPLAYLEVEL(6, "recommend faster as in(%llu) >= (%llu)comp(%llu) <= out(%llu) \n",
newlyIngested, newlyConsumed, newlyProduced, newlyFlushed);
speedChange = faster;
}
}
inputBlocked = 0;
inputPresented = 0;
}
2018-08-11 20:48:06 -07:00
if (speedChange == slower) {
DISPLAYLEVEL(6, "slower speed , higher compression \n")
compressionLevel ++;
if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel();
if (compressionLevel > prefs->maxAdaptLevel) compressionLevel = prefs->maxAdaptLevel;
compressionLevel += (compressionLevel == 0); /* skip 0 */
ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
2018-08-11 20:48:06 -07:00
}
if (speedChange == faster) {
DISPLAYLEVEL(6, "faster speed , lighter compression \n")
2018-08-11 20:48:06 -07:00
compressionLevel --;
if (compressionLevel < prefs->minAdaptLevel) compressionLevel = prefs->minAdaptLevel;
2018-08-11 20:48:06 -07:00
compressionLevel -= (compressionLevel == 0); /* skip 0 */
ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
2018-08-11 20:48:06 -07:00
}
speedChange = noChange;
lastJobID = zfp.currentJobID;
} /* if (zfp.currentJobID > lastJobID) */
} /* if (g_adaptiveMode) */
} /* if (READY_FOR_UPDATE()) */
} /* while ((inBuff.pos != inBuff.size) */
} while (directive != ZSTD_e_end);
if (ferror(srcFile)) {
EXM_THROW(26, "Read error : I/O error");
}
if (fileSize != UTIL_FILESIZE_UNKNOWN && *readsize != fileSize) {
EXM_THROW(27, "Read error : Incomplete read : %llu / %llu B",
(unsigned long long)*readsize, (unsigned long long)fileSize);
}
return compressedfilesize;
}
/*! FIO_compressFilename_internal() :
* same as FIO_compressFilename_extRess(), with `ress.desFile` already opened.
* @return : 0 : compression completed correctly,
* 1 : missing or pb opening srcFileName
*/
static int
FIO_compressFilename_internal(FIO_prefs_t* const prefs,
cRess_t ress,
const char* dstFileName, const char* srcFileName,
int compressionLevel)
{
U64 readsize = 0;
U64 compressedfilesize = 0;
U64 const fileSize = UTIL_getFileSize(srcFileName);
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
DISPLAYLEVEL(5, "%s: %u bytes \n", srcFileName, (unsigned)fileSize);
/* compression format selection */
switch (prefs->compressionType) {
default:
case FIO_zstdCompression:
compressedfilesize = FIO_compressZstdFrame(prefs, &ress, srcFileName, fileSize, compressionLevel, &readsize);
break;
case FIO_gzipCompression:
#ifdef ZSTD_GZCOMPRESS
compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize);
#else
(void)compressionLevel;
EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n",
srcFileName);
#endif
break;
case FIO_xzCompression:
case FIO_lzmaCompression:
#ifdef ZSTD_LZMACOMPRESS
compressedfilesize = FIO_compressLzmaFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize, prefs->compressionType==FIO_lzmaCompression);
#else
(void)compressionLevel;
EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n",
srcFileName);
#endif
break;
case FIO_lz4Compression:
#ifdef ZSTD_LZ4COMPRESS
compressedfilesize = FIO_compressLz4Frame(&ress, srcFileName, fileSize, compressionLevel, prefs->checksumFlag, &readsize);
#else
(void)compressionLevel;
EXM_THROW(20, "zstd: %s: file cannot be compressed as lz4 (zstd compiled without ZSTD_LZ4COMPRESS) -- ignored \n",
srcFileName);
#endif
break;
}
/* Status */
DISPLAYLEVEL(2, "\r%79s\r", "");
DISPLAYLEVEL(2,"%-20s :%6.2f%% (%6llu => %6llu bytes, %s) \n",
srcFileName,
(double)compressedfilesize / (readsize+(!readsize)/*avoid div by zero*/) * 100,
2016-07-13 10:30:40 -07:00
(unsigned long long)readsize, (unsigned long long) compressedfilesize,
dstFileName);
return 0;
}
/*! FIO_compressFilename_dstFile() :
* open dstFileName, or pass-through if ress.dstFile != NULL,
* then start compression with FIO_compressFilename_internal().
* Manages source removal (--rm) and file permissions transfer.
* note : ress.srcFile must be != NULL,
* so reach this function through FIO_compressFilename_srcFile().
* @return : 0 : compression completed correctly,
* 1 : pb
*/
static int FIO_compressFilename_dstFile(FIO_prefs_t* const prefs,
cRess_t ress,
const char* dstFileName,
const char* srcFileName,
int compressionLevel)
{
int closeDstFile = 0;
int result;
stat_t statbuf;
int transfer_permissions = 0;
assert(ress.srcFile != NULL);
if (ress.dstFile == NULL) {
closeDstFile = 1;
DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s", dstFileName);
ress.dstFile = FIO_openDstFile(prefs, srcFileName, dstFileName);
if (ress.dstFile==NULL) return 1; /* could not open dstFileName */
/* Must only be added after FIO_openDstFile() succeeds.
* Otherwise we may delete the destination file if it already exists,
* and the user presses Ctrl-C when asked if they wish to overwrite.
*/
addHandler(dstFileName);
if ( strcmp (srcFileName, stdinmark)
&& UTIL_getFileStat(srcFileName, &statbuf))
transfer_permissions = 1;
}
result = FIO_compressFilename_internal(prefs, ress, dstFileName, srcFileName, compressionLevel);
if (closeDstFile) {
FILE* const dstFile = ress.dstFile;
ress.dstFile = NULL;
clearHandler();
if (fclose(dstFile)) { /* error closing dstFile */
DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
result=1;
}
if ( (result != 0) /* operation failure */
&& strcmp(dstFileName, nulmark) /* special case : don't remove() /dev/null */
&& strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */
) {
FIO_remove(dstFileName); /* remove compression artefact; note don't do anything special if remove() fails */
} else if ( strcmp(dstFileName, stdoutmark)
&& strcmp(dstFileName, nulmark)
&& transfer_permissions) {
UTIL_setFileStat(dstFileName, &statbuf);
}
}
return result;
}
2016-07-01 16:05:31 -07:00
/*! FIO_compressFilename_srcFile() :
* @return : 0 : compression completed correctly,
* 1 : missing or pb opening srcFileName
*/
static int
FIO_compressFilename_srcFile(FIO_prefs_t* const prefs,
cRess_t ress,
const char* dstFileName,
const char* srcFileName,
int compressionLevel)
{
int result;
/* File check */
if (UTIL_isDirectory(srcFileName)) {
DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName);
return 1;
}
ress.srcFile = FIO_openSrcFile(srcFileName);
if (ress.srcFile == NULL) return 1; /* srcFile could not be opened */
result = FIO_compressFilename_dstFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
fclose(ress.srcFile);
ress.srcFile = NULL;
if ( prefs->removeSrcFile /* --rm */
&& result == 0 /* success */
&& strcmp(srcFileName, stdinmark) /* exception : don't erase stdin */
) {
/* We must clear the handler, since after this point calling it would
* delete both the source and destination files.
*/
clearHandler();
if (FIO_remove(srcFileName))
EXM_THROW(1, "zstd: %s: %s", srcFileName, strerror(errno));
}
return result;
}
int FIO_compressFilename(FIO_prefs_t* const prefs,
const char* dstFileName, const char* srcFileName,
const char* dictFileName, int compressionLevel,
ZSTD_compressionParameters comprParams)
2015-01-23 16:58:16 -08:00
{
clock_t const start = clock();
U64 const fileSize = UTIL_getFileSize(srcFileName);
U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize;
2015-01-23 16:58:16 -08:00
cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, srcSize, comprParams);
int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
2015-01-23 16:58:16 -08:00
2016-07-01 16:05:31 -07:00
double const seconds = (double)(clock() - start) / CLOCKS_PER_SEC;
DISPLAYLEVEL(4, "Completed in %.2f sec \n", seconds);
FIO_freeCResources(ress);
return result;
2015-01-23 16:58:16 -08:00
}
/* FIO_determineCompressedName() :
* create a destination filename for compressed srcFileName.
* @return a pointer to it.
* This function never returns an error (it may abort() in case of pb)
*/
static const char*
FIO_determineCompressedName(const char* srcFileName, const char* suffix)
{
static size_t dfnbCapacity = 0;
static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */
size_t const sfnSize = strlen(srcFileName);
size_t const suffixSize = strlen(suffix);
if (dfnbCapacity <= sfnSize+suffixSize+1) {
/* resize buffer for dstName */
free(dstFileNameBuffer);
dfnbCapacity = sfnSize + suffixSize + 30;
dstFileNameBuffer = (char*)malloc(dfnbCapacity);
if (!dstFileNameBuffer) {
EXM_THROW(30, "zstd: %s", strerror(errno));
} }
assert(dstFileNameBuffer != NULL);
memcpy(dstFileNameBuffer, srcFileName, sfnSize);
memcpy(dstFileNameBuffer+sfnSize, suffix, suffixSize+1 /* Include terminating null */);
return dstFileNameBuffer;
}
/* FIO_compressMultipleFilenames() :
* compress nbFiles files
* into one destination (outFileName)
* or into one file each (outFileName == NULL, but suffix != NULL).
*/
int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs,
const char** inFileNamesTable, unsigned nbFiles,
2017-12-12 18:32:50 -08:00
const char* outFileName, const char* suffix,
const char* dictFileName, int compressionLevel,
ZSTD_compressionParameters comprParams)
{
int error = 0;
U64 const firstFileSize = UTIL_getFileSize(inFileNamesTable[0]);
U64 const firstSrcSize = (firstFileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : firstFileSize;
U64 const srcSize = (nbFiles != 1) ? ZSTD_CONTENTSIZE_UNKNOWN : firstSrcSize ;
cRess_t ress = FIO_createCResources(prefs, dictFileName, compressionLevel, srcSize, comprParams);
/* init */
assert(outFileName != NULL || suffix != NULL);
if (outFileName != NULL) { /* output into a single destination (stdout typically) */
ress.dstFile = FIO_openDstFile(prefs, NULL, outFileName);
if (ress.dstFile == NULL) { /* could not open outFileName */
error = 1;
} else {
unsigned u;
for (u=0; u<nbFiles; u++)
error |= FIO_compressFilename_srcFile(prefs, ress, outFileName, inFileNamesTable[u], compressionLevel);
if (fclose(ress.dstFile))
EXM_THROW(29, "Write error (%s) : cannot properly close %s",
strerror(errno), outFileName);
ress.dstFile = NULL;
}
} else {
unsigned u;
for (u=0; u<nbFiles; u++) {
const char* const srcFileName = inFileNamesTable[u];
const char* const dstFileName = FIO_determineCompressedName(srcFileName, suffix); /* cannot fail */
error |= FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
} }
FIO_freeCResources(ress);
return error;
}
#endif /* #ifndef ZSTD_NOCOMPRESS */
#ifndef ZSTD_NODECOMPRESS
/* **************************************************************************
* Decompression
***************************************************************************/
2015-12-17 11:30:14 -08:00
typedef struct {
void* srcBuffer;
size_t srcBufferSize;
size_t srcBufferLoaded;
2015-12-17 11:30:14 -08:00
void* dstBuffer;
size_t dstBufferSize;
2016-08-13 14:45:45 -07:00
ZSTD_DStream* dctx;
FILE* dstFile;
2015-12-17 11:30:14 -08:00
} dRess_t;
static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFileName)
2015-12-17 11:30:14 -08:00
{
dRess_t ress;
2016-07-13 08:38:39 -07:00
memset(&ress, 0, sizeof(ress));
2015-12-17 11:30:14 -08:00
2016-07-13 08:38:39 -07:00
/* Allocation */
2016-08-13 14:45:45 -07:00
ress.dctx = ZSTD_createDStream();
if (ress.dctx==NULL)
EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno));
CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) );
2016-08-13 14:45:45 -07:00
ress.srcBufferSize = ZSTD_DStreamInSize();
2015-12-17 11:30:14 -08:00
ress.srcBuffer = malloc(ress.srcBufferSize);
2016-08-13 14:45:45 -07:00
ress.dstBufferSize = ZSTD_DStreamOutSize();
2015-12-17 11:30:14 -08:00
ress.dstBuffer = malloc(ress.dstBufferSize);
if (!ress.srcBuffer || !ress.dstBuffer)
EXM_THROW(61, "Allocation error : not enough memory");
2015-12-17 11:30:14 -08:00
/* dictionary */
2016-09-14 08:26:59 -07:00
{ void* dictBuffer;
size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName);
CHECK( ZSTD_initDStream_usingDict(ress.dctx, dictBuffer, dictBufferSize) );
2016-09-14 08:26:59 -07:00
free(dictBuffer);
}
2015-12-17 11:30:14 -08:00
return ress;
}
static void FIO_freeDResources(dRess_t ress)
{
CHECK( ZSTD_freeDStream(ress.dctx) );
2015-12-17 11:30:14 -08:00
free(ress.srcBuffer);
free(ress.dstBuffer);
}
/** FIO_fwriteSparse() :
* @return : storedSkips, to be provided to next call to FIO_fwriteSparse() of LZ4IO_fwriteSparseEnd() */
static unsigned FIO_fwriteSparse(FIO_prefs_t* const prefs, FILE* file, const void* buffer, size_t bufferSize, unsigned storedSkips)
{
const size_t* const bufferT = (const size_t*)buffer; /* Buffer is supposed malloc'ed, hence aligned on size_t */
size_t bufferSizeT = bufferSize / sizeof(size_t);
const size_t* const bufferTEnd = bufferT + bufferSizeT;
const size_t* ptrT = bufferT;
static const size_t segmentSizeT = (32 KB) / sizeof(size_t); /* 0-test re-attempted every 32 KB */
if (!prefs->sparseFileSupport) { /* normal write */
size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file);
if (sizeCheck != bufferSize)
EXM_THROW(70, "Write error : %s (cannot write decoded block)",
strerror(errno));
return 0;
}
/* avoid int overflow */
if (storedSkips > 1 GB) {
int const seekResult = LONG_SEEK(file, 1 GB, SEEK_CUR);
if (seekResult != 0)
EXM_THROW(71, "1 GB skip error (sparse file support)");
storedSkips -= 1 GB;
}
while (ptrT < bufferTEnd) {
size_t seg0SizeT = segmentSizeT;
size_t nb0T;
/* count leading zeros */
if (seg0SizeT > bufferSizeT) seg0SizeT = bufferSizeT;
bufferSizeT -= seg0SizeT;
for (nb0T=0; (nb0T < seg0SizeT) && (ptrT[nb0T] == 0); nb0T++) ;
storedSkips += (unsigned)(nb0T * sizeof(size_t));
if (nb0T != seg0SizeT) { /* not all 0s */
int const seekResult = LONG_SEEK(file, storedSkips, SEEK_CUR);
if (seekResult) EXM_THROW(72, "Sparse skip error ; try --no-sparse");
storedSkips = 0;
seg0SizeT -= nb0T;
ptrT += nb0T;
{ size_t const sizeCheck = fwrite(ptrT, sizeof(size_t), seg0SizeT, file);
if (sizeCheck != seg0SizeT)
EXM_THROW(73, "Write error : cannot write decoded block");
} }
ptrT += seg0SizeT;
}
{ static size_t const maskT = sizeof(size_t)-1;
if (bufferSize & maskT) {
/* size not multiple of sizeof(size_t) : implies end of block */
const char* const restStart = (const char*)bufferTEnd;
const char* restPtr = restStart;
size_t restSize = bufferSize & maskT;
const char* const restEnd = restStart + restSize;
for ( ; (restPtr < restEnd) && (*restPtr == 0); restPtr++) ;
storedSkips += (unsigned) (restPtr - restStart);
if (restPtr != restEnd) {
int seekResult = LONG_SEEK(file, storedSkips, SEEK_CUR);
if (seekResult)
EXM_THROW(74, "Sparse skip error ; try --no-sparse");
storedSkips = 0;
{ size_t const sizeCheck = fwrite(restPtr, 1, (size_t)(restEnd - restPtr), file);
if (sizeCheck != (size_t)(restEnd - restPtr))
EXM_THROW(75, "Write error : cannot write decoded end of block");
} } } }
return storedSkips;
}
static void FIO_fwriteSparseEnd(FIO_prefs_t* const prefs, FILE* file, unsigned storedSkips)
{
if (storedSkips>0) {
assert(prefs->sparseFileSupport > 0); /* storedSkips>0 implies sparse support is enabled */
if (LONG_SEEK(file, storedSkips-1, SEEK_CUR) != 0)
EXM_THROW(69, "Final skip error (sparse file support)");
/* last zero must be explicitly written,
* so that skipped ones get implicitly translated as zero by FS */
{ const char lastZeroByte[1] = { 0 };
if (fwrite(lastZeroByte, 1, 1, file) != 1)
EXM_THROW(69, "Write error : cannot write last zero");
} }
}
2016-12-05 08:39:38 -08:00
/** FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode
@return : 0 (no error) */
static int FIO_passThrough(FIO_prefs_t* const prefs,
FILE* foutput, FILE* finput,
void* buffer, size_t bufferSize,
size_t alreadyLoaded)
2016-12-05 08:39:38 -08:00
{
size_t const blockSize = MIN(64 KB, bufferSize);
size_t readFromInput = 1;
unsigned storedSkips = 0;
2016-12-05 09:31:14 -08:00
/* assumption : ress->srcBufferLoaded bytes already loaded and stored within buffer */
{ size_t const sizeCheck = fwrite(buffer, 1, alreadyLoaded, foutput);
if (sizeCheck != alreadyLoaded) {
DISPLAYLEVEL(1, "Pass-through write error \n");
return 1;
} }
2016-12-05 08:39:38 -08:00
while (readFromInput) {
readFromInput = fread(buffer, 1, blockSize, finput);
storedSkips = FIO_fwriteSparse(prefs, foutput, buffer, readFromInput, storedSkips);
2016-12-05 08:39:38 -08:00
}
FIO_fwriteSparseEnd(prefs, foutput, storedSkips);
2016-12-05 08:39:38 -08:00
return 0;
}
2018-02-01 16:13:04 -08:00
/* FIO_highbit64() :
* gives position of highest bit.
* note : only works for v > 0 !
*/
static unsigned FIO_highbit64(unsigned long long v)
{
unsigned count = 0;
assert(v != 0);
v >>= 1;
while (v) { v >>= 1; count++; }
return count;
}
/* FIO_zstdErrorHelp() :
* detailed error message when requested window size is too large */
static void FIO_zstdErrorHelp(FIO_prefs_t* const prefs, dRess_t* ress, size_t err, char const* srcFileName)
{
ZSTD_frameHeader header;
2018-02-01 16:13:04 -08:00
/* Help message only for one specific error */
if (ZSTD_getErrorCode(err) != ZSTD_error_frameParameter_windowTooLarge)
return;
2018-02-01 16:13:04 -08:00
/* Try to decode the frame header */
2018-02-01 16:13:04 -08:00
err = ZSTD_getFrameHeader(&header, ress->srcBuffer, ress->srcBufferLoaded);
if (err == 0) {
2018-02-01 20:16:00 -08:00
unsigned long long const windowSize = header.windowSize;
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
unsigned const windowLog = FIO_highbit64(windowSize) + ((windowSize & (windowSize - 1)) != 0);
assert(prefs->memLimit > 0);
DISPLAYLEVEL(1, "%s : Window size larger than maximum : %llu > %u\n",
srcFileName, windowSize, prefs->memLimit);
if (windowLog <= ZSTD_WINDOWLOG_MAX) {
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
unsigned const windowMB = (unsigned)((windowSize >> 20) + ((windowSize & ((1 MB) - 1)) != 0));
assert(windowSize < (U64)(1ULL << 52)); /* ensure now overflow for windowMB */
DISPLAYLEVEL(1, "%s : Use --long=%u or --memory=%uMB\n",
srcFileName, windowLog, windowMB);
return;
}
}
2018-02-01 16:13:04 -08:00
DISPLAYLEVEL(1, "%s : Window log larger than ZSTD_WINDOWLOG_MAX=%u; not supported\n",
srcFileName, ZSTD_WINDOWLOG_MAX);
}
2016-12-05 08:39:38 -08:00
/** FIO_decompressFrame() :
* @return : size of decoded zstd frame, or an error code
*/
#define FIO_ERROR_FRAME_DECODING ((unsigned long long)(-2))
static unsigned long long FIO_decompressZstdFrame(
FIO_prefs_t* const prefs,
dRess_t* ress,
2016-12-05 08:39:38 -08:00
FILE* finput,
const char* srcFileName,
U64 alreadyDecoded)
2015-01-23 16:58:16 -08:00
{
2016-08-28 12:47:17 -07:00
U64 frameSize = 0;
U32 storedSkips = 0;
2015-01-23 16:58:16 -08:00
size_t const srcFileLength = strlen(srcFileName);
if (srcFileLength>20) srcFileName += srcFileLength-20; /* display last 20 characters only */
2016-12-05 08:39:38 -08:00
ZSTD_resetDStream(ress->dctx);
/* Header loading : ensures ZSTD_getFrameHeader() will succeed */
{ size_t const toDecode = ZSTD_FRAMEHEADERSIZE_MAX;
if (ress->srcBufferLoaded < toDecode) {
size_t const toRead = toDecode - ress->srcBufferLoaded;
void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded;
ress->srcBufferLoaded += fread(startPosition, 1, toRead, finput);
} }
/* Main decompression Loop */
2016-02-02 05:36:49 -08:00
while (1) {
2016-12-05 08:39:38 -08:00
ZSTD_inBuffer inBuff = { ress->srcBuffer, ress->srcBufferLoaded, 0 };
ZSTD_outBuffer outBuff= { ress->dstBuffer, ress->dstBufferSize, 0 };
size_t const readSizeHint = ZSTD_decompressStream(ress->dctx, &outBuff, &inBuff);
if (ZSTD_isError(readSizeHint)) {
DISPLAYLEVEL(1, "%s : Decoding error (36) : %s \n",
srcFileName, ZSTD_getErrorName(readSizeHint));
FIO_zstdErrorHelp(prefs, ress, readSizeHint, srcFileName);
return FIO_ERROR_FRAME_DECODING;
}
2015-11-25 05:42:45 -08:00
/* Write block */
storedSkips = FIO_fwriteSparse(prefs, ress->dstFile, ress->dstBuffer, outBuff.pos, storedSkips);
2016-08-16 16:39:22 -07:00
frameSize += outBuff.pos;
DISPLAYUPDATE(2, "\r%-20.20s : %u MB... ",
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
srcFileName, (unsigned)((alreadyDecoded+frameSize)>>20) );
2015-11-25 05:42:45 -08:00
2016-12-05 08:39:38 -08:00
if (inBuff.pos > 0) {
memmove(ress->srcBuffer, (char*)ress->srcBuffer + inBuff.pos, inBuff.size - inBuff.pos);
ress->srcBufferLoaded -= inBuff.pos;
}
2016-09-07 05:54:23 -07:00
if (readSizeHint == 0) break; /* end of frame */
if (inBuff.size != inBuff.pos) {
DISPLAYLEVEL(1, "%s : Decoding error (37) : should consume entire input \n",
srcFileName);
return FIO_ERROR_FRAME_DECODING;
}
2015-01-23 16:58:16 -08:00
/* Fill input buffer */
{ size_t const toDecode = MIN(readSizeHint, ress->srcBufferSize); /* support large skippable frames */
if (ress->srcBufferLoaded < toDecode) {
size_t const toRead = toDecode - ress->srcBufferLoaded; /* > 0 */
void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded;
size_t const readSize = fread(startPosition, 1, toRead, finput);
if (readSize==0) {
DISPLAYLEVEL(1, "%s : Read error (39) : premature end \n",
srcFileName);
return FIO_ERROR_FRAME_DECODING;
}
ress->srcBufferLoaded += readSize;
} } }
2015-01-23 16:58:16 -08:00
FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips);
2015-11-25 05:42:45 -08:00
return frameSize;
2015-09-10 15:26:09 -07:00
}
2016-12-01 02:56:31 -08:00
#ifdef ZSTD_GZDECOMPRESS
static unsigned long long FIO_decompressGzFrame(dRess_t* ress,
FILE* srcFile, const char* srcFileName)
2016-12-02 04:50:29 -08:00
{
2016-12-02 12:40:57 -08:00
unsigned long long outFileSize = 0;
2016-12-02 06:01:31 -08:00
z_stream strm;
int flush = Z_NO_FLUSH;
int decodingError = 0;
2016-12-02 12:40:57 -08:00
2016-12-02 06:01:31 -08:00
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.next_in = 0;
2017-02-27 04:21:05 -08:00
strm.avail_in = 0;
/* see http://www.zlib.net/manual.html */
if (inflateInit2(&strm, 15 /* maxWindowLogSize */ + 16 /* gzip only */) != Z_OK)
return FIO_ERROR_FRAME_DECODING;
2016-12-02 12:40:57 -08:00
strm.next_out = (Bytef*)ress->dstBuffer;
strm.avail_out = (uInt)ress->dstBufferSize;
strm.avail_in = (uInt)ress->srcBufferLoaded;
2016-12-05 08:39:38 -08:00
strm.next_in = (z_const unsigned char*)ress->srcBuffer;
2016-12-01 02:56:31 -08:00
2016-12-02 06:01:31 -08:00
for ( ; ; ) {
int ret;
2016-12-05 08:39:38 -08:00
if (strm.avail_in == 0) {
ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile);
if (ress->srcBufferLoaded == 0) flush = Z_FINISH;
2016-12-05 08:39:38 -08:00
strm.next_in = (z_const unsigned char*)ress->srcBuffer;
strm.avail_in = (uInt)ress->srcBufferLoaded;
2016-12-02 04:11:39 -08:00
}
ret = inflate(&strm, flush);
if (ret == Z_BUF_ERROR) {
DISPLAYLEVEL(1, "zstd: %s: premature gz end \n", srcFileName);
decodingError = 1; break;
}
if (ret != Z_OK && ret != Z_STREAM_END) {
DISPLAYLEVEL(1, "zstd: %s: inflate error %d \n", srcFileName, ret);
decodingError = 1; break;
}
2016-12-05 08:39:38 -08:00
{ size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
2016-12-02 12:40:57 -08:00
if (decompBytes) {
if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes) {
DISPLAYLEVEL(1, "zstd: %s \n", strerror(errno));
decodingError = 1; break;
}
2016-12-02 12:40:57 -08:00
outFileSize += decompBytes;
strm.next_out = (Bytef*)ress->dstBuffer;
strm.avail_out = (uInt)ress->dstBufferSize;
2016-12-05 08:39:38 -08:00
}
}
if (ret == Z_STREAM_END) break;
}
2016-12-01 02:56:31 -08:00
2017-07-03 15:14:55 -07:00
if (strm.avail_in > 0)
memmove(ress->srcBuffer, strm.next_in, strm.avail_in);
2016-12-05 08:39:38 -08:00
ress->srcBufferLoaded = strm.avail_in;
if ( (inflateEnd(&strm) != Z_OK) /* release resources ; error detected */
&& (decodingError==0) ) {
DISPLAYLEVEL(1, "zstd: %s: inflateEnd error \n", srcFileName);
decodingError = 1;
}
return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize;
}
#endif
#ifdef ZSTD_LZMADECOMPRESS
static unsigned long long FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile, const char* srcFileName, int plain_lzma)
{
unsigned long long outFileSize = 0;
lzma_stream strm = LZMA_STREAM_INIT;
lzma_action action = LZMA_RUN;
lzma_ret initRet;
int decodingError = 0;
strm.next_in = 0;
strm.avail_in = 0;
if (plain_lzma) {
initRet = lzma_alone_decoder(&strm, UINT64_MAX); /* LZMA */
} else {
initRet = lzma_stream_decoder(&strm, UINT64_MAX, 0); /* XZ */
}
if (initRet != LZMA_OK) {
DISPLAYLEVEL(1, "zstd: %s: %s error %d \n",
plain_lzma ? "lzma_alone_decoder" : "lzma_stream_decoder",
srcFileName, initRet);
return FIO_ERROR_FRAME_DECODING;
}
strm.next_out = (BYTE*)ress->dstBuffer;
strm.avail_out = ress->dstBufferSize;
strm.next_in = (BYTE const*)ress->srcBuffer;
strm.avail_in = ress->srcBufferLoaded;
for ( ; ; ) {
lzma_ret ret;
if (strm.avail_in == 0) {
ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile);
if (ress->srcBufferLoaded == 0) action = LZMA_FINISH;
strm.next_in = (BYTE const*)ress->srcBuffer;
strm.avail_in = ress->srcBufferLoaded;
}
ret = lzma_code(&strm, action);
if (ret == LZMA_BUF_ERROR) {
DISPLAYLEVEL(1, "zstd: %s: premature lzma end \n", srcFileName);
decodingError = 1; break;
}
if (ret != LZMA_OK && ret != LZMA_STREAM_END) {
DISPLAYLEVEL(1, "zstd: %s: lzma_code decoding error %d \n",
srcFileName, ret);
decodingError = 1; break;
}
{ size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
if (decompBytes) {
if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes) {
DISPLAYLEVEL(1, "zstd: %s \n", strerror(errno));
decodingError = 1; break;
}
outFileSize += decompBytes;
strm.next_out = (BYTE*)ress->dstBuffer;
strm.avail_out = ress->dstBufferSize;
} }
if (ret == LZMA_STREAM_END) break;
}
2017-07-03 15:14:55 -07:00
if (strm.avail_in > 0)
memmove(ress->srcBuffer, strm.next_in, strm.avail_in);
ress->srcBufferLoaded = strm.avail_in;
lzma_end(&strm);
return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize;
2016-12-01 02:56:31 -08:00
}
#endif
#ifdef ZSTD_LZ4DECOMPRESS
static unsigned long long FIO_decompressLz4Frame(dRess_t* ress,
FILE* srcFile, const char* srcFileName)
{
unsigned long long filesize = 0;
LZ4F_errorCode_t nextToLoad;
LZ4F_decompressionContext_t dCtx;
LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION);
int decodingError = 0;
if (LZ4F_isError(errorCode)) {
DISPLAYLEVEL(1, "zstd: failed to create lz4 decompression context \n");
return FIO_ERROR_FRAME_DECODING;
}
/* Init feed with magic number (already consumed from FILE* sFile) */
{ size_t inSize = 4;
size_t outSize= 0;
MEM_writeLE32(ress->srcBuffer, LZ4_MAGICNUMBER);
nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &outSize, ress->srcBuffer, &inSize, NULL);
if (LZ4F_isError(nextToLoad)) {
DISPLAYLEVEL(1, "zstd: %s: lz4 header error : %s \n",
srcFileName, LZ4F_getErrorName(nextToLoad));
LZ4F_freeDecompressionContext(dCtx);
return FIO_ERROR_FRAME_DECODING;
} }
/* Main Loop */
for (;nextToLoad;) {
size_t readSize;
size_t pos = 0;
size_t decodedBytes = ress->dstBufferSize;
/* Read input */
if (nextToLoad > ress->srcBufferSize) nextToLoad = ress->srcBufferSize;
readSize = fread(ress->srcBuffer, 1, nextToLoad, srcFile);
if (!readSize) break; /* reached end of file or stream */
while ((pos < readSize) || (decodedBytes == ress->dstBufferSize)) { /* still to read, or still to flush */
/* Decode Input (at least partially) */
size_t remaining = readSize - pos;
decodedBytes = ress->dstBufferSize;
nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &decodedBytes, (char*)(ress->srcBuffer)+pos, &remaining, NULL);
if (LZ4F_isError(nextToLoad)) {
DISPLAYLEVEL(1, "zstd: %s: lz4 decompression error : %s \n",
srcFileName, LZ4F_getErrorName(nextToLoad));
decodingError = 1; nextToLoad = 0; break;
}
pos += remaining;
/* Write Block */
if (decodedBytes) {
if (fwrite(ress->dstBuffer, 1, decodedBytes, ress->dstFile) != decodedBytes) {
2017-08-14 05:03:46 -07:00
DISPLAYLEVEL(1, "zstd: %s \n", strerror(errno));
decodingError = 1; nextToLoad = 0; break;
}
filesize += decodedBytes;
DISPLAYUPDATE(2, "\rDecompressed : %u MB ", (unsigned)(filesize>>20));
}
if (!nextToLoad) break;
}
}
/* can be out because readSize == 0, which could be an fread() error */
if (ferror(srcFile)) {
DISPLAYLEVEL(1, "zstd: %s: read error \n", srcFileName);
decodingError=1;
}
if (nextToLoad!=0) {
DISPLAYLEVEL(1, "zstd: %s: unfinished lz4 stream \n", srcFileName);
decodingError=1;
}
LZ4F_freeDecompressionContext(dCtx);
2017-07-03 15:14:55 -07:00
ress->srcBufferLoaded = 0; /* LZ4F will reach exact frame boundary */
return decodingError ? FIO_ERROR_FRAME_DECODING : filesize;
}
#endif
2016-12-01 02:56:31 -08:00
/** FIO_decompressFrames() :
* Find and decode frames inside srcFile
* srcFile presumed opened and valid
* @return : 0 : OK
* 1 : error
*/
static int FIO_decompressFrames(FIO_prefs_t* const prefs, dRess_t ress, FILE* srcFile,
const char* dstFileName, const char* srcFileName)
2015-10-18 14:18:32 -07:00
{
2016-07-26 07:44:09 -07:00
unsigned readSomething = 0;
2016-12-02 12:40:57 -08:00
unsigned long long filesize = 0;
assert(srcFile != NULL);
2016-11-30 06:05:54 -08:00
2016-12-02 06:01:31 -08:00
/* for each frame */
for ( ; ; ) {
/* check magic number -> version */
size_t const toRead = 4;
const BYTE* const buf = (const BYTE*)ress.srcBuffer;
2017-07-03 15:14:55 -07:00
if (ress.srcBufferLoaded < toRead) /* load up to 4 bytes for header */
ress.srcBufferLoaded += fread((char*)ress.srcBuffer + ress.srcBufferLoaded,
(size_t)1, toRead - ress.srcBufferLoaded, srcFile);
2016-12-05 08:39:38 -08:00
if (ress.srcBufferLoaded==0) {
2017-07-03 15:14:55 -07:00
if (readSomething==0) { /* srcFile is empty (which is invalid) */
DISPLAYLEVEL(1, "zstd: %s: unexpected end of file \n", srcFileName);
return 1;
2017-07-03 15:14:55 -07:00
} /* else, just reached frame boundary */
break; /* no more input */
}
readSomething = 1; /* there is at least 1 byte in srcFile */
if (ress.srcBufferLoaded < toRead) {
DISPLAYLEVEL(1, "zstd: %s: unknown header \n", srcFileName);
return 1;
}
2017-07-03 15:14:55 -07:00
if (ZSTD_isFrame(buf, ress.srcBufferLoaded)) {
unsigned long long const frameSize = FIO_decompressZstdFrame(prefs, &ress, srcFile, srcFileName, filesize);
2017-07-03 15:14:55 -07:00
if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
filesize += frameSize;
} else if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */
2016-12-02 06:01:31 -08:00
#ifdef ZSTD_GZDECOMPRESS
2017-07-03 13:47:46 -07:00
unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFile, srcFileName);
if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
filesize += frameSize;
2016-11-30 06:05:54 -08:00
#else
DISPLAYLEVEL(1, "zstd: %s: gzip file cannot be uncompressed (zstd compiled without HAVE_ZLIB) -- ignored \n", srcFileName);
return 1;
#endif
} else if ((buf[0] == 0xFD && buf[1] == 0x37) /* xz magic number */
|| (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */
#ifdef ZSTD_LZMADECOMPRESS
unsigned long long const frameSize = FIO_decompressLzmaFrame(&ress, srcFile, srcFileName, buf[0] != 0xFD);
if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
filesize += frameSize;
#else
DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without HAVE_LZMA) -- ignored \n", srcFileName);
return 1;
#endif
} else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) {
#ifdef ZSTD_LZ4DECOMPRESS
unsigned long long const frameSize = FIO_decompressLz4Frame(&ress, srcFile, srcFileName);
if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
filesize += frameSize;
#else
DISPLAYLEVEL(1, "zstd: %s: lz4 file cannot be uncompressed (zstd compiled without HAVE_LZ4) -- ignored \n", srcFileName);
return 1;
2016-11-30 06:05:54 -08:00
#endif
} else if ((prefs->overwrite) && !strcmp (dstFileName, stdoutmark)) { /* pass-through mode */
return FIO_passThrough(prefs,
ress.dstFile, srcFile,
ress.srcBuffer, ress.srcBufferSize,
ress.srcBufferLoaded);
} else {
DISPLAYLEVEL(1, "zstd: %s: unsupported format \n", srcFileName);
return 1;
} } /* for each frame */
2015-10-18 14:18:32 -07:00
2015-12-17 11:30:14 -08:00
/* Final Status */
2015-10-18 14:18:32 -07:00
DISPLAYLEVEL(2, "\r%79s\r", "");
2016-08-28 12:47:17 -07:00
DISPLAYLEVEL(2, "%-20s: %llu bytes \n", srcFileName, filesize);
2015-10-18 14:18:32 -07:00
return 0;
}
/** FIO_decompressDstFile() :
open `dstFileName`,
or path-through if ress.dstFile is already != 0,
then start decompression process (FIO_decompressFrames()).
@return : 0 : OK
1 : operation aborted
*/
static int FIO_decompressDstFile(FIO_prefs_t* const prefs,
dRess_t ress, FILE* srcFile,
const char* dstFileName, const char* srcFileName)
{
int result;
stat_t statbuf;
int transfer_permissions = 0;
int releaseDstFile = 0;
if (ress.dstFile == NULL) {
releaseDstFile = 1;
ress.dstFile = FIO_openDstFile(prefs, srcFileName, dstFileName);
if (ress.dstFile==0) return 1;
/* Must only be added after FIO_openDstFile() succeeds.
* Otherwise we may delete the destination file if it already exists,
* and the user presses Ctrl-C when asked if they wish to overwrite.
*/
addHandler(dstFileName);
if ( strcmp(srcFileName, stdinmark) /* special case : don't transfer permissions from stdin */
&& UTIL_getFileStat(srcFileName, &statbuf) )
transfer_permissions = 1;
}
result = FIO_decompressFrames(prefs, ress, srcFile, dstFileName, srcFileName);
if (releaseDstFile) {
FILE* const dstFile = ress.dstFile;
clearHandler();
ress.dstFile = NULL;
if (fclose(dstFile)) {
DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
result = 1;
}
if ( (result != 0) /* operation failure */
&& strcmp(dstFileName, nulmark) /* special case : don't remove() /dev/null (#316) */
&& strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */
) {
FIO_remove(dstFileName); /* remove decompression artefact; note: don't do anything special if remove() fails */
} else { /* operation success */
if ( strcmp(dstFileName, stdoutmark) /* special case : don't chmod stdout */
&& strcmp(dstFileName, nulmark) /* special case : don't chmod /dev/null */
&& transfer_permissions ) /* file permissions correctly extracted from src */
UTIL_setFileStat(dstFileName, &statbuf); /* transfer file permissions from src into dst */
}
}
return result;
}
/** FIO_decompressSrcFile() :
Open `srcFileName`, transfer control to decompressDstFile()
@return : 0 : OK
1 : error
*/
static int FIO_decompressSrcFile(FIO_prefs_t* const prefs, dRess_t ress, const char* dstFileName, const char* srcFileName)
{
FILE* srcFile;
int result;
if (UTIL_isDirectory(srcFileName)) {
DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName);
return 1;
}
srcFile = FIO_openSrcFile(srcFileName);
if (srcFile==NULL) return 1;
2018-09-24 00:52:19 -07:00
ress.srcBufferLoaded = 0;
result = FIO_decompressDstFile(prefs, ress, srcFile, dstFileName, srcFileName);
2016-12-02 04:50:29 -08:00
/* Close file */
if (fclose(srcFile)) {
DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); /* error should not happen */
return 1;
}
if ( prefs->removeSrcFile /* --rm */
&& (result==0) /* decompression successful */
&& strcmp(srcFileName, stdinmark) ) /* not stdin */ {
/* We must clear the handler, since after this point calling it would
* delete both the source and destination files.
*/
clearHandler();
if (FIO_remove(srcFileName)) {
/* failed to remove src file */
DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));
return 1;
} }
return result;
}
2015-10-18 14:18:32 -07:00
int FIO_decompressFilename(FIO_prefs_t* const prefs,
const char* dstFileName, const char* srcFileName,
2015-12-17 11:30:14 -08:00
const char* dictFileName)
{
dRess_t const ress = FIO_createDResources(prefs, dictFileName);
2015-12-17 11:30:14 -08:00
int const decodingError = FIO_decompressSrcFile(prefs, ress, dstFileName, srcFileName);
2015-12-17 11:30:14 -08:00
FIO_freeDResources(ress);
return decodingError;
2015-12-17 11:30:14 -08:00
}
/* FIO_determineDstName() :
* create a destination filename from a srcFileName.
* @return a pointer to it.
* @return == NULL if there is an error */
static const char*
FIO_determineDstName(const char* srcFileName)
{
static size_t dfnbCapacity = 0;
static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */
size_t const sfnSize = strlen(srcFileName);
size_t suffixSize;
const char* const suffixPtr = strrchr(srcFileName, '.');
if (suffixPtr == NULL) {
DISPLAYLEVEL(1, "zstd: %s: unknown suffix -- ignored \n",
srcFileName);
return NULL;
}
suffixSize = strlen(suffixPtr);
/* check suffix is authorized */
if (sfnSize <= suffixSize
|| ( strcmp(suffixPtr, ZSTD_EXTENSION)
#ifdef ZSTD_GZDECOMPRESS
&& strcmp(suffixPtr, GZ_EXTENSION)
#endif
#ifdef ZSTD_LZMADECOMPRESS
&& strcmp(suffixPtr, XZ_EXTENSION)
&& strcmp(suffixPtr, LZMA_EXTENSION)
#endif
#ifdef ZSTD_LZ4DECOMPRESS
&& strcmp(suffixPtr, LZ4_EXTENSION)
#endif
) ) {
const char* suffixlist = ZSTD_EXTENSION
#ifdef ZSTD_GZDECOMPRESS
"/" GZ_EXTENSION
#endif
#ifdef ZSTD_LZMADECOMPRESS
"/" XZ_EXTENSION "/" LZMA_EXTENSION
#endif
#ifdef ZSTD_LZ4DECOMPRESS
"/" LZ4_EXTENSION
#endif
;
DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s expected) -- ignored \n",
srcFileName, suffixlist);
return NULL;
}
/* allocate enough space to write dstFilename into it */
if (dfnbCapacity+suffixSize <= sfnSize+1) {
free(dstFileNameBuffer);
dfnbCapacity = sfnSize + 20;
dstFileNameBuffer = (char*)malloc(dfnbCapacity);
if (dstFileNameBuffer==NULL)
EXM_THROW(74, "%s : not enough memory for dstFileName", strerror(errno));
}
/* return dst name == src name truncated from suffix */
assert(dstFileNameBuffer != NULL);
memcpy(dstFileNameBuffer, srcFileName, sfnSize - suffixSize);
dstFileNameBuffer[sfnSize-suffixSize] = '\0';
return dstFileNameBuffer;
/* note : dstFileNameBuffer memory is not going to be free */
}
int
FIO_decompressMultipleFilenames(FIO_prefs_t* const prefs,
const char* srcNamesTable[], unsigned nbFiles,
const char* outFileName,
const char* dictFileName)
2015-12-17 11:30:14 -08:00
{
int error = 0;
dRess_t ress = FIO_createDResources(prefs, dictFileName);
2015-12-17 11:30:14 -08:00
2017-12-12 18:32:50 -08:00
if (outFileName) {
unsigned u;
ress.dstFile = FIO_openDstFile(prefs, NULL, outFileName);
2017-12-12 18:32:50 -08:00
if (ress.dstFile == 0) EXM_THROW(71, "cannot open %s", outFileName);
for (u=0; u<nbFiles; u++)
error |= FIO_decompressSrcFile(prefs, ress, outFileName, srcNamesTable[u]);
if (fclose(ress.dstFile))
EXM_THROW(72, "Write error : %s : cannot properly close output file",
strerror(errno));
} else {
unsigned u;
for (u=0; u<nbFiles; u++) { /* create dstFileName */
const char* const srcFileName = srcNamesTable[u];
const char* const dstFileName = FIO_determineDstName(srcFileName);
if (dstFileName == NULL) { error=1; continue; }
error |= FIO_decompressSrcFile(prefs, ress, dstFileName, srcFileName);
}
}
2015-12-17 11:30:14 -08:00
FIO_freeDResources(ress);
return error;
2015-12-17 11:30:14 -08:00
}
/* **************************************************************************
* .zst file info (--list command)
***************************************************************************/
typedef struct {
U64 decompressedSize;
U64 compressedSize;
U64 windowSize;
int numActualFrames;
int numSkippableFrames;
int decompUnavailable;
int usesCheck;
U32 nbFiles;
} fileInfo_t;
typedef enum {
info_success=0,
info_frame_error=1,
info_not_zstd=2,
info_file_error=3,
info_truncated_input=4,
} InfoError;
#define ERROR_IF(c,n,...) { \
if (c) { \
DISPLAYLEVEL(1, __VA_ARGS__); \
DISPLAYLEVEL(1, " \n"); \
return n; \
} \
}
static InfoError
FIO_analyzeFrames(fileInfo_t* info, FILE* const srcFile)
{
/* begin analyzing frame */
for ( ; ; ) {
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
size_t const numBytesRead = fread(headerBuffer, 1, sizeof(headerBuffer), srcFile);
if (numBytesRead < ZSTD_FRAMEHEADERSIZE_MIN) {
if ( feof(srcFile)
&& (numBytesRead == 0)
&& (info->compressedSize > 0)
&& (info->compressedSize != UTIL_FILESIZE_UNKNOWN) ) {
unsigned long long file_position = (unsigned long long) LONG_TELL(srcFile);
unsigned long long file_size = (unsigned long long) info->compressedSize;
ERROR_IF(file_position != file_size, info_truncated_input,
"Error: seeked to position %llu, which is beyond file size of %llu\n",
file_position,
file_size);
break; /* correct end of file => success */
}
ERROR_IF(feof(srcFile), info_not_zstd, "Error: reached end of file with incomplete frame");
ERROR_IF(1, info_frame_error, "Error: did not reach end of file but ran out of frames");
}
{ U32 const magicNumber = MEM_readLE32(headerBuffer);
/* Zstandard frame */
if (magicNumber == ZSTD_MAGICNUMBER) {
ZSTD_frameHeader header;
U64 const frameContentSize = ZSTD_getFrameContentSize(headerBuffer, numBytesRead);
if ( frameContentSize == ZSTD_CONTENTSIZE_ERROR
|| frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN ) {
info->decompUnavailable = 1;
} else {
info->decompressedSize += frameContentSize;
}
ERROR_IF(ZSTD_getFrameHeader(&header, headerBuffer, numBytesRead) != 0,
info_frame_error, "Error: could not decode frame header");
info->windowSize = header.windowSize;
/* move to the end of the frame header */
{ size_t const headerSize = ZSTD_frameHeaderSize(headerBuffer, numBytesRead);
ERROR_IF(ZSTD_isError(headerSize), info_frame_error, "Error: could not determine frame header size");
ERROR_IF(fseek(srcFile, ((long)headerSize)-((long)numBytesRead), SEEK_CUR) != 0,
info_frame_error, "Error: could not move to end of frame header");
}
/* skip all blocks in the frame */
{ int lastBlock = 0;
do {
BYTE blockHeaderBuffer[3];
ERROR_IF(fread(blockHeaderBuffer, 1, 3, srcFile) != 3,
info_frame_error, "Error while reading block header");
{ U32 const blockHeader = MEM_readLE24(blockHeaderBuffer);
U32 const blockTypeID = (blockHeader >> 1) & 3;
U32 const isRLE = (blockTypeID == 1);
U32 const isWrongBlock = (blockTypeID == 3);
long const blockSize = isRLE ? 1 : (long)(blockHeader >> 3);
ERROR_IF(isWrongBlock, info_frame_error, "Error: unsupported block type");
lastBlock = blockHeader & 1;
ERROR_IF(fseek(srcFile, blockSize, SEEK_CUR) != 0,
info_frame_error, "Error: could not skip to end of block");
}
} while (lastBlock != 1);
}
/* check if checksum is used */
{ BYTE const frameHeaderDescriptor = headerBuffer[4];
int const contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2;
if (contentChecksumFlag) {
info->usesCheck = 1;
ERROR_IF(fseek(srcFile, 4, SEEK_CUR) != 0,
info_frame_error, "Error: could not skip past checksum");
} }
info->numActualFrames++;
}
/* Skippable frame */
else if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
U32 const frameSize = MEM_readLE32(headerBuffer + 4);
long const seek = (long)(8 + frameSize - numBytesRead);
ERROR_IF(LONG_SEEK(srcFile, seek, SEEK_CUR) != 0,
info_frame_error, "Error: could not find end of skippable frame");
info->numSkippableFrames++;
}
/* unknown content */
else {
return info_not_zstd;
}
} /* magic number analysis */
} /* end analyzing frames */
return info_success;
}
static InfoError
getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName)
{
InfoError status;
FILE* const srcFile = FIO_openSrcFile(inFileName);
ERROR_IF(srcFile == NULL, info_file_error, "Error: could not open source file %s", inFileName);
info->compressedSize = UTIL_getFileSize(inFileName);
status = FIO_analyzeFrames(info, srcFile);
fclose(srcFile);
info->nbFiles = 1;
return status;
}
/** getFileInfo() :
* Reads information from file, stores in *info
* @return : InfoError status
*/
static InfoError
getFileInfo(fileInfo_t* info, const char* srcFileName)
{
ERROR_IF(!UTIL_isRegularFile(srcFileName),
info_file_error, "Error : %s is not a file", srcFileName);
return getFileInfo_fileConfirmed(info, srcFileName);
}
static void
displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel)
{
unsigned const unit = info->compressedSize < (1 MB) ? (1 KB) : (1 MB);
const char* const unitStr = info->compressedSize < (1 MB) ? "KB" : "MB";
double const windowSizeUnit = (double)info->windowSize / unit;
double const compressedSizeUnit = (double)info->compressedSize / unit;
double const decompressedSizeUnit = (double)info->decompressedSize / unit;
double const ratio = (info->compressedSize == 0) ? 0 : ((double)info->decompressedSize)/info->compressedSize;
const char* const checkString = (info->usesCheck ? "XXH64" : "None");
if (displayLevel <= 2) {
if (!info->decompUnavailable) {
DISPLAYOUT("%6d %5d %7.2f %2s %9.2f %2s %5.3f %5s %s\n",
info->numSkippableFrames + info->numActualFrames,
info->numSkippableFrames,
compressedSizeUnit, unitStr, decompressedSizeUnit, unitStr,
ratio, checkString, inFileName);
} else {
DISPLAYOUT("%6d %5d %7.2f %2s %5s %s\n",
info->numSkippableFrames + info->numActualFrames,
info->numSkippableFrames,
compressedSizeUnit, unitStr,
checkString, inFileName);
}
} else {
DISPLAYOUT("%s \n", inFileName);
DISPLAYOUT("# Zstandard Frames: %d\n", info->numActualFrames);
if (info->numSkippableFrames)
DISPLAYOUT("# Skippable Frames: %d\n", info->numSkippableFrames);
DISPLAYOUT("Window Size: %.2f %2s (%llu B)\n",
windowSizeUnit, unitStr,
(unsigned long long)info->windowSize);
DISPLAYOUT("Compressed Size: %.2f %2s (%llu B)\n",
compressedSizeUnit, unitStr,
(unsigned long long)info->compressedSize);
if (!info->decompUnavailable) {
DISPLAYOUT("Decompressed Size: %.2f %2s (%llu B)\n",
decompressedSizeUnit, unitStr,
(unsigned long long)info->decompressedSize);
DISPLAYOUT("Ratio: %.4f\n", ratio);
}
DISPLAYOUT("Check: %s\n", checkString);
DISPLAYOUT("\n");
}
}
static fileInfo_t FIO_addFInfo(fileInfo_t fi1, fileInfo_t fi2)
{
fileInfo_t total;
memset(&total, 0, sizeof(total));
total.numActualFrames = fi1.numActualFrames + fi2.numActualFrames;
total.numSkippableFrames = fi1.numSkippableFrames + fi2.numSkippableFrames;
total.compressedSize = fi1.compressedSize + fi2.compressedSize;
total.decompressedSize = fi1.decompressedSize + fi2.decompressedSize;
total.decompUnavailable = fi1.decompUnavailable | fi2.decompUnavailable;
total.usesCheck = fi1.usesCheck & fi2.usesCheck;
total.nbFiles = fi1.nbFiles + fi2.nbFiles;
return total;
}
static int
FIO_listFile(fileInfo_t* total, const char* inFileName, int displayLevel)
{
fileInfo_t info;
memset(&info, 0, sizeof(info));
{ InfoError const error = getFileInfo(&info, inFileName);
switch (error) {
case info_frame_error:
/* display error, but provide output */
DISPLAYLEVEL(1, "Error while parsing \"%s\" \n", inFileName);
break;
case info_not_zstd:
DISPLAYOUT("File \"%s\" not compressed by zstd \n", inFileName);
if (displayLevel > 2) DISPLAYOUT("\n");
return 1;
case info_file_error:
/* error occurred while opening the file */
if (displayLevel > 2) DISPLAYOUT("\n");
return 1;
case info_truncated_input:
DISPLAYOUT("File \"%s\" is truncated \n", inFileName);
if (displayLevel > 2) DISPLAYOUT("\n");
return 1;
case info_success:
default:
break;
}
displayInfo(inFileName, &info, displayLevel);
*total = FIO_addFInfo(*total, info);
2018-12-20 14:46:23 -08:00
assert(error == info_success || error == info_frame_error);
return error;
}
}
int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel)
{
/* ensure no specified input is stdin (needs fseek() capability) */
{ unsigned u;
for (u=0; u<numFiles;u++) {
ERROR_IF(!strcmp (filenameTable[u], stdinmark),
1, "zstd: --list does not support reading from standard input");
} }
if (numFiles == 0) {
if (!IS_CONSOLE(stdin)) {
DISPLAYLEVEL(1, "zstd: --list does not support reading from standard input \n");
}
DISPLAYLEVEL(1, "No files given \n");
return 1;
}
if (displayLevel <= 2) {
DISPLAYOUT("Frames Skips Compressed Uncompressed Ratio Check Filename\n");
}
{ int error = 0;
fileInfo_t total;
memset(&total, 0, sizeof(total));
total.usesCheck = 1;
/* --list each file, and check for any error */
{ unsigned u;
for (u=0; u<numFiles;u++) {
error |= FIO_listFile(&total, filenameTable[u], displayLevel);
} }
if (numFiles > 1 && displayLevel <= 2) { /* display total */
unsigned const unit = total.compressedSize < (1 MB) ? (1 KB) : (1 MB);
const char* const unitStr = total.compressedSize < (1 MB) ? "KB" : "MB";
double const compressedSizeUnit = (double)total.compressedSize / unit;
double const decompressedSizeUnit = (double)total.decompressedSize / unit;
double const ratio = (total.compressedSize == 0) ? 0 : ((double)total.decompressedSize)/total.compressedSize;
const char* const checkString = (total.usesCheck ? "XXH64" : "");
DISPLAYOUT("----------------------------------------------------------------- \n");
if (total.decompUnavailable) {
DISPLAYOUT("%6d %5d %7.2f %2s %5s %u files\n",
total.numSkippableFrames + total.numActualFrames,
total.numSkippableFrames,
compressedSizeUnit, unitStr,
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
checkString, (unsigned)total.nbFiles);
} else {
DISPLAYOUT("%6d %5d %7.2f %2s %9.2f %2s %5.3f %5s %u files\n",
total.numSkippableFrames + total.numActualFrames,
total.numSkippableFrames,
compressedSizeUnit, unitStr, decompressedSizeUnit, unitStr,
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
ratio, checkString, (unsigned)total.nbFiles);
} }
return error;
}
}
#endif /* #ifndef ZSTD_NODECOMPRESS */