Merge pull request #895 from facebook/fileSize_unknown

Distinguish 0-size from size-unavailable
dev
Yann Collet 2017-10-18 16:50:17 -07:00 committed by GitHub
commit 9d377c6e88
8 changed files with 97 additions and 45 deletions

View File

@ -551,6 +551,11 @@ static void BMK_loadFiles(void* buffer, size_t bufferSize,
fileSizes[n] = 0;
continue;
}
if (fileSize == UTIL_FILESIZE_UNKNOWN) {
DISPLAYLEVEL(2, "Cannot evaluate size of %s, ignoring ... \n", fileNamesTable[n]);
fileSizes[n] = 0;
continue;
}
f = fopen(fileNamesTable[n], "rb");
if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileNamesTable[n]);
DISPLAYUPDATE(2, "Loading %s... \r", fileNamesTable[n]);
@ -581,11 +586,14 @@ static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles, co
/* Load dictionary */
if (dictFileName != NULL) {
U64 dictFileSize = UTIL_getFileSize(dictFileName);
if (dictFileSize > 64 MB) EXM_THROW(10, "dictionary file %s too large", dictFileName);
U64 const dictFileSize = UTIL_getFileSize(dictFileName);
if (dictFileSize > 64 MB)
EXM_THROW(10, "dictionary file %s too large", dictFileName);
dictBufferSize = (size_t)dictFileSize;
dictBuffer = malloc(dictBufferSize);
if (dictBuffer==NULL) EXM_THROW(11, "not enough memory for dictionary (%u bytes)", (U32)dictBufferSize);
if (dictBuffer==NULL)
EXM_THROW(11, "not enough memory for dictionary (%u bytes)",
(U32)dictBufferSize);
BMK_loadFiles(dictBuffer, dictBufferSize, fileSizes, &dictFileName, 1);
}

View File

@ -117,7 +117,7 @@ static unsigned DiB_loadFiles(void* buffer, size_t* bufferSizePtr,
for (fileIndex=0; fileIndex<nbFiles; fileIndex++) {
const char* const fileName = fileNamesTable[fileIndex];
unsigned long long const fs64 = UTIL_getFileSize(fileName);
unsigned long long remainingToLoad = fs64;
unsigned long long remainingToLoad = (fs64 == UTIL_FILESIZE_UNKNOWN) ? 0 : fs64;
U32 const nbChunks = targetChunkSize ? (U32)((fs64 + (targetChunkSize-1)) / targetChunkSize) : 1;
U64 const chunkSize = targetChunkSize ? MIN(targetChunkSize, fs64) : fs64;
size_t const maxChunkSize = (size_t)MIN(chunkSize, SAMPLESIZE_MAX);
@ -245,8 +245,9 @@ static fileStats DiB_fileStats(const char** fileNamesTable, unsigned nbFiles, si
memset(&fs, 0, sizeof(fs));
for (n=0; n<nbFiles; n++) {
U64 const fileSize = UTIL_getFileSize(fileNamesTable[n]);
U32 const nbSamples = (U32)(chunkSize ? (fileSize + (chunkSize-1)) / chunkSize : 1);
U64 const chunkToLoad = chunkSize ? MIN(chunkSize, fileSize) : fileSize;
U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? 0 : fileSize;
U32 const nbSamples = (U32)(chunkSize ? (srcSize + (chunkSize-1)) / chunkSize : 1);
U64 const chunkToLoad = chunkSize ? MIN(chunkSize, srcSize) : srcSize;
size_t const cappedChunkSize = (size_t)MIN(chunkToLoad, SAMPLESIZE_MAX);
fs.totalSizeToLoad += cappedChunkSize * nbSamples;
fs.oneSampleTooLarge |= (chunkSize > 2*SAMPLESIZE_MAX);

View File

@ -25,7 +25,7 @@
* Includes
***************************************/
#include "platform.h" /* Large Files support, SET_BINARY_MODE */
#include "util.h" /* UTIL_getFileSize */
#include "util.h" /* UTIL_getFileSize, UTIL_isRegularFile */
#include <stdio.h> /* fprintf, fopen, fread, _fileno, stdin, stdout */
#include <stdlib.h> /* malloc, free */
#include <string.h> /* strcmp, strlen */
@ -564,7 +564,7 @@ static unsigned long long FIO_compressGzFrame(cRess_t* ress,
strm.avail_out = (uInt)ress->dstBufferSize;
}
}
if (!srcFileSize)
if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
(U32)(inFileSize>>20),
(double)outFileSize/inFileSize*100)
@ -651,7 +651,7 @@ static unsigned long long FIO_compressLzmaFrame(cRess_t* ress,
strm.next_out = (BYTE*)ress->dstBuffer;
strm.avail_out = ress->dstBufferSize;
} }
if (!srcFileSize)
if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
(U32)(inFileSize>>20),
(double)outFileSize/inFileSize*100)
@ -697,18 +697,17 @@ static unsigned long long FIO_compressLz4Frame(cRess_t* ress,
prefs.frameInfo.blockSizeID = LZ4F_max4MB;
prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)g_checksumFlag;
#if LZ4_VERSION_NUMBER >= 10600
prefs.frameInfo.contentSize = srcFileSize;
prefs.frameInfo.contentSize = (srcFileSize==UTIL_FILESIZE_UNKNOWN) ? 0 : srcFileSize;
#endif
{
size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max4MB);
{ size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max4MB);
size_t readSize;
size_t headerSize = LZ4F_compressBegin(ctx, ress->dstBuffer, ress->dstBufferSize, &prefs);
if (LZ4F_isError(headerSize))
EXM_THROW(33, "File header generation failed : %s",
LZ4F_getErrorName(headerSize));
{ size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile);
if (sizeCheck!=headerSize) EXM_THROW(34, "Write error : cannot write header"); }
if (fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile) != headerSize)
EXM_THROW(34, "Write error : cannot write header");
outFileSize += headerSize;
/* Read first block */
@ -725,7 +724,7 @@ static unsigned long long FIO_compressLz4Frame(cRess_t* ress,
EXM_THROW(35, "zstd: %s: lz4 compression failed : %s",
srcFileName, LZ4F_getErrorName(outSize));
outFileSize += outSize;
if (!srcFileSize)
if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
(U32)(inFileSize>>20),
(double)outFileSize/inFileSize*100)
@ -816,12 +815,12 @@ static int FIO_compressFilename_internal(cRess_t ress,
/* init */
#ifdef ZSTD_NEWAPI
if (fileSize!=0) /* when src is stdin, fileSize==0, but is effectively unknown */
if (fileSize!=UTIL_FILESIZE_UNKNOWN) /* when src is stdin, fileSize==0, but is effectively unknown */
ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize);
#elif defined(ZSTD_MULTITHREAD)
CHECK( ZSTDMT_resetCStream(ress.cctx, fileSize ? fileSize : ZSTD_CONTENTSIZE_UNKNOWN) );
CHECK( ZSTDMT_resetCStream(ress.cctx, (fileSize==UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize) );
#else
CHECK( ZSTD_resetCStream(ress.cctx, fileSize ? fileSize : ZSTD_CONTENTSIZE_UNKNOWN) );
CHECK( ZSTD_resetCStream(ress.cctx, (fileSize==UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize) );
#endif
/* Main compression loop */
@ -851,18 +850,18 @@ static int FIO_compressFilename_internal(cRess_t ress,
compressedfilesize += outBuff.pos;
} }
if (g_nbThreads > 1) {
if (!fileSize)
if (fileSize == UTIL_FILESIZE_UNKNOWN)
DISPLAYUPDATE(2, "\rRead : %u MB", (U32)(readsize>>20))
else
DISPLAYUPDATE(2, "\rRead : %u / %u MB",
(U32)(readsize>>20), (U32)(fileSize>>20));
} else {
if (!fileSize)
if (fileSize == UTIL_FILESIZE_UNKNOWN)
DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
(U32)(readsize>>20),
(double)compressedfilesize/readsize*100)
else
DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%",
DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%",
(U32)(readsize>>20), (U32)(fileSize>>20),
(double)compressedfilesize/readsize*100);
}
@ -985,7 +984,8 @@ int FIO_compressFilename(const char* dstFileName, const char* srcFileName,
const char* dictFileName, int compressionLevel, ZSTD_compressionParameters* comprParams)
{
clock_t const start = clock();
U64 const srcSize = UTIL_getFileSize(srcFileName);
U64 const fileSize = UTIL_getFileSize(srcFileName);
U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize;
cRess_t const ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, comprParams);
int const result = FIO_compressFilename_dstFile(ress, dstFileName, srcFileName, compressionLevel);
@ -1007,7 +1007,9 @@ int FIO_compressMultipleFilenames(const char** inFileNamesTable, unsigned nbFile
size_t dfnSize = FNSPACE;
char* dstFileName = (char*)malloc(FNSPACE);
size_t const suffixSize = suffix ? strlen(suffix) : 0;
U64 const srcSize = (nbFiles != 1) ? ZSTD_CONTENTSIZE_UNKNOWN : UTIL_getFileSize(inFileNamesTable[0]) ;
U64 const firstFileSize = UTIL_getFileSize(inFileNamesTable[0]);
U64 const firstSrcSize = (firstFileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : firstFileSize;
U64 const srcSize = (nbFiles != 1) ? ZSTD_CONTENTSIZE_UNKNOWN : firstSrcSize ;
cRess_t ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, comprParams);
/* init */
@ -1799,7 +1801,7 @@ typedef struct {
* 2 for file not compressed with zstd
* 3 for cases in which file could not be opened.
*/
static int getFileInfo(fileInfo_t* info, const char* inFileName){
static int getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName){
int detectError = 0;
FILE* const srcFile = FIO_openSrcFile(inFileName);
if (srcFile == NULL) {
@ -1815,7 +1817,8 @@ static int getFileInfo(fileInfo_t* info, const char* inFileName){
if (numBytesRead < ZSTD_frameHeaderSize_min) {
if ( feof(srcFile)
&& (numBytesRead == 0)
&& (info->compressedSize > 0) ) {
&& (info->compressedSize > 0)
&& (info->compressedSize != UTIL_FILESIZE_UNKNOWN) ) {
break;
}
else if (feof(srcFile)) {
@ -1928,6 +1931,17 @@ static int getFileInfo(fileInfo_t* info, const char* inFileName){
return detectError;
}
static int getFileInfo(fileInfo_t* info, const char* srcFileName)
{
int const isAFile = UTIL_isRegularFile(srcFileName);
if (!isAFile) {
DISPLAY("Error : %s is not a file", srcFileName);
return 3;
}
return getFileInfo_fileConfirmed(info, srcFileName);
}
static void displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel){
unsigned const unit = info->compressedSize < (1 MB) ? (1 KB) : (1 MB);
const char* const unitStr = info->compressedSize < (1 MB) ? "KB" : "MB";

View File

@ -313,33 +313,40 @@ UTIL_STATIC U32 UTIL_isLink(const char* infilename)
}
#define UTIL_FILESIZE_UNKNOWN ((U64)(-1))
UTIL_STATIC U64 UTIL_getFileSize(const char* infilename)
{
int r;
if (!UTIL_isRegularFile(infilename)) return UTIL_FILESIZE_UNKNOWN;
{ int r;
#if defined(_MSC_VER)
struct __stat64 statbuf;
r = _stat64(infilename, &statbuf);
if (r || !(statbuf.st_mode & S_IFREG)) return 0; /* No good... */
struct __stat64 statbuf;
r = _stat64(infilename, &statbuf);
if (r || !(statbuf.st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN;
#elif defined(__MINGW32__) && defined (__MSVCRT__)
struct _stati64 statbuf;
r = _stati64(infilename, &statbuf);
if (r || !(statbuf.st_mode & S_IFREG)) return 0; /* No good... */
struct _stati64 statbuf;
r = _stati64(infilename, &statbuf);
if (r || !(statbuf.st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN;
#else
struct stat statbuf;
r = stat(infilename, &statbuf);
if (r || !S_ISREG(statbuf.st_mode)) return 0; /* No good... */
struct stat statbuf;
r = stat(infilename, &statbuf);
if (r || !S_ISREG(statbuf.st_mode)) return UTIL_FILESIZE_UNKNOWN;
#endif
return (U64)statbuf.st_size;
return (U64)statbuf.st_size;
}
}
UTIL_STATIC U64 UTIL_getTotalFileSize(const char** fileNamesTable, unsigned nbFiles)
{
U64 total = 0;
int error = 0;
unsigned n;
for (n=0; n<nbFiles; n++)
total += UTIL_getFileSize(fileNamesTable[n]);
return total;
for (n=0; n<nbFiles; n++) {
U64 const size = UTIL_getFileSize(fileNamesTable[n]);
error |= (size == UTIL_FILESIZE_UNKNOWN);
total += size;
}
return error ? UTIL_FILESIZE_UNKNOWN : total;
}

View File

@ -484,8 +484,8 @@ static int benchFiles(const char** fileNamesTable, const int nbFiles, U32 benchN
/* Loop for each file */
int fileIdx;
for (fileIdx=0; fileIdx<nbFiles; fileIdx++) {
const char* inFileName = fileNamesTable[fileIdx];
FILE* inFile = fopen( inFileName, "rb" );
const char* const inFileName = fileNamesTable[fileIdx];
FILE* const inFile = fopen( inFileName, "rb" );
U64 inFileSize;
size_t benchedSize;
void* origBuff;
@ -495,6 +495,11 @@ static int benchFiles(const char** fileNamesTable, const int nbFiles, U32 benchN
/* Memory allocation & restrictions */
inFileSize = UTIL_getFileSize(inFileName);
if (inFileSize == UTIL_FILESIZE_UNKNOWN) {
DISPLAY( "Cannot measure size of %s\n", inFileName);
fclose(inFile);
return 11;
}
benchedSize = BMK_findMaxMem(inFileSize*3) / 3;
if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
if (benchedSize < inFileSize)

View File

@ -687,6 +687,11 @@ int benchFiles(const char** fileNamesTable, int nbFiles)
DISPLAY( "Pb opening %s\n", inFileName);
return 11;
}
if (inFileSize == UTIL_FILESIZE_UNKNOWN) {
DISPLAY("Pb evaluatin size of %s \n", inFileName);
fclose(inFile);
return 11;
}
/* Memory allocation */
benchedSize = BMK_findMaxMem(inFileSize*3) / 3;
@ -740,6 +745,11 @@ int optimizeForSize(const char* inFileName, U32 targetSpeed)
/* Init */
if (inFile==NULL) { DISPLAY( "Pb opening %s\n", inFileName); return 11; }
if (inFileSize == UTIL_FILESIZE_UNKNOWN) {
DISPLAY("Pb evaluatin size of %s \n", inFileName);
fclose(inFile);
return 11;
}
/* Memory allocation & restrictions */
if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;

View File

@ -621,7 +621,7 @@ $ECHO "\n===> zstd --list/-l test with null files "
$ZSTD tmp5
$ZSTD -l tmp5.zst
! $ZSTD -l tmp5*
$ZSTD -lv tmp5.zst
$ZSTD -lv tmp5.zst | grep "Decompressed Size: 0.00 KB (0 B)" # check that 0 size is present in header
! $ZSTD -lv tmp5*
$ECHO "\n===> zstd --list/-l test with no content size field "

View File

@ -684,6 +684,11 @@ static void BMK_loadFiles(void* buffer, size_t bufferSize,
fileSizes[n] = 0;
continue;
}
if (fileSize == UTIL_FILESIZE_UNKNOWN) {
DISPLAYLEVEL(2, "Cannot determine size of %s ... \n", fileNamesTable[n]);
fileSizes[n] = 0;
continue;
}
f = fopen(fileNamesTable[n], "rb");
if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileNamesTable[n]);
DISPLAYUPDATE(2, "Loading %s... \r", fileNamesTable[n]);
@ -714,11 +719,13 @@ static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles,
/* Load dictionary */
if (dictFileName != NULL) {
U64 dictFileSize = UTIL_getFileSize(dictFileName);
if (dictFileSize > 64 MB) EXM_THROW(10, "dictionary file %s too large", dictFileName);
U64 const dictFileSize = UTIL_getFileSize(dictFileName);
if (dictFileSize > 64 MB)
EXM_THROW(10, "dictionary file %s too large", dictFileName);
dictBufferSize = (size_t)dictFileSize;
dictBuffer = malloc(dictBufferSize);
if (dictBuffer==NULL) EXM_THROW(11, "not enough memory for dictionary (%u bytes)", (U32)dictBufferSize);
if (dictBuffer==NULL)
EXM_THROW(11, "not enough memory for dictionary (%u bytes)", (U32)dictBufferSize);
BMK_loadFiles(dictBuffer, dictBufferSize, fileSizes, &dictFileName, 1);
}