cleaning up code for analyzing frames

This commit is contained in:
Paul Cruz 2017-06-15 14:13:28 -07:00
parent e208992529
commit a9b77c83e5
3 changed files with 90 additions and 87 deletions

View File

@ -204,7 +204,7 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size)
/** ZSTD_frameHeaderSize() : /** ZSTD_frameHeaderSize() :
* srcSize must be >= ZSTD_frameHeaderSize_prefix. * srcSize must be >= ZSTD_frameHeaderSize_prefix.
* @return : size of the Frame Header */ * @return : size of the Frame Header */
static size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
{ {
if (srcSize < ZSTD_frameHeaderSize_prefix) return ERROR(srcSize_wrong); if (srcSize < ZSTD_frameHeaderSize_prefix) return ERROR(srcSize_wrong);
{ BYTE const fhd = ((const BYTE*)src)[4]; { BYTE const fhd = ((const BYTE*)src)[4];

View File

@ -461,6 +461,11 @@ ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t
* however it does mean that all frame data must be present and valid. */ * however it does mean that all frame data must be present and valid. */
ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
/*! ZSTD_frameHeaderSize() :
* `src` should point to the start of a ZSTD frame
* `srcSize` must be >= ZSTD_frameHeaderSize_prefix.
* @return : size of the Frame Header */
size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
/*************************************** /***************************************
* Context memory usage * Context memory usage

View File

@ -872,17 +872,6 @@ typedef struct {
int usesCheck; int usesCheck;
} fileInfo_t; } fileInfo_t;
/* calcFrameHeaderSize() :
 * Computes the total frame header length, in bytes, from the
 * frame header descriptor byte alone.
 * The result is the sum of :
 *   - 4 + 1 bytes (fixed part; presumably the magic number and the descriptor byte itself)
 *   - 1 window descriptor byte, unless the single-segment bit (bit 5) is set
 *   - the frame content size field, selected by bits 7-6
 *   - the dictionary ID field, selected by bits 1-0
 * @return : header size in bytes */
static int calcFrameHeaderSize(BYTE frameHeaderDescriptor){
    int const fcsFlag = frameHeaderDescriptor >> 6;
    int const isSingleSegment = (frameHeaderDescriptor & 0x20) != 0;
    int const dictFlag = frameHeaderDescriptor & 3;
    int total = 4 + 1;   /* fixed part */

    /* window descriptor is only present when the single-segment bit is clear */
    if (!isSingleSegment) total += 1;

    /* content size field : 2, 4 or 8 bytes for flags 1, 2, 3;
     * flag 0 means 1 byte in single-segment mode, otherwise absent */
    if (fcsFlag != 0) {
        total += 1 << fcsFlag;
    } else if (isSingleSegment) {
        total += 1;
    }

    /* dictionary ID field : 0, 1, 2 or 4 bytes for flags 0, 1, 2, 3 */
    if (dictFlag != 0) total += 1 << (dictFlag - 1);

    return total;
}
/* /*
* Reads information from file, stores in *info * Reads information from file, stores in *info
* if successful, returns 0, otherwise returns 1 * if successful, returns 0, otherwise returns 1
@ -900,78 +889,79 @@ static int getFileInfo(fileInfo_t* info, const char* inFileName){
info->canComputeDecompSize = 1; info->canComputeDecompSize = 1;
/* begin analyzing frame */ /* begin analyzing frame */
for( ; ; ){ for( ; ; ){
BYTE magicNumberBuffer[4]; BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
size_t numBytesRead = fread(magicNumberBuffer, 1, 4, srcFile); size_t const numBytesRead = fread(headerBuffer, 1, sizeof(headerBuffer), srcFile);
U32 magicNumber; if (numBytesRead < ZSTD_frameHeaderSize_min) {
if (numBytesRead != 4) break; if(feof(srcFile)){
magicNumber = MEM_readLE32(magicNumberBuffer); DISPLAY("ran out of files\n");
if (magicNumber==ZSTD_MAGICNUMBER) { break;
BYTE frameHeaderDescriptor; }
int totalFrameHeaderBytes; else{
BYTE* frameHeader; DISPLAY("Error: did not reach end of file but ran out of frames\n");
int lastBlock = 0;
size_t readBytes = fread(&frameHeaderDescriptor, 1, 1, srcFile);
info->numActualFrames++;
if (readBytes != 1) {
DISPLAY("Error: could not read frame header descriptor\n");
fclose(srcFile); fclose(srcFile);
return 1; return 1;
} }
/* calculate actual frame header size */ }
totalFrameHeaderBytes = calcFrameHeaderSize(frameHeaderDescriptor); U32 const magicNumber = MEM_readLE32(headerBuffer);
if (magicNumber == ZSTD_MAGICNUMBER) {
U64 const frameContentSize = ZSTD_getFrameContentSize(headerBuffer, numBytesRead);
if (frameContentSize == ZSTD_CONTENTSIZE_ERROR || frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN) {
info->canComputeDecompSize = 0;
DISPLAY("could not compute decompressed size\n");
}
else {
info->decompressedSize += frameContentSize;
}
size_t const headerSize = ZSTD_frameHeaderSize(headerBuffer, numBytesRead);
if (ZSTD_isError(headerSize)) {
fclose(srcFile);
DISPLAY("Error: could not determine frame header size\n");
return 1;
}
/* reset to beginning of frame and read entire header */
{ {
int returnVal = fseek(srcFile, -5, SEEK_CUR); /* go back to the beginning of the frame */
if (returnVal!=0) { int const ret = fseek(srcFile, -numBytesRead, SEEK_CUR);
DISPLAY("Error: could not reset to the beginning of the frame header\n"); if (ret != 0) {
DISPLAY("Error: could not rewind to beginning of frame\n");
fclose(srcFile); fclose(srcFile);
return 1; return 1;
} }
} }
frameHeader = (BYTE*)malloc(totalFrameHeaderBytes);
if (frameHeader==NULL) {
DISPLAY("Error: could not allocate space for frameHeader\n");
fclose(srcFile);
return 1;
}
readBytes = fread(frameHeader, 1, totalFrameHeaderBytes, srcFile);
if (readBytes != (size_t)totalFrameHeaderBytes) {
DISPLAY("Error: could not read frame header\n");
fclose(srcFile);
free(frameHeader);
return 1;
}
/* get decompressed file size */
{ {
U64 additional = ZSTD_getFrameContentSize(frameHeader, totalFrameHeaderBytes); /* skip frame header */
if (additional!=ZSTD_CONTENTSIZE_UNKNOWN && additional!=ZSTD_CONTENTSIZE_ERROR) { int const ret = fseek(srcFile, headerSize, SEEK_CUR);
info->decompressedSize += additional; if (ret != 0) {
} DISPLAY("Error: could not skip header\n");
else{ fclose(srcFile);
info->canComputeDecompSize = 0; return 1;
} }
} }
/* skip the rest of the blocks in the frame */ /* skip the rest of the blocks in the frame */
do{ {
BYTE blockHeaderBuffer[3]; int lastBlock = 0;
U32 blockHeader; int readBytes = 0;
int blockSize; do{
readBytes = fread(blockHeaderBuffer, 1, 3, srcFile); BYTE blockHeaderBuffer[3];
if (readBytes != 3) { U32 blockHeader;
DISPLAY("There was a problem reading the block header\n"); int blockSize;
exit(1); readBytes = fread(blockHeaderBuffer, 1, 3, srcFile);
} if (readBytes != 3) {
blockHeader = MEM_readLE24(blockHeaderBuffer); DISPLAY("There was a problem reading the block header\n");
lastBlock = blockHeader & 1; fclose(srcFile);
blockSize = (blockHeader - (blockHeader & 7)) >> 3; return 1;
fseek(srcFile, blockSize, SEEK_CUR); }
}while (lastBlock != 1); blockHeader = MEM_readLE24(blockHeaderBuffer);
lastBlock = blockHeader & 1;
blockSize = (blockHeader - (blockHeader & 7)) >> 3;
fseek(srcFile, blockSize, SEEK_CUR);
}while (lastBlock != 1);
}
{ {
/* check if checksum is used */ /* check if checksum is used */
BYTE frameHeaderDescriptor = headerBuffer[4];
int contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2; int contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2;
if (contentChecksumFlag) { if (contentChecksumFlag) {
info->usesCheck = 1; info->usesCheck = 1;
@ -980,7 +970,7 @@ static int getFileInfo(fileInfo_t* info, const char* inFileName){
fseek(srcFile, 4, SEEK_CUR); fseek(srcFile, 4, SEEK_CUR);
} }
} }
free(frameHeader); info->numActualFrames++;
} }
else if (magicNumber == ZSTD_MAGIC_SKIPPABLE_START) { else if (magicNumber == ZSTD_MAGIC_SKIPPABLE_START) {
BYTE frameSizeBuffer[4]; BYTE frameSizeBuffer[4];
@ -992,7 +982,16 @@ static int getFileInfo(fileInfo_t* info, const char* inFileName){
exit(1); exit(1);
} }
frameSize = MEM_readLE32(frameSizeBuffer); frameSize = MEM_readLE32(frameSizeBuffer);
{
int const ret = fseek(srcFile, frameSize, SEEK_CUR);
if (ret != 0) {
DISPLAY("Error: could not find end of skippable frame\n");
fclose(srcFile);
return 1;
}
}
fseek(srcFile, frameSize, SEEK_CUR); fseek(srcFile, frameSize, SEEK_CUR);
info->numSkippableFrames++;
} }
} }
fclose(srcFile); fclose(srcFile);
@ -1004,51 +1003,50 @@ void displayInfo(const char* inFileName, fileInfo_t* info, int displayLevel){
if(displayLevel<=2){ if(displayLevel<=2){
if(info->usesCheck && info->canComputeDecompSize){ if(info->usesCheck && info->canComputeDecompSize){
DISPLAY("Skippable Non-Skippable Compressed Uncompressed Ratio Check Filename\n"); DISPLAYOUT("Skippable Non-Skippable Compressed Uncompressed Ratio Check Filename\n");
DISPLAY("%9d %13d %7.2f MB %7.2f MB %5.3f XXH64 %s\n", DISPLAYOUT("%9d %13d %7.2f MB %7.2f MB %5.3f XXH64 %s\n",
info->numSkippableFrames, info->numActualFrames, compressedSizeMB, decompressedSizeMB, info->numSkippableFrames, info->numActualFrames, compressedSizeMB, decompressedSizeMB,
compressedSizeMB/decompressedSizeMB, inFileName); compressedSizeMB/decompressedSizeMB, inFileName);
} }
else if(!info->usesCheck){ else if(!info->usesCheck){
DISPLAY("Skippable Non-Skippable Compressed Uncompressed Ratio Check Filename\n"); DISPLAYOUT("Skippable Non-Skippable Compressed Uncompressed Ratio Check Filename\n");
DISPLAY("%9d %13d %7.2f MB %7.2f MB %5.3f %s\n", DISPLAYOUT("%9d %13d %7.2f MB %7.2f MB %5.3f %s\n",
info->numSkippableFrames, info->numActualFrames, compressedSizeMB, decompressedSizeMB, info->numSkippableFrames, info->numActualFrames, compressedSizeMB, decompressedSizeMB,
compressedSizeMB/decompressedSizeMB, inFileName); compressedSizeMB/decompressedSizeMB, inFileName);
} }
else if(!info->canComputeDecompSize){ else if(!info->canComputeDecompSize){
DISPLAY("Skippable Non-Skippable Compressed Uncompressed Ratio Check Filename\n"); DISPLAYOUT("Skippable Non-Skippable Compressed Uncompressed Ratio Check Filename\n");
DISPLAY("%9d %13d %7.2f MB XXH64 %s\n", DISPLAYOUT("%9d %13d %7.2f MB XXH64 %s\n",
info->numSkippableFrames, info->numActualFrames, compressedSizeMB, inFileName); info->numSkippableFrames, info->numActualFrames, compressedSizeMB, inFileName);
} }
else{ else{
DISPLAY("Skippable Non-Skippable Filename\n"); DISPLAYOUT("Skippable Non-Skippable Filename\n");
DISPLAY("%9d %13d %7.2f MB %s\n", DISPLAYOUT("%9d %13d %7.2f MB %s\n",
info->numSkippableFrames, info->numActualFrames, compressedSizeMB, inFileName); info->numSkippableFrames, info->numActualFrames, compressedSizeMB, inFileName);
} }
} }
else{ else{
DISPLAY("# Zstandard Frames: %d\n", info->numActualFrames); DISPLAYOUT("# Zstandard Frames: %d\n", info->numActualFrames);
DISPLAY("# Skippable Frames: %d\n", info->numSkippableFrames); DISPLAYOUT("# Skippable Frames: %d\n", info->numSkippableFrames);
DISPLAY("Compressed Size: %.2f MB (%llu B)\n", compressedSizeMB, info->compressedSize); DISPLAYOUT("Compressed Size: %.2f MB (%llu B)\n", compressedSizeMB, info->compressedSize);
if(info->canComputeDecompSize){ if (info->canComputeDecompSize) {
DISPLAY("Decompressed Size: %.2f MB (%llu B)\n", decompressedSizeMB, info->decompressedSize); DISPLAYOUT("Decompressed Size: %.2f MB (%llu B)\n", decompressedSizeMB, info->decompressedSize);
DISPLAY("Ratio: %.4f\n", compressedSizeMB/decompressedSizeMB); DISPLAYOUT("Ratio: %.4f\n", compressedSizeMB/decompressedSizeMB);
} }
if(info->usesCheck){ if (info->usesCheck) {
DISPLAY("Check: XXH64\n"); DISPLAYOUT("Check: XXH64\n");
} }
} }
} }
int FIO_listFile(const char* inFileName, int displayLevel){ int FIO_listFile(const char* inFileName, int displayLevel){
const char* const suffixPtr = strrchr(inFileName, '.'); const char* const suffixPtr = strrchr(inFileName, '.');
DISPLAY("File: %s\n", inFileName); DISPLAY("File: %s\n", inFileName);
if(!suffixPtr || strcmp(suffixPtr, ZSTD_EXTENSION)){ if (!suffixPtr || strcmp(suffixPtr, ZSTD_EXTENSION)) {
DISPLAYLEVEL(1, "file %s was not compressed with zstd -- ignoring\n\n", inFileName); DISPLAYLEVEL(1, "file %s was not compressed with zstd -- ignoring\n\n", inFileName);
return 1; return 1;
} }
else{ else {
fileInfo_t* info = (fileInfo_t*)malloc(sizeof(fileInfo_t)); fileInfo_t* info = (fileInfo_t*)malloc(sizeof(fileInfo_t));
int error = getFileInfo(info, inFileName); int error = getFileInfo(info, inFileName);
if(error==1){ if(error==1){