s/chunk/frame/

dev
Sean Purcell 2017-04-12 11:06:00 -07:00
parent e80f1d74b3
commit 5ee1135f30
7 changed files with 142 additions and 142 deletions

View File

@ -59,7 +59,7 @@ static size_t fclose_orDie(FILE* file)
exit(6); exit(6);
} }
static void compressFile_orDie(const char* fname, const char* outName, int cLevel, unsigned chunkSize) static void compressFile_orDie(const char* fname, const char* outName, int cLevel, unsigned frameSize)
{ {
FILE* const fin = fopen_orDie(fname, "rb"); FILE* const fin = fopen_orDie(fname, "rb");
FILE* const fout = fopen_orDie(outName, "wb"); FILE* const fout = fopen_orDie(outName, "wb");
@ -70,7 +70,7 @@ static void compressFile_orDie(const char* fname, const char* outName, int cLeve
ZSTD_seekable_CStream* const cstream = ZSTD_seekable_createCStream(); ZSTD_seekable_CStream* const cstream = ZSTD_seekable_createCStream();
if (cstream==NULL) { fprintf(stderr, "ZSTD_seekable_createCStream() error \n"); exit(10); } if (cstream==NULL) { fprintf(stderr, "ZSTD_seekable_createCStream() error \n"); exit(10); }
size_t const initResult = ZSTD_seekable_initCStream(cstream, cLevel, 1, chunkSize); size_t const initResult = ZSTD_seekable_initCStream(cstream, cLevel, 1, frameSize);
if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_initCStream() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); } if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_initCStream() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
size_t read, toRead = buffInSize; size_t read, toRead = buffInSize;
@ -116,15 +116,15 @@ int main(int argc, const char** argv) {
if (argc!=3) { if (argc!=3) {
printf("wrong arguments\n"); printf("wrong arguments\n");
printf("usage:\n"); printf("usage:\n");
printf("%s FILE CHUNK_SIZE\n", exeName); printf("%s FILE FRAME_SIZE\n", exeName);
return 1; return 1;
} }
{ const char* const inFileName = argv[1]; { const char* const inFileName = argv[1];
unsigned const chunkSize = (unsigned)atoi(argv[2]); unsigned const frameSize = (unsigned)atoi(argv[2]);
const char* const outFileName = createOutFilename_orDie(inFileName); const char* const outFileName = createOutFilename_orDie(inFileName);
compressFile_orDie(inFileName, outFileName, 5, chunkSize); compressFile_orDie(inFileName, outFileName, 5, frameSize);
} }
return 0; return 0;

View File

@ -9,17 +9,17 @@ static const unsigned ZSTD_seekTableFooterSize = 9;
#define ZSTD_SEEKABLE_MAGICNUMBER 0x8F92EAB1 #define ZSTD_SEEKABLE_MAGICNUMBER 0x8F92EAB1
#define ZSTD_SEEKABLE_MAXCHUNKS 0x8000000U #define ZSTD_SEEKABLE_MAXFRAMES 0x8000000U
/* 0xFE03F607 is the largest number x such that ZSTD_compressBound(x) fits in a 32-bit integer */ /* 0xFE03F607 is the largest number x such that ZSTD_compressBound(x) fits in a 32-bit integer */
#define ZSTD_SEEKABLE_MAX_CHUNK_DECOMPRESSED_SIZE 0xFE03F607 #define ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE 0xFE03F607
/*-**************************************************************************** /*-****************************************************************************
* Seekable Format * Seekable Format
* *
* The seekable format splits the compressed data into a series of "chunks", * The seekable format splits the compressed data into a series of "frames",
* each compressed individually so that decompression of a section in the * each compressed individually so that decompression of a section in the
* middle of an archive only requires zstd to decompress at most a chunk's * middle of an archive only requires zstd to decompress at most a frame's
* worth of extra data, instead of the entire archive. * worth of extra data, instead of the entire archive.
******************************************************************************/ ******************************************************************************/
@ -37,15 +37,15 @@ typedef struct ZSTD_seekable_DStream_s ZSTD_seekable_DStream;
* compressor. * compressor.
* *
* Data streamed to the seekable compressor will automatically be split into * Data streamed to the seekable compressor will automatically be split into
* chunks of size `maxChunkSize` (provided in ZSTD_seekable_initCStream()), * frames of size `maxFrameSize` (provided in ZSTD_seekable_initCStream()),
* or if none is provided, will be cut off whenever ZSTD_seekable_endChunk() is called * or if none is provided, will be cut off whenever ZSTD_seekable_endFrame() is called
* or when the default maximum chunk size is reached (approximately 4GB). * or when the default maximum frame size is reached (approximately 4GB).
* *
* Use ZSTD_seekable_initCStream() to initialize a ZSTD_seekable_CStream object * Use ZSTD_seekable_initCStream() to initialize a ZSTD_seekable_CStream object
* for a new compression operation. * for a new compression operation.
* `maxChunkSize` indicates the size at which to automatically start a new * `maxFrameSize` indicates the size at which to automatically start a new
* seekable frame. `maxChunkSize == 0` implies the default maximum size. * seekable frame. `maxFrameSize == 0` implies the default maximum size.
* `checksumFlag` indicates whether or not the seek table should include chunk * `checksumFlag` indicates whether or not the seek table should include frame
* checksums on the uncompressed data for verification. * checksums on the uncompressed data for verification.
* @return : a size hint for input to provide for compression, or an error code * @return : a size hint for input to provide for compression, or an error code
* checkable with ZSTD_isError() * checkable with ZSTD_isError()
@ -61,14 +61,14 @@ typedef struct ZSTD_seekable_DStream_s ZSTD_seekable_DStream;
* value will work fine. * value will work fine.
* Note 2 : size hint is guaranteed to be <= ZSTD_CStreamInSize() * Note 2 : size hint is guaranteed to be <= ZSTD_CStreamInSize()
* *
* At any time, call ZSTD_seekable_endChunk() to end the current chunk and * At any time, call ZSTD_seekable_endFrame() to end the current frame and
* start a new one. * start a new one.
* *
* ZSTD_endStream() will end the current chunk, and then write the seek table * ZSTD_seekable_endStream() will end the current frame, and then write the seek
* so that decompressors can efficiently find compressed chunks. * table so that decompressors can efficiently find compressed frames.
* ZSTD_endStream() may return a number > 0 if it was unable to flush all the * ZSTD_seekable_endStream() may return a number > 0 if it was unable to flush
* necessary data to `output`. In this case, it should be called again until * all the necessary data to `output`. In this case, it should be called again
* all remaining data is flushed out and 0 is returned. * until all remaining data is flushed out and 0 is returned.
******************************************************************************/ ******************************************************************************/
/*===== Seekable compressor management =====*/ /*===== Seekable compressor management =====*/
@ -76,9 +76,9 @@ ZSTDLIB_API ZSTD_seekable_CStream* ZSTD_seekable_createCStream(void);
ZSTDLIB_API size_t ZSTD_seekable_freeCStream(ZSTD_seekable_CStream* zcs); ZSTDLIB_API size_t ZSTD_seekable_freeCStream(ZSTD_seekable_CStream* zcs);
/*===== Seekable compression functions =====*/ /*===== Seekable compression functions =====*/
ZSTDLIB_API size_t ZSTD_seekable_initCStream(ZSTD_seekable_CStream* zcs, int compressionLevel, int checksumFlag, unsigned maxChunkSize); ZSTDLIB_API size_t ZSTD_seekable_initCStream(ZSTD_seekable_CStream* zcs, int compressionLevel, int checksumFlag, unsigned maxFrameSize);
ZSTDLIB_API size_t ZSTD_seekable_compressStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); ZSTDLIB_API size_t ZSTD_seekable_compressStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
ZSTDLIB_API size_t ZSTD_seekable_endChunk(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output); ZSTDLIB_API size_t ZSTD_seekable_endFrame(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output);
ZSTDLIB_API size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output); ZSTDLIB_API size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output);
/*-**************************************************************************** /*-****************************************************************************

View File

@ -18,7 +18,7 @@ Distribution of this document is unlimited.
## Introduction ## Introduction
This document defines a format for compressed data to be stored so that subranges of the data can be efficiently decompressed without requiring the entire document to be decompressed. This document defines a format for compressed data to be stored so that subranges of the data can be efficiently decompressed without requiring the entire document to be decompressed.
This is done by splitting up the input data into chunks, This is done by splitting up the input data into frames,
each of which is compressed independently, each of which is compressed independently,
and so can be decompressed independently. and so can be decompressed independently.
Decompression then takes advantage of a provided 'seek table', which allows the decompressor to immediately jump to the desired data. This is done in a way that is compatible with the original Zstandard format by placing the seek table in a Zstandard skippable frame. Decompression then takes advantage of a provided 'seek table', which allows the decompressor to immediately jump to the desired data. This is done in a way that is compatible with the original Zstandard format by placing the seek table in a Zstandard skippable frame.
@ -31,7 +31,7 @@ In this document:
## Format ## Format
The format consists of a number of chunks (Zstandard compressed frames and skippable frames), followed by a final skippable frame at the end containing the seek table. The format consists of a number of frames (Zstandard compressed frames and skippable frames), followed by a final skippable frame at the end containing the seek table.
### Seek Table Format ### Seek Table Format
The structure of the seek table frame is as follows: The structure of the seek table frame is as follows:
@ -58,7 +58,7 @@ This is for compatibility with [Zstandard skippable frames].
#### `Seek_Table_Footer` #### `Seek_Table_Footer`
The seek table footer format is as follows: The seek table footer format is as follows:
|`Number_Of_Chunks`|`Seek_Table_Descriptor`|`Seekable_Magic_Number`| |`Number_Of_Frames`|`Seek_Table_Descriptor`|`Seekable_Magic_Number`|
|------------------|-----------------------|-----------------------| |------------------|-----------------------|-----------------------|
| 4 bytes | 1 byte | 4 bytes | | 4 bytes | 1 byte | 4 bytes |
@ -68,9 +68,9 @@ Value : 0x8F92EAB1.
This value must be the last bytes present in the compressed file so that decoders This value must be the last bytes present in the compressed file so that decoders
can efficiently find it and determine if there is an actual seek table present. can efficiently find it and determine if there is an actual seek table present.
__`Number_Of_Chunks`__ __`Number_Of_Frames`__
The number of stored chunks in the data. The number of stored frames in the data.
__`Seek_Table_Descriptor`__ __`Seek_Table_Descriptor`__
@ -87,13 +87,13 @@ for example the addition of inline dictionaries.
__`Checksum_Flag`__ __`Checksum_Flag`__
If the checksum flag is set, each of the seek table entries contains a 4 byte checksum of the uncompressed data contained in its chunk. If the checksum flag is set, each of the seek table entries contains a 4 byte checksum of the uncompressed data contained in its frame.
`Reserved_Bits` are not currently used but may be used in the future for breaking changes, so a compliant decoder should ensure they are set to 0. `Unused_Bits` may be used in the future for non-breaking changes, so a compliant decoder should not interpret these bits. `Reserved_Bits` are not currently used but may be used in the future for breaking changes, so a compliant decoder should ensure they are set to 0. `Unused_Bits` may be used in the future for non-breaking changes, so a compliant decoder should not interpret these bits.
#### __`Seek_Table_Entries`__ #### __`Seek_Table_Entries`__
`Seek_Table_Entries` consists of `Number_Of_Chunks` (one for each chunk in the data, not including the seek table frame) entries of the following form, in sequence: `Seek_Table_Entries` consists of `Number_Of_Frames` (one for each frame in the data, not including the seek table frame) entries of the following form, in sequence:
|`Compressed_Size`|`Decompressed_Size`|`[Checksum]`| |`Compressed_Size`|`Decompressed_Size`|`[Checksum]`|
|-----------------|-------------------|------------| |-----------------|-------------------|------------|
@ -101,12 +101,12 @@ If the checksum flag is set, each of the seek table entries contains a 4 byte ch
__`Compressed_Size`__ __`Compressed_Size`__
The compressed size of the chunk. The compressed size of the frame.
The cumulative sum of the `Compressed_Size` fields of chunks `0` to `i` gives the offset in the compressed file of chunk `i+1`. The cumulative sum of the `Compressed_Size` fields of frames `0` to `i` gives the offset in the compressed file of frame `i+1`.
__`Decompressed_Size`__ __`Decompressed_Size`__
The size of the decompressed data contained in the chunk. For skippable or otherwise empty frames, this value is 0. The size of the decompressed data contained in the frame. For skippable or otherwise empty frames, this value is 0.
__`Checksum`__ __`Checksum`__

View File

@ -20,24 +20,24 @@ typedef struct {
U32 cSize; U32 cSize;
U32 dSize; U32 dSize;
U32 checksum; U32 checksum;
} chunklogEntry_t; } framelogEntry_t;
typedef struct { typedef struct {
chunklogEntry_t* entries; framelogEntry_t* entries;
U32 size; U32 size;
U32 capacity; U32 capacity;
} chunklog_t; } framelog_t;
struct ZSTD_seekable_CStream_s { struct ZSTD_seekable_CStream_s {
ZSTD_CStream* cstream; ZSTD_CStream* cstream;
chunklog_t chunklog; framelog_t framelog;
U32 chunkCSize; U32 frameCSize;
U32 chunkDSize; U32 frameDSize;
XXH64_state_t xxhState; XXH64_state_t xxhState;
U32 maxChunkSize; U32 maxFrameSize;
int checksumFlag; int checksumFlag;
@ -56,11 +56,11 @@ ZSTD_seekable_CStream* ZSTD_seekable_createCStream()
if (zcs->cstream == NULL) goto failed1; if (zcs->cstream == NULL) goto failed1;
/* allocate some initial space */ /* allocate some initial space */
{ size_t const CHUNKLOG_STARTING_CAPACITY = 16; { size_t const FRAMELOG_STARTING_CAPACITY = 16;
zcs->chunklog.entries = zcs->framelog.entries =
malloc(sizeof(chunklogEntry_t) * CHUNKLOG_STARTING_CAPACITY); malloc(sizeof(framelogEntry_t) * FRAMELOG_STARTING_CAPACITY);
if (zcs->chunklog.entries == NULL) goto failed2; if (zcs->framelog.entries == NULL) goto failed2;
zcs->chunklog.capacity = CHUNKLOG_STARTING_CAPACITY; zcs->framelog.capacity = FRAMELOG_STARTING_CAPACITY;
} }
return zcs; return zcs;
@ -76,7 +76,7 @@ size_t ZSTD_seekable_freeCStream(ZSTD_seekable_CStream* zcs)
{ {
if (zcs == NULL) return 0; /* support free on null */ if (zcs == NULL) return 0; /* support free on null */
ZSTD_freeCStream(zcs->cstream); ZSTD_freeCStream(zcs->cstream);
free(zcs->chunklog.entries); free(zcs->framelog.entries);
free(zcs); free(zcs);
return 0; return 0;
@ -85,20 +85,20 @@ size_t ZSTD_seekable_freeCStream(ZSTD_seekable_CStream* zcs)
size_t ZSTD_seekable_initCStream(ZSTD_seekable_CStream* zcs, size_t ZSTD_seekable_initCStream(ZSTD_seekable_CStream* zcs,
int compressionLevel, int compressionLevel,
int checksumFlag, int checksumFlag,
U32 maxChunkSize) U32 maxFrameSize)
{ {
zcs->chunklog.size = 0; zcs->framelog.size = 0;
zcs->chunkCSize = 0; zcs->frameCSize = 0;
zcs->chunkDSize = 0; zcs->frameDSize = 0;
/* make sure maxChunkSize has a reasonable value */ /* make sure maxFrameSize has a reasonable value */
if (maxChunkSize > ZSTD_SEEKABLE_MAX_CHUNK_DECOMPRESSED_SIZE) { if (maxFrameSize > ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE) {
return ERROR(compressionParameter_unsupported); return ERROR(compressionParameter_unsupported);
} }
zcs->maxChunkSize = maxChunkSize zcs->maxFrameSize = maxFrameSize
? maxChunkSize ? maxFrameSize
: ZSTD_SEEKABLE_MAX_CHUNK_DECOMPRESSED_SIZE; : ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE;
zcs->checksumFlag = checksumFlag; zcs->checksumFlag = checksumFlag;
if (zcs->checksumFlag) { if (zcs->checksumFlag) {
@ -110,58 +110,58 @@ size_t ZSTD_seekable_initCStream(ZSTD_seekable_CStream* zcs,
return ZSTD_initCStream(zcs->cstream, compressionLevel); return ZSTD_initCStream(zcs->cstream, compressionLevel);
} }
static size_t ZSTD_seekable_logChunk(ZSTD_seekable_CStream* zcs) static size_t ZSTD_seekable_logFrame(ZSTD_seekable_CStream* zcs)
{ {
if (zcs->chunklog.size == ZSTD_SEEKABLE_MAXCHUNKS) if (zcs->framelog.size == ZSTD_SEEKABLE_MAXFRAMES)
return ERROR(chunkIndex_tooLarge); return ERROR(frameIndex_tooLarge);
zcs->chunklog.entries[zcs->chunklog.size] = (chunklogEntry_t) zcs->framelog.entries[zcs->framelog.size] = (framelogEntry_t)
{ {
.cSize = zcs->chunkCSize, .cSize = zcs->frameCSize,
.dSize = zcs->chunkDSize, .dSize = zcs->frameDSize,
}; };
if (zcs->checksumFlag) if (zcs->checksumFlag)
zcs->chunklog.entries[zcs->chunklog.size].checksum = zcs->framelog.entries[zcs->framelog.size].checksum =
/* take lower 32 bits of digest */ /* take lower 32 bits of digest */
XXH64_digest(&zcs->xxhState) & 0xFFFFFFFFU; XXH64_digest(&zcs->xxhState) & 0xFFFFFFFFU;
zcs->chunklog.size++; zcs->framelog.size++;
/* grow the buffer if required */ /* grow the buffer if required */
if (zcs->chunklog.size == zcs->chunklog.capacity) { if (zcs->framelog.size == zcs->framelog.capacity) {
/* exponential size increase for constant amortized runtime */ /* exponential size increase for constant amortized runtime */
size_t const newCapacity = zcs->chunklog.capacity * 2; size_t const newCapacity = zcs->framelog.capacity * 2;
chunklogEntry_t* const newEntries = realloc(zcs->chunklog.entries, framelogEntry_t* const newEntries = realloc(zcs->framelog.entries,
sizeof(chunklogEntry_t) * newCapacity); sizeof(framelogEntry_t) * newCapacity);
if (newEntries == NULL) return ERROR(memory_allocation); if (newEntries == NULL) return ERROR(memory_allocation);
zcs->chunklog.entries = newEntries; zcs->framelog.entries = newEntries;
zcs->chunklog.capacity = newCapacity; zcs->framelog.capacity = newCapacity;
} }
return 0; return 0;
} }
size_t ZSTD_seekable_endChunk(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output) size_t ZSTD_seekable_endFrame(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output)
{ {
size_t const prevOutPos = output->pos; size_t const prevOutPos = output->pos;
/* end the frame */ /* end the frame */
size_t ret = ZSTD_endStream(zcs->cstream, output); size_t ret = ZSTD_endStream(zcs->cstream, output);
zcs->chunkCSize += output->pos - prevOutPos; zcs->frameCSize += output->pos - prevOutPos;
/* need to flush before doing the rest */ /* need to flush before doing the rest */
if (ret) return ret; if (ret) return ret;
/* frame done */ /* frame done */
/* store the chunk data for later */ /* store the frame data for later */
ret = ZSTD_seekable_logChunk(zcs); ret = ZSTD_seekable_logFrame(zcs);
if (ret) return ret; if (ret) return ret;
/* reset for the next chunk */ /* reset for the next frame */
zcs->chunkCSize = 0; zcs->frameCSize = 0;
zcs->chunkDSize = 0; zcs->frameDSize = 0;
ZSTD_resetCStream(zcs->cstream, 0); ZSTD_resetCStream(zcs->cstream, 0);
if (zcs->checksumFlag) if (zcs->checksumFlag)
@ -175,9 +175,9 @@ size_t ZSTD_seekable_compressStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer*
const BYTE* const inBase = (const BYTE*) input->src + input->pos; const BYTE* const inBase = (const BYTE*) input->src + input->pos;
size_t inLen = input->size - input->pos; size_t inLen = input->size - input->pos;
inLen = MIN(inLen, (size_t)(zcs->maxChunkSize - zcs->chunkDSize)); inLen = MIN(inLen, (size_t)(zcs->maxFrameSize - zcs->frameDSize));
/* if we haven't finished flushing the last chunk, don't start writing a new one */ /* if we haven't finished flushing the last frame, don't start writing a new one */
if (inLen > 0) { if (inLen > 0) {
ZSTD_inBuffer inTmp = { inBase, inLen, 0 }; ZSTD_inBuffer inTmp = { inBase, inLen, 0 };
size_t const prevOutPos = output->pos; size_t const prevOutPos = output->pos;
@ -188,31 +188,31 @@ size_t ZSTD_seekable_compressStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer*
XXH64_update(&zcs->xxhState, inBase, inTmp.pos); XXH64_update(&zcs->xxhState, inBase, inTmp.pos);
} }
zcs->chunkCSize += output->pos - prevOutPos; zcs->frameCSize += output->pos - prevOutPos;
zcs->chunkDSize += inTmp.pos; zcs->frameDSize += inTmp.pos;
input->pos += inTmp.pos; input->pos += inTmp.pos;
if (ZSTD_isError(ret)) return ret; if (ZSTD_isError(ret)) return ret;
} }
if (zcs->maxChunkSize == zcs->chunkDSize) { if (zcs->maxFrameSize == zcs->frameDSize) {
/* log the chunk and start over */ /* log the frame and start over */
size_t const ret = ZSTD_seekable_endChunk(zcs, output); size_t const ret = ZSTD_seekable_endFrame(zcs, output);
if (ZSTD_isError(ret)) return ret; if (ZSTD_isError(ret)) return ret;
/* get the client ready for the next chunk */ /* get the client ready for the next frame */
return (size_t)zcs->maxChunkSize; return (size_t)zcs->maxFrameSize;
} }
return (size_t)(zcs->maxChunkSize - zcs->chunkDSize); return (size_t)(zcs->maxFrameSize - zcs->frameDSize);
} }
static size_t ZSTD_seekable_seekTableSize(ZSTD_seekable_CStream* zcs) static size_t ZSTD_seekable_seekTableSize(ZSTD_seekable_CStream* zcs)
{ {
size_t const sizePerChunk = 8 + (zcs->checksumFlag?4:0); size_t const sizePerFrame = 8 + (zcs->checksumFlag?4:0);
size_t const seekTableLen = ZSTD_skippableHeaderSize + size_t const seekTableLen = ZSTD_skippableHeaderSize +
sizePerChunk * zcs->chunklog.size + sizePerFrame * zcs->framelog.size +
ZSTD_seekTableFooterSize; ZSTD_seekTableFooterSize;
return seekTableLen; return seekTableLen;
@ -224,60 +224,60 @@ static size_t ZSTD_seekable_writeSeekTable(ZSTD_seekable_CStream* zcs, ZSTD_outB
BYTE tmp[4]; /* so that we can work with buffers too small to write a whole word to */ BYTE tmp[4]; /* so that we can work with buffers too small to write a whole word to */
/* repurpose /* repurpose
* zcs->chunkDSize: the current index in the table and * zcs->frameDSize: the current index in the table and
* zcs->chunkCSize: the amount of the table written so far * zcs->frameCSize: the amount of the table written so far
* *
* This function is written this way so that if it has to return early * This function is written this way so that if it has to return early
* because of a small buffer, it can keep going where it left off. * because of a small buffer, it can keep going where it left off.
*/ */
size_t const sizePerChunk = 8 + (zcs->checksumFlag?4:0); size_t const sizePerFrame = 8 + (zcs->checksumFlag?4:0);
size_t const seekTableLen = ZSTD_seekable_seekTableSize(zcs); size_t const seekTableLen = ZSTD_seekable_seekTableSize(zcs);
#define st_write32(x, o) \ #define st_write32(x, o) \
do { \ do { \
if (zcs->chunkCSize < (o) + 4) { \ if (zcs->frameCSize < (o) + 4) { \
size_t const lenWrite = MIN(output->size - output->pos, \ size_t const lenWrite = MIN(output->size - output->pos, \
(o) + 4 - zcs->chunkCSize); \ (o) + 4 - zcs->frameCSize); \
MEM_writeLE32(tmp, (x)); \ MEM_writeLE32(tmp, (x)); \
memcpy(op + output->pos, tmp + (zcs->chunkCSize - (o)), lenWrite); \ memcpy(op + output->pos, tmp + (zcs->frameCSize - (o)), lenWrite); \
zcs->chunkCSize += lenWrite; \ zcs->frameCSize += lenWrite; \
output->pos += lenWrite; \ output->pos += lenWrite; \
if (lenWrite < 4) return seekTableLen - zcs->chunkCSize; \ if (lenWrite < 4) return seekTableLen - zcs->frameCSize; \
} \ } \
} while (0) } while (0)
st_write32(ZSTD_MAGIC_SKIPPABLE_START | 0xE, 0); st_write32(ZSTD_MAGIC_SKIPPABLE_START | 0xE, 0);
st_write32(seekTableLen - ZSTD_skippableHeaderSize, 4); st_write32(seekTableLen - ZSTD_skippableHeaderSize, 4);
while (zcs->chunkDSize < zcs->chunklog.size) { while (zcs->frameDSize < zcs->framelog.size) {
st_write32(zcs->chunklog.entries[zcs->chunkDSize].cSize, st_write32(zcs->framelog.entries[zcs->frameDSize].cSize,
ZSTD_skippableHeaderSize + sizePerChunk * zcs->chunkDSize); ZSTD_skippableHeaderSize + sizePerFrame * zcs->frameDSize);
st_write32(zcs->chunklog.entries[zcs->chunkDSize].dSize, st_write32(zcs->framelog.entries[zcs->frameDSize].dSize,
ZSTD_skippableHeaderSize + sizePerChunk * zcs->chunkDSize + 4); ZSTD_skippableHeaderSize + sizePerFrame * zcs->frameDSize + 4);
if (zcs->checksumFlag) { if (zcs->checksumFlag) {
st_write32(zcs->chunklog.entries[zcs->chunkDSize].checksum, st_write32(zcs->framelog.entries[zcs->frameDSize].checksum,
ZSTD_skippableHeaderSize + sizePerChunk * zcs->chunkDSize + 8); ZSTD_skippableHeaderSize + sizePerFrame * zcs->frameDSize + 8);
} }
zcs->chunkDSize++; zcs->frameDSize++;
} }
st_write32(zcs->chunklog.size, seekTableLen - ZSTD_seekTableFooterSize); st_write32(zcs->framelog.size, seekTableLen - ZSTD_seekTableFooterSize);
if (output->size - output->pos < 1) return seekTableLen - zcs->chunkCSize; if (output->size - output->pos < 1) return seekTableLen - zcs->frameCSize;
if (zcs->chunkCSize < seekTableLen - 4) { if (zcs->frameCSize < seekTableLen - 4) {
BYTE sfd = 0; BYTE sfd = 0;
sfd |= (zcs->checksumFlag) << 7; sfd |= (zcs->checksumFlag) << 7;
op[output->pos] = sfd; op[output->pos] = sfd;
output->pos++; output->pos++;
zcs->chunkCSize++; zcs->frameCSize++;
} }
st_write32(ZSTD_SEEKABLE_MAGICNUMBER, seekTableLen - 4); st_write32(ZSTD_SEEKABLE_MAGICNUMBER, seekTableLen - 4);
if (zcs->chunkCSize != seekTableLen) return ERROR(GENERIC); if (zcs->frameCSize != seekTableLen) return ERROR(GENERIC);
return 0; return 0;
#undef st_write32 #undef st_write32
@ -285,11 +285,11 @@ static size_t ZSTD_seekable_writeSeekTable(ZSTD_seekable_CStream* zcs, ZSTD_outB
size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output) size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output)
{ {
if (!zcs->writingSeekTable && zcs->chunkDSize) { if (!zcs->writingSeekTable && zcs->frameDSize) {
const size_t endChunk = ZSTD_seekable_endChunk(zcs, output); const size_t endFrame = ZSTD_seekable_endFrame(zcs, output);
if (ZSTD_isError(endChunk)) return endChunk; if (ZSTD_isError(endFrame)) return endFrame;
/* return an accurate size hint */ /* return an accurate size hint */
if (endChunk) return endChunk + ZSTD_seekable_seekTableSize(zcs); if (endFrame) return endFrame + ZSTD_seekable_seekTableSize(zcs);
} }
zcs->writingSeekTable = 1; zcs->writingSeekTable = 1;

View File

@ -29,11 +29,11 @@ typedef struct {
int checksumFlag; int checksumFlag;
} seekTable_t; } seekTable_t;
/** ZSTD_seekable_offsetToChunk() : /** ZSTD_seekable_offsetToFrame() :
* Performs a binary search to find the last chunk with a decompressed offset * Performs a binary search to find the last frame with a decompressed offset
* <= pos * <= pos
* @return : the chunk's index */ * @return : the frame's index */
static U32 ZSTD_seekable_offsetToChunk(const seekTable_t* table, U64 pos) static U32 ZSTD_seekable_offsetToFrame(const seekTable_t* table, U64 pos)
{ {
U32 lo = 0; U32 lo = 0;
U32 hi = table->tableLen; U32 hi = table->tableLen;
@ -61,7 +61,7 @@ struct ZSTD_seekable_DStream_s {
ZSTD_DStream* dstream; ZSTD_DStream* dstream;
seekTable_t seekTable; seekTable_t seekTable;
U32 curChunk; U32 curFrame;
U64 compressedOffset; U64 compressedOffset;
U64 decompressedOffset; U64 decompressedOffset;
@ -107,7 +107,7 @@ size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable_DStream* zds, const void* src,
{ {
const BYTE* ip = (const BYTE*)src + srcSize; const BYTE* ip = (const BYTE*)src + srcSize;
U32 numChunks; U32 numFrames;
int checksumFlag; int checksumFlag;
U32 sizePerEntry; U32 sizePerEntry;
@ -129,10 +129,10 @@ size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable_DStream* zds, const void* src,
} }
} }
numChunks = MEM_readLE32(ip-9); numFrames = MEM_readLE32(ip-9);
sizePerEntry = 8 + (checksumFlag?4:0); sizePerEntry = 8 + (checksumFlag?4:0);
{ U32 const tableSize = sizePerEntry * numChunks; { U32 const tableSize = sizePerEntry * numFrames;
U32 const frameSize = tableSize + ZSTD_seekTableFooterSize + ZSTD_skippableHeaderSize; U32 const frameSize = tableSize + ZSTD_seekTableFooterSize + ZSTD_skippableHeaderSize;
const BYTE* base = ip - frameSize; const BYTE* base = ip - frameSize;
@ -148,7 +148,7 @@ size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable_DStream* zds, const void* src,
{ /* Allocate an extra entry at the end so that we can do size { /* Allocate an extra entry at the end so that we can do size
* computations on the last element without special case */ * computations on the last element without special case */
seekEntry_t* entries = malloc(sizeof(seekEntry_t) * (numChunks + 1)); seekEntry_t* entries = malloc(sizeof(seekEntry_t) * (numFrames + 1));
const BYTE* tableBase = base + ZSTD_skippableHeaderSize; const BYTE* tableBase = base + ZSTD_skippableHeaderSize;
U32 idx; U32 idx;
@ -163,7 +163,7 @@ size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable_DStream* zds, const void* src,
} }
/* compute cumulative positions */ /* compute cumulative positions */
for (idx = 0, pos = 0; idx < numChunks; idx++) { for (idx = 0, pos = 0; idx < numFrames; idx++) {
entries[idx].cOffset = cOffset; entries[idx].cOffset = cOffset;
entries[idx].dOffset = dOffset; entries[idx].dOffset = dOffset;
@ -174,11 +174,11 @@ size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable_DStream* zds, const void* src,
pos += 4; pos += 4;
} }
} }
entries[numChunks].cOffset = cOffset; entries[numFrames].cOffset = cOffset;
entries[numChunks].dOffset = dOffset; entries[numFrames].dOffset = dOffset;
zds->seekTable.entries = entries; zds->seekTable.entries = entries;
zds->seekTable.tableLen = numChunks; zds->seekTable.tableLen = numFrames;
zds->seekTable.checksumFlag = checksumFlag; zds->seekTable.checksumFlag = checksumFlag;
return 0; return 0;
} }
@ -197,7 +197,7 @@ size_t ZSTD_seekable_initDStream(ZSTD_seekable_DStream* zds, U64 rangeStart, U64
zds->stage = zsds_seek; zds->stage = zsds_seek;
/* force a seek first */ /* force a seek first */
zds->curChunk = (U32) -1; zds->curFrame = (U32) -1;
zds->compressedOffset = (U64) -1; zds->compressedOffset = (U64) -1;
zds->decompressedOffset = (U64) -1; zds->decompressedOffset = (U64) -1;
@ -271,7 +271,7 @@ size_t ZSTD_seekable_decompressStream(ZSTD_seekable_DStream* zds, ZSTD_outBuffer
/* need more input */ /* need more input */
return MIN( return MIN(
ZSTD_DStreamInSize(), ZSTD_DStreamInSize(),
(size_t)(jt->entries[zds->curChunk + 1] (size_t)(jt->entries[zds->curFrame + 1]
.cOffset - .cOffset -
zds->compressedOffset)); zds->compressedOffset));
} }
@ -281,7 +281,7 @@ size_t ZSTD_seekable_decompressStream(ZSTD_seekable_DStream* zds, ZSTD_outBuffer
{ {
U64 const toDecompress = U64 const toDecompress =
MIN(zds->targetEnd, MIN(zds->targetEnd,
jt->entries[zds->curChunk + 1].dOffset) - jt->entries[zds->curFrame + 1].dOffset) -
zds->decompressedOffset; zds->decompressedOffset;
size_t const prevInputPos = input->pos; size_t const prevInputPos = input->pos;
@ -305,7 +305,7 @@ size_t ZSTD_seekable_decompressStream(ZSTD_seekable_DStream* zds, ZSTD_outBuffer
if (ret == 0) { if (ret == 0) {
/* verify the checksum */ /* verify the checksum */
U32 const digest = XXH64_digest(&zds->xxhState) & 0xFFFFFFFFU; U32 const digest = XXH64_digest(&zds->xxhState) & 0xFFFFFFFFU;
if (digest != jt->entries[zds->curChunk].checksum) { if (digest != jt->entries[zds->curFrame].checksum) {
return ERROR(checksum_wrong); return ERROR(checksum_wrong);
} }
@ -323,9 +323,9 @@ size_t ZSTD_seekable_decompressStream(ZSTD_seekable_DStream* zds, ZSTD_outBuffer
/* frame is done */ /* frame is done */
/* make sure this lines up with the expected frame border */ /* make sure this lines up with the expected frame border */
if (zds->decompressedOffset != if (zds->decompressedOffset !=
jt->entries[zds->curChunk + 1].dOffset || jt->entries[zds->curFrame + 1].dOffset ||
zds->compressedOffset != zds->compressedOffset !=
jt->entries[zds->curChunk + 1].cOffset) jt->entries[zds->curFrame + 1].cOffset)
return ERROR(corruption_detected); return ERROR(corruption_detected);
ZSTD_resetDStream(zds->dstream); ZSTD_resetDStream(zds->dstream);
zds->stage = zsds_seek; zds->stage = zsds_seek;
@ -334,29 +334,29 @@ size_t ZSTD_seekable_decompressStream(ZSTD_seekable_DStream* zds, ZSTD_outBuffer
/* need more input */ /* need more input */
return MIN(ZSTD_DStreamInSize(), (size_t)( return MIN(ZSTD_DStreamInSize(), (size_t)(
jt->entries[zds->curChunk + 1].cOffset - jt->entries[zds->curFrame + 1].cOffset -
zds->compressedOffset)); zds->compressedOffset));
} }
} }
case zsds_seek: { case zsds_seek: {
U32 targetChunk; U32 targetFrame;
if (zds->decompressedOffset < zds->targetStart || if (zds->decompressedOffset < zds->targetStart ||
zds->decompressedOffset >= zds->targetEnd) { zds->decompressedOffset >= zds->targetEnd) {
/* haven't started yet */ /* haven't started yet */
targetChunk = ZSTD_seekable_offsetToChunk(jt, zds->targetStart); targetFrame = ZSTD_seekable_offsetToFrame(jt, zds->targetStart);
} else { } else {
targetChunk = ZSTD_seekable_offsetToChunk(jt, zds->decompressedOffset); targetFrame = ZSTD_seekable_offsetToFrame(jt, zds->decompressedOffset);
} }
zds->curChunk = targetChunk; zds->curFrame = targetFrame;
if (zds->compressedOffset == jt->entries[targetChunk].cOffset) { if (zds->compressedOffset == jt->entries[targetFrame].cOffset) {
zds->stage = zsds_decompress; zds->stage = zsds_decompress;
break; break;
} }
zds->nextSeek = jt->entries[targetChunk].cOffset; zds->nextSeek = jt->entries[targetFrame].cOffset;
zds->decompressedOffset = jt->entries[targetChunk].dOffset; zds->decompressedOffset = jt->entries[targetFrame].dOffset;
/* signal to user that a seek is required */ /* signal to user that a seek is required */
return ERROR(needSeek); return ERROR(needSeek);
} }

View File

@ -38,7 +38,7 @@ const char* ERR_getErrorString(ERR_enum code)
case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
case PREFIX(dictionary_wrong): return "Dictionary mismatch"; case PREFIX(dictionary_wrong): return "Dictionary mismatch";
case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
case PREFIX(chunkIndex_tooLarge): return "Chunk index is too large"; case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
case PREFIX(needSeek): return "Wrong file position, a seek is required to continue"; case PREFIX(needSeek): return "Wrong file position, a seek is required to continue";
case PREFIX(maxCode): case PREFIX(maxCode):
default: return notErrorCode; default: return notErrorCode;

View File

@ -58,7 +58,7 @@ typedef enum {
ZSTD_error_dictionary_corrupted, ZSTD_error_dictionary_corrupted,
ZSTD_error_dictionary_wrong, ZSTD_error_dictionary_wrong,
ZSTD_error_dictionaryCreation_failed, ZSTD_error_dictionaryCreation_failed,
ZSTD_error_chunkIndex_tooLarge, ZSTD_error_frameIndex_tooLarge,
ZSTD_error_needSeek, ZSTD_error_needSeek,
ZSTD_error_maxCode ZSTD_error_maxCode
} ZSTD_ErrorCode; } ZSTD_ErrorCode;