From 9b8f337357aaf1cb3dde8ed608bac20cc23d2fc1 Mon Sep 17 00:00:00 2001 From: Mark Dittmer Date: Thu, 7 May 2020 09:31:43 -0400 Subject: [PATCH] [contrib] Support seek table-only API Memory constrained use cases that manage multiple archives benefit from retaining multiple archive seek tables without retaining a ZSTD_seekable instance for each. * New opaque type for seek table: ZSTD_seekTable. * ZSTD_seekable_copySeekTable() supports copying seek table out of a ZSTD_seekable. * ZSTD_seekTable_[eachSeekTableOp]() defines seek table API that mirrors existing seek table operations. * Existing ZSTD_seekable_[eachSeekTableOp]() retained; they delegate to ZSTD_seekTable the variant. These changes allow the above-mentioned use cases to initialize a ZSTD_seekable, extract its ZSTD_seekTable, then throw the ZSTD_seekable away to save memory. Standard ZSTD operations can then be used to decompress frames based on seek table offsets. The copy and delegate patterns are intended to minimize impact on existing code and clients. Using copy instead of move for the infrequent operation extracting a seek table ensures that the extraction does not render the ZSTD_seekable useless. Delegating to *new* seek table-oriented APIs ensures that this is not a breaking change for existing clients while supporting all meaningful operations that depend only on seek table data. --- contrib/seekable_format/zstd_seekable.h | 15 ++- contrib/seekable_format/zstdseek_decompress.c | 107 ++++++++++++++---- 2 files changed, 100 insertions(+), 22 deletions(-) diff --git a/contrib/seekable_format/zstd_seekable.h b/contrib/seekable_format/zstd_seekable.h index 7ffd1ba0..c02f097c 100644 --- a/contrib/seekable_format/zstd_seekable.h +++ b/contrib/seekable_format/zstd_seekable.h @@ -29,6 +29,7 @@ extern "C" { typedef struct ZSTD_seekable_CStream_s ZSTD_seekable_CStream; typedef struct ZSTD_seekable_s ZSTD_seekable; +typedef struct ZSTD_seekTable_s ZSTD_seekTable; /*-**************************************************************************** * Seekable compression - HowTo @@ -154,6 +155,10 @@ ZSTDLIB_API size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffe ZSTDLIB_API ZSTD_seekable* ZSTD_seekable_create(void); ZSTDLIB_API size_t ZSTD_seekable_free(ZSTD_seekable* zs); +/*===== Independent seek table management =====*/ +ZSTDLIB_API size_t ZSTD_seekable_copySeekTable(ZSTD_seekable* zs, ZSTD_seekTable** out); +ZSTDLIB_API size_t ZSTD_seekTable_free(ZSTD_seekTable* st); + /*===== Seekable decompression functions =====*/ ZSTDLIB_API size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize); ZSTDLIB_API size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src); @@ -161,7 +166,7 @@ ZSTDLIB_API size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t ZSTDLIB_API size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned frameIndex); #define ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE (0ULL-2) -/*===== Seek Table access functions =====*/ +/*===== Seekable seek table access functions =====*/ ZSTDLIB_API unsigned ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs); ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex); ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex); @@ -169,6 +174,14 @@ ZSTDLIB_API size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, ZSTDLIB_API size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, unsigned frameIndex); ZSTDLIB_API unsigned ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long long offset); +/*===== Direct seek table access functions =====*/ +ZSTDLIB_API unsigned ZSTD_seekTable_getNumFrames(ZSTD_seekTable* const st); +ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameCompressedOffset(ZSTD_seekTable* const st, unsigned frameIndex); +ZSTDLIB_API unsigned long long ZSTD_seekTable_getFrameDecompressedOffset(ZSTD_seekTable* const st, unsigned frameIndex); +ZSTDLIB_API size_t ZSTD_seekTable_getFrameCompressedSize(ZSTD_seekTable* const st, unsigned frameIndex); +ZSTDLIB_API size_t ZSTD_seekTable_getFrameDecompressedSize(ZSTD_seekTable* const st, unsigned frameIndex); +ZSTDLIB_API unsigned ZSTD_seekTable_offsetToFrameIndex(ZSTD_seekTable* const st, unsigned long long offset); + /*===== Seekable advanced I/O API =====*/ typedef int(ZSTD_seekable_read)(void* opaque, void* buffer, size_t n); typedef int(ZSTD_seekable_seek)(void* opaque, long long offset, int origin); diff --git a/contrib/seekable_format/zstdseek_decompress.c b/contrib/seekable_format/zstdseek_decompress.c index abfd1e90..20cde287 100644 --- a/contrib/seekable_format/zstdseek_decompress.c +++ b/contrib/seekable_format/zstdseek_decompress.c @@ -142,18 +142,18 @@ typedef struct { U32 checksum; } seekEntry_t; -typedef struct { +struct ZSTD_seekTable_s { seekEntry_t* entries; size_t tableLen; int checksumFlag; -} seekTable_t; +}; #define SEEKABLE_BUFF_SIZE ZSTD_BLOCKSIZE_MAX struct ZSTD_seekable_s { ZSTD_DStream* dstream; - seekTable_t seekTable; + ZSTD_seekTable seekTable; ZSTD_seekable_customFile src; U64 decompressedOffset; @@ -197,23 +197,63 @@ size_t ZSTD_seekable_free(ZSTD_seekable* zs) return 0; } +size_t ZSTD_seekable_copySeekTable(ZSTD_seekable* zs, ZSTD_seekTable** out) +{ + ZSTD_seekTable* st = malloc(sizeof(ZSTD_seekTable)); + if (!st) { + free(st); + return ERROR(memory_allocation); + } + + st->checksumFlag = zs->seekTable.checksumFlag; + st->tableLen = zs->seekTable.tableLen; + + /* Allocate an extra entry at the end to match logic of initial allocation */ + size_t entriesSize = sizeof(seekEntry_t) * (zs->seekTable.tableLen + 1); + seekEntry_t* entries = (seekEntry_t*)malloc(entriesSize); + if (!entries) { + free(entries); + return ERROR(memory_allocation); + } + + memcpy(entries, zs->seekTable.entries, entriesSize); + st->entries = entries; + + *out = st; + return 0; +} + +size_t ZSTD_seekTable_free(ZSTD_seekTable* st) +{ + if (st == NULL) return 0; /* support free on null */ + free(st->entries); + free(st); + + return 0; +} + /** ZSTD_seekable_offsetToFrameIndex() : * Performs a binary search to find the last frame with a decompressed offset * <= pos * @return : the frame's index */ unsigned ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long long pos) { - U32 lo = 0; - U32 hi = (U32)zs->seekTable.tableLen; - assert(zs->seekTable.tableLen <= UINT_MAX); + return ZSTD_seekTable_offsetToFrameIndex(&zs->seekTable, pos); +} - if (pos >= zs->seekTable.entries[zs->seekTable.tableLen].dOffset) { - return (U32)zs->seekTable.tableLen; +unsigned ZSTD_seekTable_offsetToFrameIndex(ZSTD_seekTable* const st, unsigned long long pos) +{ + U32 lo = 0; + U32 hi = (U32)st->tableLen; + assert(st->tableLen <= UINT_MAX); + + if (pos >= st->entries[st->tableLen].dOffset) { + return (U32)st->tableLen; } while (lo + 1 < hi) { U32 const mid = lo + ((hi - lo) >> 1); - if (zs->seekTable.entries[mid].dOffset <= pos) { + if (st->entries[mid].dOffset <= pos) { lo = mid; } else { hi = mid; @@ -224,34 +264,59 @@ unsigned ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long unsigned ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs) { - assert(zs->seekTable.tableLen <= UINT_MAX); - return (unsigned)zs->seekTable.tableLen; + return ZSTD_seekTable_getNumFrames(&zs->seekTable); +} + +unsigned ZSTD_seekTable_getNumFrames(ZSTD_seekTable* const st) +{ + assert(st->tableLen <= UINT_MAX); + return (unsigned)st->tableLen; } unsigned long long ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex) { - if (frameIndex >= zs->seekTable.tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE; - return zs->seekTable.entries[frameIndex].cOffset; + return ZSTD_seekTable_getFrameCompressedOffset(&zs->seekTable, frameIndex); +} + +unsigned long long ZSTD_seekTable_getFrameCompressedOffset(ZSTD_seekTable* const st, unsigned frameIndex) +{ + if (frameIndex >= st->tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE; + return st->entries[frameIndex].cOffset; } unsigned long long ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex) { - if (frameIndex >= zs->seekTable.tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE; - return zs->seekTable.entries[frameIndex].dOffset; + return ZSTD_seekTable_getFrameDecompressedOffset(&zs->seekTable, frameIndex); +} + +unsigned long long ZSTD_seekTable_getFrameDecompressedOffset(ZSTD_seekTable* const st, unsigned frameIndex) +{ + if (frameIndex >= st->tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE; + return st->entries[frameIndex].dOffset; } size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, unsigned frameIndex) { - if (frameIndex >= zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge); - return zs->seekTable.entries[frameIndex + 1].cOffset - - zs->seekTable.entries[frameIndex].cOffset; + return ZSTD_seekTable_getFrameCompressedSize(&zs->seekTable, frameIndex); +} + +size_t ZSTD_seekTable_getFrameCompressedSize(ZSTD_seekTable* const st, unsigned frameIndex) +{ + if (frameIndex >= st->tableLen) return ERROR(frameIndex_tooLarge); + return st->entries[frameIndex + 1].cOffset - + st->entries[frameIndex].cOffset; } size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, unsigned frameIndex) { - if (frameIndex > zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge); - return zs->seekTable.entries[frameIndex + 1].dOffset - - zs->seekTable.entries[frameIndex].dOffset; + return ZSTD_seekTable_getFrameDecompressedSize(&zs->seekTable, frameIndex); +} + +size_t ZSTD_seekTable_getFrameDecompressedSize(ZSTD_seekTable* const st, unsigned frameIndex) +{ + if (frameIndex > st->tableLen) return ERROR(frameIndex_tooLarge); + return st->entries[frameIndex + 1].dOffset - + st->entries[frameIndex].dOffset; } static size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable* zs)