From bed6c0a4b022f216933fd63db825a00f013a839a Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 10 Apr 2017 14:49:34 -0700 Subject: [PATCH 1/4] Remove ZSTDLIB_API from decompress.c --- contrib/linux-kernel/lib/zstd/decompress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/linux-kernel/lib/zstd/decompress.c b/contrib/linux-kernel/lib/zstd/decompress.c index 9eb210af..3fcef010 100644 --- a/contrib/linux-kernel/lib/zstd/decompress.c +++ b/contrib/linux-kernel/lib/zstd/decompress.c @@ -1399,7 +1399,7 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, /** ZSTD_insertBlock() : insert `src` block into `dctx` history. Useful to track uncompressed blocks. */ -ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) +size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) { ZSTD_checkContinuity(dctx, blockStart); dctx->previousDstEnd = (const char*)blockStart + blockSize; From adb1974aec3428e953732c9794922f1746602559 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 10 Apr 2017 14:50:03 -0700 Subject: [PATCH 2/4] Switch comments to kernel style + limit to 80 cols --- contrib/linux-kernel/include/linux/zstd.h | 1509 ++++++++++++++------- 1 file changed, 1006 insertions(+), 503 deletions(-) diff --git a/contrib/linux-kernel/include/linux/zstd.h b/contrib/linux-kernel/include/linux/zstd.h index dcfcebff..38b2bca6 100644 --- a/contrib/linux-kernel/include/linux/zstd.h +++ b/contrib/linux-kernel/include/linux/zstd.h @@ -7,354 +7,749 @@ * of patent rights can be found in the PATENTS file in the same directory. 
*/ -#ifndef ZSTD_H_235446 -#define ZSTD_H_235446 +#ifndef ZSTD_H +#define ZSTD_H /* ====== Dependency ======*/ #include <stddef.h> /* size_t */ -/* ===== ZSTDLIB_API : control library symbols visibility ===== */ -#define ZSTDLIB_API - - -/******************************************************************************************************* - Introduction - - zstd, short for Zstandard, is a fast lossless compression algorithm, targeting real-time compression scenarios - at zlib-level and better compression ratios. The zstd compression library provides in-memory compression and - decompression functions. The library supports compression levels from 1 up to ZSTD_maxCLevel() which is 22. - Levels >= 20, labeled `--ultra`, should be used with caution, as they require more memory. - Compression can be done in: - - a single step (described as Simple API) - - a single step, reusing a context (described as Explicit memory management) - - unbounded multiple steps (described as Streaming compression) - The compression ratio achievable on small data can be highly improved using compression with a dictionary in: - - a single step (described as Simple dictionary API) - - a single step, reusing a dictionary (described as Fast dictionary API) -*********************************************************************************************************/ - -/*------ Version ------*/ -#define ZSTD_VERSION_MAJOR 1 -#define ZSTD_VERSION_MINOR 1 -#define ZSTD_VERSION_RELEASE 5 - -#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE -#define ZSTD_QUOTE(str) #str -#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) -#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) - -#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) - +/*-***************************************************************************** + * Introduction + * + * zstd, short for Zstandard, is a fast lossless compression algorithm, + * 
targeting real-time compression scenarios at zlib-level and better + * compression ratios. The zstd compression library provides in-memory + * compression and decompression functions. The library supports compression + * levels from 1 up to ZSTD_maxCLevel() which is 22. Levels >= 20, labeled + * ultra, should be used with caution, as they require more memory. + * Compression can be done in: + * - a single step, reusing a context (described as Explicit memory management) + * - unbounded multiple steps (described as Streaming compression) + * The compression ratio achievable on small data can be highly improved using + * compression with a dictionary in: + * - a single step (described as Simple dictionary API) + * - a single step, reusing a dictionary (described as Fast dictionary API) + ******************************************************************************/ /*====== Helper functions ======*/ + +/** + * enum ZSTD_ErrorCode - zstd error codes + * + * Functions that return size_t can be checked for errors using ZSTD_isError() + * and the ZSTD_ErrorCode can be extracted using ZSTD_getErrorCode(). 
+ */ typedef enum { - ZSTD_error_no_error, - ZSTD_error_GENERIC, - ZSTD_error_prefix_unknown, - ZSTD_error_version_unsupported, - ZSTD_error_parameter_unknown, - ZSTD_error_frameParameter_unsupported, - ZSTD_error_frameParameter_unsupportedBy32bits, - ZSTD_error_frameParameter_windowTooLarge, - ZSTD_error_compressionParameter_unsupported, - ZSTD_error_init_missing, - ZSTD_error_memory_allocation, - ZSTD_error_stage_wrong, - ZSTD_error_dstSize_tooSmall, - ZSTD_error_srcSize_wrong, - ZSTD_error_corruption_detected, - ZSTD_error_checksum_wrong, - ZSTD_error_tableLog_tooLarge, - ZSTD_error_maxSymbolValue_tooLarge, - ZSTD_error_maxSymbolValue_tooSmall, - ZSTD_error_dictionary_corrupted, - ZSTD_error_dictionary_wrong, - ZSTD_error_dictionaryCreation_failed, - ZSTD_error_maxCode + ZSTD_error_no_error, + ZSTD_error_GENERIC, + ZSTD_error_prefix_unknown, + ZSTD_error_version_unsupported, + ZSTD_error_parameter_unknown, + ZSTD_error_frameParameter_unsupported, + ZSTD_error_frameParameter_unsupportedBy32bits, + ZSTD_error_frameParameter_windowTooLarge, + ZSTD_error_compressionParameter_unsupported, + ZSTD_error_init_missing, + ZSTD_error_memory_allocation, + ZSTD_error_stage_wrong, + ZSTD_error_dstSize_tooSmall, + ZSTD_error_srcSize_wrong, + ZSTD_error_corruption_detected, + ZSTD_error_checksum_wrong, + ZSTD_error_tableLog_tooLarge, + ZSTD_error_maxSymbolValue_tooLarge, + ZSTD_error_maxSymbolValue_tooSmall, + ZSTD_error_dictionary_corrupted, + ZSTD_error_dictionary_wrong, + ZSTD_error_dictionaryCreation_failed, + ZSTD_error_maxCode } ZSTD_ErrorCode; -ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ -ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case scenario */ -/*! 
ZSTD_isError() : -* tells if a `size_t` function result is an error code */ -ZSTDLIB_API static __attribute__((unused)) unsigned ZSTD_isError(size_t code) { +/** + * ZSTD_maxCLevel() - maximum compression level available + * + * Return: Maximum compression level available. + */ +int ZSTD_maxCLevel(void); +/** + * ZSTD_compressBound() - maximum compressed size in worst case scenario + * @srcSize: The size of the data to compress. + * + * Return: The maximum compressed size in the worst case scenario. + */ +size_t ZSTD_compressBound(size_t srcSize); +/** + * ZSTD_isError() - tells if a size_t function result is an error code + * @code: The function result to check for error. + * + * Return: Non-zero iff the code is an error. + */ +static __attribute__((unused)) unsigned int ZSTD_isError(size_t code) +{ return code > (size_t)-ZSTD_error_maxCode; } -/*! ZSTD_getErrorCode() : -* convert a `size_t` function result into a proper ZSTD_errorCode enum */ -ZSTDLIB_API static __attribute__((unused)) ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult) { - if (!ZSTD_isError(functionResult)) { +/** + * ZSTD_getErrorCode() - translates an error function result to a ZSTD_ErrorCode + * @functionResult: The result of a function for which ZSTD_isError() is true. + * + * Return: The ZSTD_ErrorCode corresponding to the functionResult or 0 + * if the functionResult isn't an error. + */ +static __attribute__((unused)) ZSTD_ErrorCode ZSTD_getErrorCode( + size_t functionResult) +{ + if (!ZSTD_isError(functionResult)) return (ZSTD_ErrorCode)0; - } return (ZSTD_ErrorCode)(0 - functionResult); } -/*************************************** -* Explicit memory management -***************************************/ - -typedef enum { ZSTD_fast, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt, ZSTD_btopt2 } ZSTD_strategy; /* from faster to stronger */ +/** + * enum ZSTD_strategy - zstd compression search strategy + * + * From faster to stronger. 
+ */ +typedef enum { + ZSTD_fast, + ZSTD_dfast, + ZSTD_greedy, + ZSTD_lazy, + ZSTD_lazy2, + ZSTD_btlazy2, + ZSTD_btopt, + ZSTD_btopt2 +} ZSTD_strategy; +/** + * struct ZSTD_compressionParameters - zstd compression parameters + * @windowLog: Log of the largest match distance. Larger means more + * compression, and more memory needed during decompression. + * @chainLog: Fully searched segment. Larger means more compression, slower, + * and more memory (useless for fast). + * @hashLog: Dispatch table. Larger means more compression, + * slower, and more memory. + * @searchLog: Number of searches. Larger means more compression and slower. + * @searchLength: Match length searched. Larger means faster decompression, + * sometimes less compression. + * @targetLength: Acceptable match size for optimal parser (only). Larger means + * more compression, and slower. + * @strategy: The zstd compression strategy. + */ typedef struct { - unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ - unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ - unsigned hashLog; /**< dispatch table : larger == faster, more memory */ - unsigned searchLog; /**< nb of searches : larger == more compression, slower */ - unsigned searchLength; /**< match length searched : larger == faster decompression, sometimes less compression */ - unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */ + unsigned int windowLog; + unsigned int chainLog; + unsigned int hashLog; + unsigned int searchLog; + unsigned int searchLength; + unsigned int targetLength; ZSTD_strategy strategy; } ZSTD_compressionParameters; +/** + * struct ZSTD_frameParameters - zstd frame parameters + * @contentSizeFlag: Controls whether content size will be present in the frame + * header (when known). 
+ * @checksumFlag: Controls whether a 32-bit checksum is generated at the end + * of the frame for error detection. + * @noDictIDFlag: Controls whether dictID will be saved into the frame header + * when using dictionary compression. + * + * The default value is all fields set to 0. + */ typedef struct { - unsigned contentSizeFlag; /**< 1: content size will be in frame header (when known) */ - unsigned checksumFlag; /**< 1: generate a 32-bits checksum at end of frame, for error detection */ - unsigned noDictIDFlag; /**< 1: no dictID will be saved into frame header (if dictionary compression) */ + unsigned int contentSizeFlag; + unsigned int checksumFlag; + unsigned int noDictIDFlag; } ZSTD_frameParameters; +/** + * struct ZSTD_parameters - zstd parameters + * @cParams: The compression parameters. + * @fParams: The frame parameters. + */ typedef struct { ZSTD_compressionParameters cParams; ZSTD_frameParameters fParams; } ZSTD_parameters; -/*! ZSTD_getCParams() : -* @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. -* `estimatedSrcSize` value is optional, select 0 if not known */ -ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); +/** + * ZSTD_getCParams() - returns ZSTD_compressionParameters for selected level + * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel(). + * @estimatedSrcSize: The estimated source size to compress or 0 if unknown. + * @dictSize: The dictionary size or 0 if a dictionary isn't being used. + * + * Return: The selected ZSTD_compressionParameters. + */ +ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, + unsigned long long estimatedSrcSize, size_t dictSize); -/*! ZSTD_getParams() : -* same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. 
-* All fields of `ZSTD_frameParameters` are set to default (0) */ -ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); +/** + * ZSTD_getParams() - returns ZSTD_parameters for selected level + * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel(). + * @estimatedSrcSize: The estimated source size to compress or 0 if unknown. + * @dictSize: The dictionary size or 0 if a dictionary isn't being used. + * + * The same as ZSTD_getCParams() except also selects the default frame + * parameters (all zero). + * + * Return: The selected ZSTD_parameters. + */ +ZSTD_parameters ZSTD_getParams(int compressionLevel, + unsigned long long estimatedSrcSize, size_t dictSize); +/*-************************************* + * Explicit memory management + **************************************/ -/*! ZSTD_CCtxWorkspaceBound() : -* Returns the minimum amount of memory that needs to be passed to ZSTD_createCCtx() in order to compress with `params.cParams` -* or a `cdict` created with `params.cParams`. */ +/** + * ZSTD_CCtxWorkspaceBound() - the amount of memory needed to create a ZSTD_CCtx + * @cParams: The compression parameters to be used for compression. + * + * If multiple compression parameters might be used, the caller must call + * ZSTD_CCtxWorkspaceBound() for each set of parameters and use the maximum + * size. + * + * Return: A lower bound on the size of the workspace that is passed to + * ZSTD_createCCtx(). + */ size_t ZSTD_CCtxWorkspaceBound(ZSTD_compressionParameters cParams); -/*= Compression context -* When compressing many times, -* it is recommended to allocate a context just once, and re-use it for each successive compression operation. -* The context pointer is placed in `workspace`, which must outlive the returned context. -* Use one context per thread for parallel execution in multi-threaded environments. 
*/ +/** + * struct ZSTD_CCtx - the zstd compression context + * + * When compressing many times it is recommended to allocate a context just once + * and reuse it for each successive compression operation. + */ typedef struct ZSTD_CCtx_s ZSTD_CCtx; -ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void* workspace, size_t workspaceSize); +/** + * ZSTD_createCCtx() - create a zstd compression context + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. Use ZSTD_CCtxWorkspaceBound() to + * determine how large the workspace must be. + * + * Return: A compression context emplaced into workspace. + */ +ZSTD_CCtx *ZSTD_createCCtx(void *workspace, size_t workspaceSize); -/*! ZSTD_compressCCtx() : -* Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()). -* Note : The workspace passed to ZSTD_createCCtx() must have been at least ZSTD_CCtxWorkspaceBound(params.cParams) bytes. */ -ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, ZSTD_parameters params); +/** + * ZSTD_compressCCtx() - compress src into dst + * @ctx: The context. Must have been created with a workspace at least + * as large as ZSTD_CCtxWorkspaceBound(params.cParams). + * @dst: The buffer to compress src into. + * @dstCapacity: The size of the destination buffer. May be any size, but + * ZSTD_compressBound(srcSize) is guaranteed to be large enough. + * @src: The data to compress. + * @srcSize: The size of the data to compress. + * @params: The parameters to use for compression. See ZSTD_getParams(). + * + * Return: The compressed size or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_compressCCtx(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize, ZSTD_parameters params); -/*! ZSTD_compress_usingDict() : -* Compression using a predefined Dictionary (see dictBuilder/zdict.h). 
-* Note : The workspace passed to ZSTD_createCCtx() must have been at least ZSTD_CCtxWorkspaceBound(params.cParams) bytes. -* Note : This function loads the dictionary, resulting in significant startup delay. -* Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ -ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void *dict, size_t dictSize, ZSTD_parameters params); - -/*! ZSTD_DCtxWorkspaceBound() : - * Returns the minimum amount of memory that needs to be passed to ZSTD_createDCtx(). */ +/** + * ZSTD_DCtxWorkspaceBound() - the amount of memory needed to create a ZSTD_DCtx + * + * Return: A lower bound on the size of the workspace that is passed to + * ZSTD_createDCtx(). + */ size_t ZSTD_DCtxWorkspaceBound(void); -/*= Decompression context -* When decompressing many times, -* it is recommended to allocate a context just once, and re-use it for each successive compression operation. -* The context pointer is placed in `workspace`, which must outlive the returned context. -* `workspace` must be at least ZSTD_DCtxWorkspaceBound() bytes. -* Use one context per thread for parallel execution in multi-threaded environments. */ +/** + * struct ZSTD_DCtx - the zstd decompression context + * + * When decompressing many times it is recommended to allocate a context just + * once and reuse it for each successive decompression operation. + */ typedef struct ZSTD_DCtx_s ZSTD_DCtx; -ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void* workspace, size_t workspaceSize); +/** + * ZSTD_createDCtx() - create a zstd decompression context + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. Use ZSTD_DCtxWorkspaceBound() to + * determine how large the workspace must be. + * + * Return: A decompression context emplaced into workspace. + */ +ZSTD_DCtx *ZSTD_createDCtx(void *workspace, size_t workspaceSize); -/*! 
ZSTD_decompressDCtx() : -* Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()). */ -ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +/** + * ZSTD_decompressDCtx() - decompress zstd compressed src into dst + * @ctx: The decompression context. + * @dst: The buffer to decompress src into. + * @dstCapacity: The size of the destination buffer. Must be at least as large + * as the decompressed size. If the caller cannot upper bound the + * decompressed size, then it's better to use the streaming API. + * @src: The zstd compressed data to decompress. Multiple concatenated + * frames and skippable frames are allowed. + * @srcSize: The exact size of the data to decompress. + * + * Return: The decompressed size or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_decompressDCtx(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize); -/*! ZSTD_decompress_usingDict() : -* Decompression using a predefined Dictionary (see dictBuilder/zdict.h). -* Dictionary must be identical to the one used during compression. -* Note : This function loads the dictionary, resulting in significant startup delay. -* Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ -ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void *dict, size_t dictSize); +/*-************************ + * Simple dictionary API + **************************/ -/**************************** -* Fast dictionary API -****************************/ -/*! ZSTD_CDictWorkspaceBound() : - * Returns the minimum amount of memory that needs to be passed to ZSTD_createCDict() when called with the given `params.cParams`. */ +/** + * ZSTD_compress_usingDict() - compress src into dst using a dictionary + * @ctx: The context. 
Must have been created with a workspace at least + * as large as ZSTD_CCtxWorkspaceBound(params.cParams). + * @dst: The buffer to compress src into. + * @dstCapacity: The size of the destination buffer. May be any size, but + * ZSTD_compressBound(srcSize) is guaranteed to be large enough. + * @src: The data to compress. + * @srcSize: The size of the data to compress. + * @dict: The dictionary to use for compression. + * @dictSize: The size of the dictionary. + * @params: The parameters to use for compression. See ZSTD_getParams(). + * + * Compression using a predefined dictionary. The same dictionary must be used + * during decompression. + * + * Return: The compressed size or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_compress_usingDict(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize, const void *dict, size_t dictSize, + ZSTD_parameters params); + +/** + * ZSTD_decompress_usingDict() - decompress src into dst using a dictionary + * @ctx: The decompression context. + * @dst: The buffer to decompress src into. + * @dstCapacity: The size of the destination buffer. Must be at least as large + * as the decompressed size. If the caller cannot upper bound the + * decompressed size, then it's better to use the streaming API. + * @src: The zstd compressed data to decompress. Multiple concatenated + * frames and skippable frames are allowed. + * @srcSize: The exact size of the data to decompress. + * @dict: The dictionary to use for decompression. The same dictionary + * must've been used to compress the data. + * @dictSize: The size of the dictionary. + * + * Return: The decompressed size or an error, which can be checked using + * ZSTD_isError(). 
+ */ +size_t ZSTD_decompress_usingDict(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize, const void *dict, size_t dictSize); + +/*-************************** + * Fast dictionary API + ***************************/ + +/** + * ZSTD_CDictWorkspaceBound() - amount of memory needed to create a ZSTD_CDict + * @cParams: The compression parameters to be used for compression. + * + * Return: A lower bound on the size of the workspace that is passed to + * ZSTD_createCDict(). + */ size_t ZSTD_CDictWorkspaceBound(ZSTD_compressionParameters cParams); +/** + * struct ZSTD_CDict - a digested dictionary to be used for compression + */ typedef struct ZSTD_CDict_s ZSTD_CDict; -/*! ZSTD_createCDict() : -* When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once. -* ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay. -* ZSTD_CDict can be created once and used by multiple threads concurrently, as its usage is read-only. -* `dictBuffer` content is referenced, and it must remain accessible throughout the lifetime of the CDict. -* The cdict pointer is placed in `workspace`, which must outlive the returned cdict. -* `workspace` must be at least ZSTD_CDictWorkspaceBound(params.cParams) bytes. */ -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, ZSTD_parameters params, void* workspace, size_t workspaceSize); +/** + * ZSTD_createCDict() - create a digested dictionary for compression + * @dictBuffer: The dictionary to digest. The buffer is referenced by the + * ZSTD_CDict so it must outlive the returned ZSTD_CDict. + * @dictSize: The size of the dictionary. + * @params: The parameters to use for compression. See ZSTD_getParams(). + * @workspace: The workspace. It must outlive the returned ZSTD_CDict. + * @workspaceSize: The workspace size. Must be at least + * ZSTD_CDictWorkspaceBound(params.cParams). 
+ * + * When compressing multiple messages / blocks with the same dictionary it is + * recommended to load it just once. The ZSTD_CDict merely references the + * dictBuffer, so it must outlive the returned ZSTD_CDict. + * + * Return: The digested dictionary emplaced into workspace. + */ +ZSTD_CDict *ZSTD_createCDict(const void *dictBuffer, size_t dictSize, + ZSTD_parameters params, void *workspace, size_t workspaceSize); -/*! ZSTD_compress_usingCDict() : -* Compression using a digested Dictionary. -* Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. -* Note that compression level is decided during dictionary creation. */ -ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict); +/** + * ZSTD_compress_usingCDict() - compress src into dst using a ZSTD_CDict + * @cctx: The context. Must have been created with a workspace at least + * as large as ZSTD_CCtxWorkspaceBound(cParams) where cParams are + * the compression parameters used to create cdict. + * @dst: The buffer to compress src into. + * @dstCapacity: The size of the destination buffer. May be any size, but + * ZSTD_compressBound(srcSize) is guaranteed to be large enough. + * @src: The data to compress. + * @srcSize: The size of the data to compress. + * @cdict: The digested dictionary to use for compression. + * The compression parameters are decided when cdict is created. + * + * Compression using a digested dictionary. The same dictionary must be used + * during decompression. + * + * Return: The compressed size or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_compress_usingCDict(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize, const ZSTD_CDict *cdict); -/*! ZSTD_DDictWorkspaceBound() : - * Returns the minimum amount of memory that needs to be passed to ZSTD_createDDict(). 
*/ +/** + * ZSTD_DDictWorkspaceBound() - amount of memory needed to create a ZSTD_DDict + * + * Return: A lower bound on the size of the workspace that is passed to + * ZSTD_createDDict(). + */ size_t ZSTD_DDictWorkspaceBound(void); +/** + * struct ZSTD_DDict - a digested dictionary to be used for decompression + */ typedef struct ZSTD_DDict_s ZSTD_DDict; -/*! ZSTD_createDDict() : -* Create a digested dictionary, ready to start decompression operation without startup delay. -* `dictBuffer` content is referenced, and it must remain accessible throughout the lifetime of the DDict. -* The ddict pointer is placed in `workspace`, which must outlive the returned ddict. -* `workspace` must be at least ZSTD_DDictWorkspaceBound() bytes. */ -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize, void* workspace, size_t workspaceSize); +/** + * ZSTD_createDDict() - create a digested dictionary for decompression + * @dictBuffer: The dictionary to digest. The buffer is referenced by the + * ZSTD_DDict so it must outlive the returned ZSTD_DDict. + * @dictSize: The size of the dictionary. + * @workspace: The workspace. It must outlive the returned ZSTD_DDict. + * @workspaceSize: The workspace size. Must be at least + * ZSTD_DDictWorkspaceBound(). + * + * When decompressing multiple messages / blocks with the same dictionary it is + * recommended to load it just once. The ZSTD_DDict merely references the + * dictBuffer, so it must outlive the returned ZSTD_DDict. + * + * Return: The digested dictionary emplaced into workspace. + */ +ZSTD_DDict *ZSTD_createDDict(const void *dictBuffer, size_t dictSize, + void *workspace, size_t workspaceSize); -/*! ZSTD_decompress_usingDDict() : -* Decompression using a digested Dictionary. -* Faster startup than ZSTD_decompress_usingDict(), recommended when same dictionary is used multiple times. 
*/ -ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_DDict* ddict); +/** + * ZSTD_decompress_usingDDict() - decompress src into dst using a ZSTD_DDict + * @dctx: The decompression context. + * @dst: The buffer to decompress src into. + * @dstCapacity: The size of the destination buffer. Must be at least as large + * as the decompressed size. If the caller cannot upper bound the + * decompressed size, then it's better to use the streaming API. + * @src: The zstd compressed data to decompress. Multiple concatenated + * frames and skippable frames are allowed. + * @srcSize: The exact size of the data to decompress. + * @ddict: The digested dictionary to use for decompression. The same + * dictionary must've been used to compress the data. + * + * Return: The decompressed size or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst, + size_t dstCapacity, const void *src, size_t srcSize, + const ZSTD_DDict *ddict); -/**************************** -* Streaming -****************************/ +/*-************************** + * Streaming + ***************************/ +/** + * struct ZSTD_inBuffer - input buffer for streaming + * @src: Start of the input buffer. + * @size: Size of the input buffer. + * @pos: Position where reading stopped. Will be updated. + * Necessarily 0 <= pos <= size. + */ typedef struct ZSTD_inBuffer_s { - const void* src; /**< start of input buffer */ - size_t size; /**< size of input buffer */ - size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ + const void *src; + size_t size; + size_t pos; } ZSTD_inBuffer; +/** + * struct ZSTD_outBuffer - output buffer for streaming + * @dst: Start of the output buffer. + * @size: Size of the output buffer. + * @pos: Position where writing stopped. Will be updated. + * Necessarily 0 <= pos <= size. 
+ */ typedef struct ZSTD_outBuffer_s { - void* dst; /**< start of output buffer */ - size_t size; /**< size of output buffer */ - size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ + void *dst; + size_t size; + size_t pos; } ZSTD_outBuffer; -/*-*********************************************************************** -* Streaming compression - HowTo -* -* A ZSTD_CStream object is required to track streaming operation. -* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. -* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. -* It is recommended to re-use ZSTD_CStream in situations where many streaming operations will be achieved consecutively. -* Use one separate ZSTD_CStream per thread for parallel execution. -* -* Start a new compression by initializing ZSTD_CStream. -* Use ZSTD_initCStream() to start a new compression operation. -* Use ZSTD_initCStream_usingCDict() for a compression which requires a dictionary. -* -* Use ZSTD_compressStream() repetitively to consume input stream. -* The function will automatically update both `pos` fields. -* Note that it may not consume the entire input, in which case `pos < size`, -* and it's up to the caller to present again remaining data. -* @return : a size hint, preferred nb of bytes to use as input for next function call -* or an error code, which can be tested using ZSTD_isError(). -* Note 1 : it's just a hint, to help latency a little, any other value will work fine. -* Note 2 : size hint is guaranteed to be <= ZSTD_CStreamInSize() -* -* At any moment, it's possible to flush whatever data remains within internal buffer, using ZSTD_flushStream(). -* `output->pos` will be updated. -* Note that some content might still be left within internal buffer if `output->size` is too small. 
-* @return : nb of bytes still present within internal buffer (0 if it's empty) -* or an error code, which can be tested using ZSTD_isError(). -* -* ZSTD_endStream() instructs to finish a frame. -* It will perform a flush and write frame epilogue. -* The epilogue is required for decoders to consider a frame completed. -* Similar to ZSTD_flushStream(), it may not be able to flush the full content if `output->size` is too small. -* In which case, call again ZSTD_endStream() to complete the flush. -* @return : nb of bytes still present within internal buffer (0 if it's empty, hence compression completed) -* or an error code, which can be tested using ZSTD_isError(). -* -* *******************************************************************/ +/*-***************************************************************************** + * Streaming compression - HowTo + * + * A ZSTD_CStream object is required to track streaming operation. + * Use ZSTD_createCStream() to create and initialize a ZSTD_CStream object. + * ZSTD_CStream objects can be reused multiple times on consecutive compression + * operations. It is recommended to re-use ZSTD_CStream in situations where many + * streaming operations will be achieved consecutively. Use one separate + * ZSTD_CStream per thread for parallel execution. + * + * Use ZSTD_compressStream() repetitively to consume input stream. + * The function will automatically update both `pos` fields. + * Note that it may not consume the entire input, in which case `pos < size`, + * and it's up to the caller to present again remaining data. + * It returns a hint for the preferred number of bytes to use as an input for + * the next function call. + * + * At any moment, it's possible to flush whatever data remains within internal + * buffer, using ZSTD_flushStream(). `output->pos` will be updated. There might + * still be some content left within the internal buffer if `output->size` is + * too small. 
It returns the number of bytes left in the internal buffer and + * must be called until it returns 0. + * + * ZSTD_endStream() instructs to finish a frame. It will perform a flush and + * write frame epilogue. The epilogue is required for decoders to consider a + * frame completed. Similar to ZSTD_flushStream(), it may not be able to flush + * the full content if `output->size` is too small. In which case, call again + * ZSTD_endStream() to complete the flush. It returns the number of bytes left + * in the internal buffer and must be called until it returns 0. + ******************************************************************************/ -/*! ZSTD_CStreamWorkspaceBound() : - * Returns the minimum amount of memory that needs to be passed to ZSTD_createCStream() or ZSTD_createCStream_usingCDict() - * when called with the given `params.cParams` or `cdict` created with `params.cParams`. */ +/** + * ZSTD_CStreamWorkspaceBound() - memory needed to create a ZSTD_CStream + * @cParams: The compression parameters to be used for compression. + * + * Return: A lower bound on the size of the workspace that is passed to + * ZSTD_createCStream() and ZSTD_createCStream_usingCDict(). + */ size_t ZSTD_CStreamWorkspaceBound(ZSTD_compressionParameters cParams); +/** + * struct ZSTD_CStream - the zstd streaming compression context + */ typedef struct ZSTD_CStream_s ZSTD_CStream; + /*===== ZSTD_CStream management functions =====*/ -/*! ZSTD_createCStream() : -* Creates a cstream using params. -* Callers may optionally provide the size of the source they intend to compress, or pass 0 if unknown. -* The stream is placed in `workspace`, which must outlive the returned stream. -* `workspace` must be at least ZSTD_CStreamWorkspaceBound(params.cParams) bytes. */ -ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(ZSTD_parameters params, unsigned long long pledgedSrcSize, void* workspace, size_t workspaceSize); -/*! 
ZSTD_createCStream_usingCDict() : -* Similar to ZSTD_createCStream(), but use the given preprocessed dictionary. -*/ -ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_usingCDict(const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize, void* workspace, size_t workspaceSize); +/** + * ZSTD_createCStream() - create a zstd streaming compression context + * @params: The zstd compression parameters. + * @pledgedSrcSize: If params.fParams.contentSizeFlag == 1 then the caller must + * pass the source size (zero means empty source). Otherwise, + * the caller may optionally pass the source size, or zero if + * unknown. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. + * Use ZSTD_CStreamWorkspaceBound(params.cParams) to determine + * how large the workspace must be. + * + * Return: The zstd streaming compression context. + */ +ZSTD_CStream *ZSTD_createCStream(ZSTD_parameters params, + unsigned long long pledgedSrcSize, void *workspace, + size_t workspaceSize); + +/** + * ZSTD_createCStream_usingCDict() - create a zstd streaming compression context + * @cdict: The digested dictionary to use for compression. + * @pledgedSrcSize: Optionally the source size, or zero if unknown. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. Call ZSTD_CStreamWorkspaceBound() + * with the cParams used to create the cdict to determine how + * large the workspace must be. + * + * Return: The zstd streaming compression context. 
+ */ +ZSTD_CStream *ZSTD_createCStream_usingCDict(const ZSTD_CDict *cdict, + unsigned long long pledgedSrcSize, void *workspace, + size_t workspaceSize); /*===== Streaming compression functions =====*/ -ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); /**< re-use compression parameters from previous init; skip dictionary loading stage; zcs must be init at least once before. note: pledgedSrcSize must be correct, a size of 0 means unknown. for a frame size of 0 use initCStream_advanced */ -ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); -ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); -ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); +/** + * ZSTD_resetCStream() - reset the context using parameters from creation + * @zcs: The zstd streaming compression context to reset. + * @pledgedSrcSize: Optionally the source size, or zero if unknown. + * + * Resets the context using the parameters from creation. Skips dictionary + * loading, since it can be reused. If `pledgedSrcSize` is non-zero the frame + * content size is always written into the frame header. + * + * Return: Zero or an error, which can be checked using ZSTD_isError(). + */ +size_t ZSTD_resetCStream(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize); +/** + * ZSTD_compressStream() - streaming compress some of input into output + * @zcs: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. + * @input: Source buffer. `input->pos` is updated to indicate how much data was + * read. Note that it may not consume the entire input, in which case + * `input->pos < input->size`, and it's up to the caller to present + * remaining data again. + * + * The `input` and `output` buffers may be any size. 
Guaranteed to make some + * forward progress if `input` and `output` are not empty. + * + * Return: A hint for the number of bytes to use as the input for the next + * function call or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_compressStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output, + ZSTD_inBuffer *input); +/** + * ZSTD_flushStream() - flush internal buffers into output + * @zcs: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. + * + * ZSTD_flushStream() must be called until it returns 0, meaning all the data + * has been flushed. Since ZSTD_flushStream() causes a block to be ended, + * calling it too often will degrade the compression ratio. + * + * Return: The number of bytes still present within internal buffers or an + * error, which can be checked using ZSTD_isError(). + */ +size_t ZSTD_flushStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output); +/** + * ZSTD_endStream() - flush internal buffers into output and end the frame + * @zcs: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. + * + * ZSTD_endStream() must be called until it returns 0, meaning all the data has + * been flushed and the frame epilogue has been written. + * + * Return: The number of bytes still present within internal buffers or an + * error, which can be checked using ZSTD_isError(). + */ +size_t ZSTD_endStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output); -ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ -ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */ +/** + * ZSTD_CStreamInSize() - recommended size for the input buffer + * + * Return: The recommended size for the input buffer. 
+ */ +size_t ZSTD_CStreamInSize(void); +/** + * ZSTD_CStreamOutSize() - recommended size for the output buffer + * + * When the output buffer is at least this large, it is guaranteed to be large + * enough to flush at least one complete compressed block. + * + * Return: The recommended size for the output buffer. + */ +size_t ZSTD_CStreamOutSize(void); -/*-*************************************************************************** -* Streaming decompression - HowTo -* -* A ZSTD_DStream object is required to track streaming operations. -* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. -* ZSTD_DStream objects can be re-used multiple times. -* -* Use ZSTD_initDStream() to start a new decompression operation, -* or ZSTD_initDStream_usingDict() if decompression requires a dictionary. -* @return : recommended first input size -* -* Use ZSTD_decompressStream() repetitively to consume your input. -* The function will update both `pos` fields. -* If `input.pos < input.size`, some input has not been consumed. -* It's up to the caller to present again remaining data. -* If `output.pos < output.size`, decoder has flushed everything it could. -* @return : 0 when a frame is completely decoded and fully flushed, -* an error code, which can be tested using ZSTD_isError(), -* any other value > 0, which means there is still some decoding to do to complete current frame. -* The return value is a suggested next input size (a hint to improve latency) that will never load more than the current frame. -* *******************************************************************************/ +/*-***************************************************************************** + * Streaming decompression - HowTo + * + * A ZSTD_DStream object is required to track streaming operations. + * Use ZSTD_createDStream() to initialize a ZSTD_DStream object. + * ZSTD_DStream objects can be re-used multiple times. 
+ * + * Use ZSTD_decompressStream() repetitively to consume your input. + * The function will update both `pos` fields. + * If `input->pos < input->size`, some input has not been consumed. + * It's up to the caller to present again remaining data. + * If `output->pos < output->size`, decoder has flushed everything it could. + * Returns 0 iff a frame is completely decoded and fully flushed. + * Otherwise it returns a suggested next input size that will never load more + * than the current frame. + ******************************************************************************/ -/*! ZSTD_DStreamWorkspaceBound() : - * Returns the minimum amount of memory that needs to be passed to ZSTD_createDStream() to decompress frames with windowSize <= maxWindowSize. */ +/** + * ZSTD_DStreamWorkspaceBound() - memory needed to create a ZSTD_DStream + * @maxWindowSize: The maximum window size allowed for compressed frames. + * + * Return: A lower bound on the size of the workspace that is passed to + * ZSTD_createDStream() and ZSTD_createDStream_usingDDict(). + */ size_t ZSTD_DStreamWorkspaceBound(size_t maxWindowSize); +/** + * struct ZSTD_DStream - the zstd streaming decompression context + */ typedef struct ZSTD_DStream_s ZSTD_DStream; /*===== ZSTD_DStream management functions =====*/ -/*! ZSTD_createDStream() : -* Creates a dstream that can decompress frames with windowSize up to maxWindowSize. -* The stream is placed in `workspace`, which must outlive the returned stream. -* `workspace` must be at least ZSTD_DStreamWorkspaceBound(maxWindowSize) bytes. */ -ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(size_t maxWindowSize, void* workspace, size_t workspaceSize); -/*! ZSTD_createDStream_usingDDict() : -* Similar to ZSTD_createCStream(), but use the given preprocessed dictionary. 
*/ -ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_usingDDict(size_t maxWindowSize, const ZSTD_DDict* ddict, void* workspace, size_t workspaceSize); +/** + * ZSTD_createDStream() - create a zstd streaming decompression context + * @maxWindowSize: The maximum window size allowed for compressed frames. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. + * Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine + * how large the workspace must be. + * + * Return: The zstd streaming decompression context. + */ +ZSTD_DStream *ZSTD_createDStream(size_t maxWindowSize, void *workspace, + size_t workspaceSize); +/** + * ZSTD_createDStream_usingDDict() - create zstd streaming decompression context + * @maxWindowSize: The maximum window size allowed for compressed frames. + * @ddict: The digested dictionary to use for decompression. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. + * Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine + * how large the workspace must be. + * + * Return: The zstd streaming decompression context. + */ +ZSTD_DStream *ZSTD_createDStream_usingDDict(size_t maxWindowSize, + const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize); /*===== Streaming decompression functions =====*/ -ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */ -ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +/** + * ZSTD_resetDStream() - reset the context using parameters from creation + * @zds: The zstd streaming decompression context to reset. + * + * Resets the context using the parameters from creation. Skips dictionary + * loading, since it can be reused. 
+ * + * Return: Zero or an error, which can be checked using ZSTD_isError(). + */ +size_t ZSTD_resetDStream(ZSTD_DStream *zds); +/** + * ZSTD_decompressStream() - streaming decompress some of input into output + * @zds: The zstd streaming decompression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * decompressed data was written. + * @input: Source buffer. `input->pos` is updated to indicate how much data was + * read. Note that it may not consume the entire input, in which case + * `input->pos < input->size`, and it's up to the caller to present + * remaining data again. + * + * The `input` and `output` buffers may be any size. Guaranteed to make some + * forward progress if `input` and `output` are not empty. + * ZSTD_decompressStream() will not consume the last byte of the frame until + * the entire frame is flushed. + * + * Return: Returns 0 iff a frame is completely decoded and fully flushed. + * Otherwise returns a hint for the number of bytes to use as the input + * for the next function call or an error, which can be checked using + * ZSTD_isError(). The size hint will never load more than the frame. + */ +size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, + ZSTD_inBuffer *input); -ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ -ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ +/** + * ZSTD_DStreamInSize() - recommended size for the input buffer + * + * Return: The recommended size for the input buffer. + */ +size_t ZSTD_DStreamInSize(void); +/** + * ZSTD_DStreamOutSize() - recommended size for the output buffer + * + * When the output buffer is at least this large, it is guaranteed to be large + * enough to flush at least one complete decompressed block. + * + * Return: The recommended size for the output buffer.
+ */ +size_t ZSTD_DStreamOutSize(void); /* --- Constants ---*/ @@ -366,281 +761,389 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output #define ZSTD_WINDOWLOG_MAX_32 27 #define ZSTD_WINDOWLOG_MAX_64 27 -#define ZSTD_WINDOWLOG_MAX ((unsigned)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) -#define ZSTD_WINDOWLOG_MIN 10 -#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX +#define ZSTD_WINDOWLOG_MAX \ + ((unsigned int)(sizeof(size_t) == 4 \ + ? ZSTD_WINDOWLOG_MAX_32 \ + : ZSTD_WINDOWLOG_MAX_64)) +#define ZSTD_WINDOWLOG_MIN 10 +#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX #define ZSTD_HASHLOG_MIN 6 #define ZSTD_CHAINLOG_MAX (ZSTD_WINDOWLOG_MAX+1) #define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN #define ZSTD_HASHLOG3_MAX 17 #define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) #define ZSTD_SEARCHLOG_MIN 1 -#define ZSTD_SEARCHLENGTH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ -#define ZSTD_SEARCHLENGTH_MIN 3 /* only for ZSTD_btopt, other strategies are limited to 4 */ +/* only for ZSTD_fast, other strategies are limited to 6 */ +#define ZSTD_SEARCHLENGTH_MAX 7 +/* only for ZSTD_btopt, other strategies are limited to 4 */ +#define ZSTD_SEARCHLENGTH_MIN 3 #define ZSTD_TARGETLENGTH_MIN 4 #define ZSTD_TARGETLENGTH_MAX 999 -#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */ +/* for static allocation */ +#define ZSTD_FRAMEHEADERSIZE_MAX 18 #define ZSTD_FRAMEHEADERSIZE_MIN 6 static const size_t ZSTD_frameHeaderSize_prefix = 5; static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN; static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX; -static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable frame length */ +/* magic number + skippable frame length */ +static const size_t ZSTD_skippableHeaderSize = 8; -/*************************************** -* Compressed size functions -***************************************/ +/*-************************************* 
+ * Compressed size functions + **************************************/ -/*! ZSTD_findFrameCompressedSize() : - * `src` should point to the start of a ZSTD encoded frame or skippable frame - * `srcSize` must be at least as large as the frame - * @return : the compressed size of the frame pointed to by `src`, suitable to pass to - * `ZSTD_decompress` or similar, or an error code if given invalid input. */ -ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); +/** + * ZSTD_findFrameCompressedSize() - returns the size of a compressed frame + * @src: Source buffer. It should point to the start of a zstd encoded frame + * or a skippable frame. + * @srcSize: The size of the source buffer. It must be at least as large as the + * size of the frame. + * + * Return: The compressed size of the frame pointed to by `src` or an error, + * which can be checked with ZSTD_isError(). + * Suitable to pass to ZSTD_decompress() or similar functions. + */ +size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize); -/*************************************** -* Decompressed size functions -***************************************/ -/*! ZSTD_getFrameContentSize() : -* `src` should point to the start of a ZSTD encoded frame -* `srcSize` must be at least as large as the frame header. A value greater than or equal -* to `ZSTD_frameHeaderSize_max` is guaranteed to be large enough in all cases. -* @return : decompressed size of the frame pointed to be `src` if known, otherwise -* - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined -* - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g.
invalid magic number, srcSize too small) */ -ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); +/*-************************************* + * Decompressed size functions + **************************************/ +/** + * ZSTD_getFrameContentSize() - returns the content size in a zstd frame header + * @src: It should point to the start of a zstd encoded frame. + * @srcSize: The size of the source buffer. It must be at least as large as the + * frame header. `ZSTD_frameHeaderSize_max` is always large enough. + * + * Return: The frame content size stored in the frame header if known. + * `ZSTD_CONTENTSIZE_UNKNOWN` if the content size isn't stored in the + * frame header. `ZSTD_CONTENTSIZE_ERROR` on invalid input. + */ +unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); -/*! ZSTD_findDecompressedSize() : -* `src` should point the start of a series of ZSTD encoded and/or skippable frames -* `srcSize` must be the _exact_ size of this series -* (i.e. there should be a frame boundary exactly `srcSize` bytes after `src`) -* @return : the decompressed size of all data in the contained frames, as a 64-bit value _if known_ -* - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN -* - if an error occurred: ZSTD_CONTENTSIZE_ERROR -* -* note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. -* When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. -* In which case, it's necessary to use streaming mode to decompress data. -* Optionally, application can still use ZSTD_decompress() while relying on implied limits. -* (For example, data may be necessarily cut into blocks <= 16 KB). -* note 2 : decompressed size is always present when compression is done with ZSTD_compress() -* note 3 : decompressed size can be very large (64-bits value), -* potentially larger than what local system can handle as a single memory segment. 
-* In which case, it's necessary to use streaming mode to decompress data. -* note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. -* Always ensure result fits within application's authorized limits. -* Each application can set its own limits. -* note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to -* read each contained frame header. This is efficient as most of the data is skipped, -* however it does mean that all frame data must be present and valid. */ -ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); +/** + * ZSTD_findDecompressedSize() - returns decompressed size of a series of frames + * @src: It should point to the start of a series of zstd encoded and/or + * skippable frames. + * @srcSize: The exact size of the series of frames. + * + * If any zstd encoded frame in the series doesn't have the frame content size + * set, `ZSTD_CONTENTSIZE_UNKNOWN` is returned. But frame content size is always + * set when using ZSTD_compress(). The decompressed size can be very large. + * If the source is untrusted, the decompressed size could be wrong or + * intentionally modified. Always ensure the result fits within the + * application's authorized limits. ZSTD_findDecompressedSize() handles multiple + * frames, and so it must traverse the input to read each frame header. This is + * efficient as most of the data is skipped, however it does mean that all frame + * data must be present and valid. + * + * Return: Decompressed size of all the data contained in the frames if known. + * `ZSTD_CONTENTSIZE_UNKNOWN` if the decompressed size is unknown. + * `ZSTD_CONTENTSIZE_ERROR` if an error occurred. + */ +unsigned long long ZSTD_findDecompressedSize(const void *src, size_t srcSize); -/*************************************** -* Advanced compression functions -***************************************/ -/*! 
ZSTD_checkCParams() : -* Ensure param values remain within authorized range */ -ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); +/*-************************************* + * Advanced compression functions + **************************************/ +/** + * ZSTD_checkCParams() - ensure parameter values remain within authorized range + * @cParams: The zstd compression parameters. + * + * Return: Zero or an error, which can be checked using ZSTD_isError(). + */ +size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams); -/*! ZSTD_adjustCParams() : -* optimize params for a given `srcSize` and `dictSize`. -* both values are optional, select `0` if unknown. */ -ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); +/** + * ZSTD_adjustCParams() - optimize parameters for a given srcSize and dictSize + * @srcSize: Optionally the estimated source size, or zero if unknown. + * @dictSize: Optionally the estimated dictionary size, or zero if unknown. + * + * Return: The optimized parameters. + */ +ZSTD_compressionParameters ZSTD_adjustCParams( + ZSTD_compressionParameters cParams, unsigned long long srcSize, + size_t dictSize); /*--- Advanced decompression functions ---*/ -/*! ZSTD_isFrame() : - * Tells if the content of `buffer` starts with a valid Frame Identifier. - * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. - * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. - * Note 3 : Skippable Frame Identifiers are considered valid. */ -ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); - -/*! ZSTD_getDictID_fromDict() : - * Provides the dictID stored within dictionary. - * if @return == 0, the dictionary is not conformant with Zstandard specification. - * It can still be loaded, but as a content-only dictionary. 
*/ -ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); - -/*! ZSTD_getDictID_fromDDict() : - * Provides the dictID of the dictionary loaded into `ddict`. - * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. - * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ -ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); - -/*! ZSTD_getDictID_fromFrame() : - * Provides the dictID required to decompressed the frame stored within `src`. - * If @return == 0, the dictID could not be decoded. - * This could for one of the following reasons : - * - The frame does not require a dictionary to be decoded (most common case). - * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. - * Note : this use case also happens when using a non-conformant dictionary. - * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). - * - This is not a Zstandard frame. - * When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code. */ -ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); - - -/********************************************************************* -* Buffer-less and synchronous inner streaming functions -* -* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. -* But it's also a complex one, with many restrictions (documented below). -* Prefer using normal streaming API for an easier experience -********************************************************************* */ +/** + * ZSTD_isFrame() - returns true iff the buffer starts with a valid frame + * @buffer: The source buffer to check. + * @size: The size of the source buffer, must be at least 4 bytes. 
+ * + * Return: True iff the buffer starts with a zstd or skippable frame identifier. + */ +unsigned int ZSTD_isFrame(const void *buffer, size_t size); /** - Buffer-less streaming compression (synchronous mode) + * ZSTD_getDictID_fromDict() - returns the dictionary id stored in a dictionary + * @dict: The dictionary buffer. + * @dictSize: The size of the dictionary buffer. + * + * Return: The dictionary id stored within the dictionary or 0 if the + * dictionary is not a zstd dictionary. If it returns 0 the + * dictionary can still be loaded as a content-only dictionary. + */ +unsigned int ZSTD_getDictID_fromDict(const void *dict, size_t dictSize); - A ZSTD_CCtx object is required to track streaming operations. - Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. - ZSTD_CCtx object can be re-used multiple times within successive compression operations. +/** + * ZSTD_getDictID_fromDDict() - returns the dictionary id stored in a ZSTD_DDict + * @ddict: The ddict to find the id of. + * + * Return: The dictionary id stored within `ddict` or 0 if the dictionary is not + * a zstd dictionary. If it returns 0 `ddict` will be loaded as a + * content-only dictionary. + */ +unsigned int ZSTD_getDictID_fromDDict(const ZSTD_DDict *ddict); - Start by initializing a context. - Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression, - or ZSTD_compressBegin_advanced(), for finer parameter control. - It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() - - Then, consume your input using ZSTD_compressContinue(). - There are some important considerations to keep in mind when using this advanced function : - - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffer only. - - Interface is synchronous : input is consumed entirely and produce 1+ (or more) compressed blocks. 
- - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. - Worst case evaluation is provided by ZSTD_compressBound(). - ZSTD_compressContinue() doesn't guarantee recover after a failed compression. - - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog). - It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks) - - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps. - In which case, it will "discard" the relevant memory section from its history. - - Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. - It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. - Without last block mark, frames will be considered unfinished (corrupted) by decoders. - - `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new frame. -*/ - -/*===== Buffer-less streaming compression functions =====*/ -ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */ -ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize can be 0, indicating unknown size. if it is non-zero, it must be accurate. 
for 0 size frames, use compressBegin_advanced */ -ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize can be 0, indicating unknown size. if it is non-zero, it must be accurate. for 0 size frames, use compressBegin_advanced */ -ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); - - - -/*- - Buffer-less streaming decompression (synchronous mode) - - A ZSTD_DCtx object is required to track streaming operations. - Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. - A ZSTD_DCtx object can be re-used multiple times. - - First typical operation is to retrieve frame parameters, using ZSTD_getFrameParams(). - It fills a ZSTD_frameParams structure which provide important information to correctly decode the frame, - such as the minimum rolling buffer size to allocate to decompress data (`windowSize`), - and the dictionary ID used. - (Note : content size is optional, it may not be present. 0 means : content size unknown). - Note that these values could be wrong, either because of data malformation, or because an attacker is spoofing deliberate false information. - As a consequence, check that values remain within valid application range, especially `windowSize`, before allocation. - Each application can set its own limit, depending on local restrictions. For extended interoperability, it is recommended to support at least 8 MB. - Frame parameters are extracted from the beginning of the compressed frame. - Data fragment must be large enough to ensure successful decoding, typically `ZSTD_frameHeaderSize_max` bytes. - @result : 0 : successful decoding, the `ZSTD_frameParams` structure is correctly filled. 
- >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. - errorCode, which can be tested using ZSTD_isError(). - - Start decompression, with ZSTD_decompressBegin() or ZSTD_decompressBegin_usingDict(). - Alternatively, you can copy a prepared context, using ZSTD_copyDCtx(). - - Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. - ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). - ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. - - @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). - It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some metadata item. - It can also be an error code, which can be tested with ZSTD_isError(). - - ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize`. - They should preferably be located contiguously, prior to current block. - Alternatively, a round buffer of sufficient size is also possible. Sufficient size is determined by frame parameters. - ZSTD_decompressContinue() is very sensitive to contiguity, - if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, - or that previous contiguous segment is large enough to properly handle maximum back-reference. - - A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. - Context can then be reset to start a new decompression. - - Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). - This information is not required to properly decode a frame. - - == Special case : skippable frames == - - Skippable frames allow integration of user-defined data into a flow of concatenated frames. - Skippable frames will be ignored (skipped) by a decompressor. 
The format of skippable frames is as follows : - a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F - b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits - c) Frame Content - any content (User Data) of length equal to Frame Size - For skippable frames ZSTD_decompressContinue() always returns 0. - For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowLog==0 what means that a frame is skippable. - Note : If fparamsPtr->frameContentSize==0, it is ambiguous: the frame might actually be a Zstd encoded frame with no content. - For purposes of decompression, it is valid in both cases to skip the frame using - ZSTD_findFrameCompressedSize to find its size in bytes. - It also returns Frame Size as fparamsPtr->frameContentSize. -*/ +/** + * ZSTD_getDictID_fromFrame() - returns the dictionary id stored in a zstd frame + * @src: Source buffer. It must be a zstd encoded frame. + * @srcSize: The size of the source buffer. It must be at least as large as the + * frame header. `ZSTD_frameHeaderSize_max` is always large enough. + * + * Return: The dictionary id required to decompress the frame stored within + * `src` or 0 if the dictionary id could not be decoded. It can return + * 0 if the frame does not require a dictionary, the dictionary id + * wasn't stored in the frame, `src` is not a zstd frame, or `srcSize` + * is too small. + */ +unsigned int ZSTD_getDictID_fromFrame(const void *src, size_t srcSize); +/** + * struct ZSTD_frameParams - zstd frame parameters stored in the frame header + * @frameContentSize: The frame content size, or 0 if not present. + * @windowSize: The window size, or 0 if the frame is a skippable frame. + * @dictID: The dictionary id, or 0 if not present. + * @checksumFlag: Whether a checksum was used. 
+ */ typedef struct { unsigned long long frameContentSize; - unsigned windowSize; - unsigned dictID; - unsigned checksumFlag; + unsigned int windowSize; + unsigned int dictID; + unsigned int checksumFlag; } ZSTD_frameParams; -/*===== Buffer-less streaming decompression functions =====*/ -ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input, see details below */ -ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); -ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; -ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); - /** - Block functions + * ZSTD_getFrameParams() - extracts parameters from a zstd or skippable frame + * @fparamsPtr: On success the frame parameters are written here. + * @src: The source buffer. It must point to a zstd or skippable frame. + * @srcSize: The size of the source buffer. `ZSTD_frameHeaderSize_max` is + * always large enough to succeed. + * + * Return: 0 on success. If more data is required it returns how many bytes + * must be provided to make forward progress. Otherwise it returns + * an error, which can be checked using ZSTD_isError(). + */ +size_t ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src, + size_t srcSize); - Block functions produce and decode raw zstd blocks, without frame metadata. - Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes). 
- User will have to take in charge required information to regenerate data, such as compressed and content sizes. +/*-***************************************************************************** + * Buffer-less and synchronous inner streaming functions + * + * This is an advanced API, giving full control over buffer management, for + * users which need direct control over memory. + * But it's also a complex one, with many restrictions (documented below). + * Prefer using normal streaming API for an easier experience + ******************************************************************************/ - A few rules to respect : - - Compressing and decompressing require a context structure - + Use ZSTD_createCCtx() and ZSTD_createDCtx() - - It is necessary to init context before starting - + compression : ZSTD_compressBegin() - + decompression : ZSTD_decompressBegin() - + variants _usingDict() are also allowed - + copyCCtx() and copyDCtx() work too - - Block size is limited, it must be <= ZSTD_getBlockSizeMax() - + If you need to compress more, cut data into multiple blocks - + Consider using the regular ZSTD_compress() instead, as frame metadata costs become negligible when source size is large. - - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero. - In which case, nothing is produced into `dst`. - + User must test for such outcome and deal directly with uncompressed data - + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!! - + In case of multiple successive blocks, decoder must be informed of uncompressed block existence to follow proper history. - Use ZSTD_insertBlock() in such a case. -*/ +/*-***************************************************************************** + * Buffer-less streaming compression (synchronous mode) + * + * A ZSTD_CCtx object is required to track streaming operations. + * Use ZSTD_createCCtx() to create a context. 
+ * ZSTD_CCtx object can be re-used multiple times within successive compression + * operations. + * + * Start by initializing a context. + * Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary + * compression, + * or ZSTD_compressBegin_advanced(), for finer parameter control. + * It's also possible to duplicate a reference context which has already been + * initialized, using ZSTD_copyCCtx() + * + * Then, consume your input using ZSTD_compressContinue(). + * There are some important considerations to keep in mind when using this + * advanced function : + * - ZSTD_compressContinue() has no internal buffer. It uses externally provided + * buffer only. + * - Interface is synchronous : input is consumed entirely and produces 1 + * (or more) compressed blocks. + * - Caller must ensure there is enough space in `dst` to store compressed data + * under worst case scenario. Worst case evaluation is provided by + * ZSTD_compressBound(). + * ZSTD_compressContinue() doesn't guarantee recovery after a failed + * compression. + * - ZSTD_compressContinue() presumes prior input ***is still accessible and + * unmodified*** (up to maximum distance size, see WindowLog). + * It remembers all previous contiguous blocks, plus one separated memory + * segment (which can itself consist of multiple contiguous blocks) + * - ZSTD_compressContinue() detects that prior input has been overwritten when + * `src` buffer overlaps. In which case, it will "discard" the relevant memory + * section from its history. + * + * Finish a frame with ZSTD_compressEnd(), which will write the last block(s) + * and optional checksum. It's possible to use srcSize==0, in which case, it + * will write a final empty block to end the frame. Without last block mark, + * frames will be considered unfinished (corrupted) by decoders. + * + * `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new + * frame. 
+ ******************************************************************************/ -#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024) /* define, for static allocation */ +/*===== Buffer-less streaming compression functions =====*/ +size_t ZSTD_compressBegin(ZSTD_CCtx *cctx, int compressionLevel); +size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx *cctx, const void *dict, + size_t dictSize, int compressionLevel); +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx *cctx, const void *dict, + size_t dictSize, ZSTD_parameters params, + unsigned long long pledgedSrcSize); +size_t ZSTD_copyCCtx(ZSTD_CCtx *cctx, const ZSTD_CCtx *preparedCCtx, + unsigned long long pledgedSrcSize); +size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx *cctx, const ZSTD_CDict *cdict, + unsigned long long pledgedSrcSize); +size_t ZSTD_compressContinue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize); +size_t ZSTD_compressEnd(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize); + + + +/*-***************************************************************************** + * Buffer-less streaming decompression (synchronous mode) + * + * A ZSTD_DCtx object is required to track streaming operations. + * Use ZSTD_createDCtx() to create a context. + * A ZSTD_DCtx object can be re-used multiple times. + * + * First typical operation is to retrieve frame parameters, using + * ZSTD_getFrameParams(). It fills a ZSTD_frameParams structure which provide + * important information to correctly decode the frame, such as the minimum + * rolling buffer size to allocate to decompress data (`windowSize`), and the + * dictionary ID used. + * Note: content size is optional, it may not be present. 0 means unknown. + * Note that these values could be wrong, either because of data malformation, + * or because an attacker is spoofing deliberate false information. 
As a + * consequence, check that values remain within valid application range, + * especially `windowSize`, before allocation. Each application can set its own + * limit, depending on local restrictions. For extended interoperability, it is + * recommended to support at least 8 MB. + * Frame parameters are extracted from the beginning of the compressed frame. + * Data fragment must be large enough to ensure successful decoding, typically + * `ZSTD_frameHeaderSize_max` bytes. + * Result: 0: successful decoding, the `ZSTD_frameParams` structure is filled. + * >0: `srcSize` is too small, provide at least this many bytes. + * errorCode, which can be tested using ZSTD_isError(). + * + * Start decompression, with ZSTD_decompressBegin() or + * ZSTD_decompressBegin_usingDict(). Alternatively, you can copy a prepared + * context, using ZSTD_copyDCtx(). + * + * Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() + * alternatively. + * ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' + * to ZSTD_decompressContinue(). + * ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will + * fail. + * + * The result of ZSTD_decompressContinue() is the number of bytes regenerated + * within 'dst' (necessarily <= dstCapacity). It can be zero, which is not an + * error; it just means ZSTD_decompressContinue() has decoded some metadata + * item. It can also be an error code, which can be tested with ZSTD_isError(). + * + * ZSTD_decompressContinue() needs previous data blocks during decompression, up + * to `windowSize`. They should preferably be located contiguously, prior to + * current block. Alternatively, a round buffer of sufficient size is also + * possible. Sufficient size is determined by frame parameters. 
+ * ZSTD_decompressContinue() is very sensitive to contiguity, if 2 blocks don't + * follow each other, make sure that either the compressor breaks contiguity at + * the same place, or that previous contiguous segment is large enough to + * properly handle maximum back-reference. + * + * A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. + * Context can then be reset to start a new decompression. + * + * Note: it's possible to know if next input to present is a header or a block, + * using ZSTD_nextInputType(). This information is not required to properly + * decode a frame. + * + * == Special case: skippable frames == + * + * Skippable frames allow integration of user-defined data into a flow of + * concatenated frames. Skippable frames will be ignored (skipped) by a + * decompressor. The format of skippable frames is as follows: + * a) Skippable frame ID - 4 Bytes, Little endian format, any value from + * 0x184D2A50 to 0x184D2A5F + * b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits + * c) Frame Content - any content (User Data) of length equal to Frame Size + * For skippable frames ZSTD_decompressContinue() always returns 0. + * For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowSize==0 + * which means that the frame is skippable. + * Note: If fparamsPtr->frameContentSize==0, it is ambiguous: the frame might + * actually be a zstd encoded frame with no content. For purposes of + * decompression, it is valid in both cases to skip the frame using + * ZSTD_findFrameCompressedSize() to find its size in bytes. + * It also returns frame size as fparamsPtr->frameContentSize. 
+ ******************************************************************************/ + +/*===== Buffer-less streaming decompression functions =====*/ +size_t ZSTD_decompressBegin(ZSTD_DCtx *dctx); +size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict, + size_t dictSize); +void ZSTD_copyDCtx(ZSTD_DCtx *dctx, const ZSTD_DCtx *preparedDCtx); +size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx); +size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize); +typedef enum { + ZSTDnit_frameHeader, + ZSTDnit_blockHeader, + ZSTDnit_block, + ZSTDnit_lastBlock, + ZSTDnit_checksum, + ZSTDnit_skippableFrame +} ZSTD_nextInputType_e; +ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx *dctx); + +/*-***************************************************************************** + * Block functions + * + * Block functions produce and decode raw zstd blocks, without frame metadata. + * Frame metadata cost is typically ~18 bytes, which can be non-negligible for + * very small blocks (< 100 bytes). User will have to take in charge required + * information to regenerate data, such as compressed and content sizes. + * + * A few rules to respect: + * - Compressing and decompressing require a context structure + * + Use ZSTD_createCCtx() and ZSTD_createDCtx() + * - It is necessary to init context before starting + * + compression : ZSTD_compressBegin() + * + decompression : ZSTD_decompressBegin() + * + variants _usingDict() are also allowed + * + copyCCtx() and copyDCtx() work too + * - Block size is limited, it must be <= ZSTD_getBlockSizeMax() + * + If you need to compress more, cut data into multiple blocks + * + Consider using the regular ZSTD_compress() instead, as frame metadata + * costs become negligible when source size is large. + * - When a block is considered not compressible enough, ZSTD_compressBlock() + * result will be zero. In which case, nothing is produced into `dst`. 
+ * + User must test for such outcome and deal directly with uncompressed data + * + ZSTD_decompressBlock() doesn't accept uncompressed data as input!!! + * + In case of multiple successive blocks, decoder must be informed of + * uncompressed block existence to follow proper history. Use + * ZSTD_insertBlock() in such a case. + ******************************************************************************/ + +/* Define for static allocation */ +#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024) /*===== Raw zstd block functions =====*/ -ZSTDLIB_API size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx); -ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert block into `dctx` history. Useful for uncompressed blocks */ +size_t ZSTD_getBlockSizeMax(ZSTD_CCtx *cctx); +size_t ZSTD_compressBlock(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize); +size_t ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize); +size_t ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart, + size_t blockSize); - -#endif /* ZSTD_H_235446 */ +#endif /* ZSTD_H */ From b4dd3378f10b7ba83077826129f59805176ef728 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 11 Apr 2017 11:35:36 -0700 Subject: [PATCH 3/4] Add BtrFS patch for 4.10 kernel --- contrib/linux-kernel/README.md | 10 +- contrib/linux-kernel/btrfs.diff | 633 +++++++++++++++++++++++++++ contrib/linux-kernel/fs/btrfs/zstd.c | 129 +++--- 3 files changed, 703 insertions(+), 69 deletions(-) create mode 100644 contrib/linux-kernel/btrfs.diff diff --git a/contrib/linux-kernel/README.md b/contrib/linux-kernel/README.md index 11938ad6..1cc74cca 100644 --- 
a/contrib/linux-kernel/README.md +++ b/contrib/linux-kernel/README.md @@ -1,8 +1,7 @@ # Linux Kernel Patch There are two pieces, the `zstd_compress` and `zstd_decompress` kernel modules, and the BtrFS patch. -The patches are based off of the linux kernel version 4.9. -The BtrFS patch is not present in its entirety yet. +The patches are based off of the linux kernel master branch (version 4.10). ## Zstd Kernel modules @@ -21,6 +20,7 @@ The BtrFS patch is not present in its entirety yet. ## BtrFS -* `fs/btrfs/zstd.c` is provided. -* Some more glue is required to integrate it with BtrFS, but I haven't included the patches yet. - In the meantime see https://github.com/terrelln/linux/commit/1914f7d4ca6c539369c84853eafa4ac104883047 if you're interested. +* The patch is located in `btrfs.diff`. +* Additionally `fs/btrfs/zstd.c` is provided as a source for convenience. +* The patch seems to be working, it doesn't crash the kernel, and compresses at speeds and ratios that are expected. + It can still use some more testing for fringe features, like printing options. 
diff --git a/contrib/linux-kernel/btrfs.diff b/contrib/linux-kernel/btrfs.diff new file mode 100644 index 00000000..b0f8b924 --- /dev/null +++ b/contrib/linux-kernel/btrfs.diff @@ -0,0 +1,633 @@ +diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig +index 80e9c18..a26c63b 100644 +--- a/fs/btrfs/Kconfig ++++ b/fs/btrfs/Kconfig +@@ -6,6 +6,8 @@ config BTRFS_FS + select ZLIB_DEFLATE + select LZO_COMPRESS + select LZO_DECOMPRESS ++ select ZSTD_COMPRESS ++ select ZSTD_DECOMPRESS + select RAID6_PQ + select XOR_BLOCKS + select SRCU +diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile +index 128ce17..962a95a 100644 +--- a/fs/btrfs/Makefile ++++ b/fs/btrfs/Makefile +@@ -6,7 +6,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ + transaction.o inode.o file.o tree-defrag.o \ + extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ + extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ +- export.o tree-log.o free-space-cache.o zlib.o lzo.o \ ++ export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \ + compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ + reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ + uuid-tree.o props.o hash.o free-space-tree.o +diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c +index c7721a6..66d4ced 100644 +--- a/fs/btrfs/compression.c ++++ b/fs/btrfs/compression.c +@@ -761,6 +761,7 @@ static struct { + static const struct btrfs_compress_op * const btrfs_compress_op[] = { + &btrfs_zlib_compress, + &btrfs_lzo_compress, ++ &btrfs_zstd_compress, + }; + + void __init btrfs_init_compress(void) +diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h +index 39ec43a..d99fc21 100644 +--- a/fs/btrfs/compression.h ++++ b/fs/btrfs/compression.h +@@ -60,8 +60,9 @@ enum btrfs_compression_type { + BTRFS_COMPRESS_NONE = 0, + BTRFS_COMPRESS_ZLIB = 1, + BTRFS_COMPRESS_LZO = 2, +- BTRFS_COMPRESS_TYPES = 2, +- BTRFS_COMPRESS_LAST = 3, ++ BTRFS_COMPRESS_ZSTD = 3, ++ 
BTRFS_COMPRESS_TYPES = 3, ++ BTRFS_COMPRESS_LAST = 4, + }; + + struct btrfs_compress_op { +@@ -92,5 +93,6 @@ struct btrfs_compress_op { + + extern const struct btrfs_compress_op btrfs_zlib_compress; + extern const struct btrfs_compress_op btrfs_lzo_compress; ++extern const struct btrfs_compress_op btrfs_zstd_compress; + + #endif +diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h +index 29b7fc2..878b23b9 100644 +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -270,6 +270,7 @@ struct btrfs_super_block { + BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ + BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ + BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ ++ BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD | \ + BTRFS_FEATURE_INCOMPAT_RAID56 | \ + BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \ + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \ +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c +index 08b74da..0c43e4e 100644 +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -2853,6 +2853,8 @@ int open_ctree(struct super_block *sb, + features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; + if (fs_info->compress_type == BTRFS_COMPRESS_LZO) + features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; ++ else if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_ZSTD) ++ features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD; + + if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) + btrfs_info(fs_info, "has skinny extents"); +diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c +index dabfc7a..d8ea727 100644 +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -327,8 +327,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) + + if (fs_info->compress_type == BTRFS_COMPRESS_LZO) + comp = "lzo"; +- else ++ else if (fs_info->compress_type == BTRFS_COMPRESS_ZLIB) + comp = "zlib"; ++ else ++ comp = "zstd"; + ret = btrfs_set_prop(inode, "btrfs.compression", + comp, strlen(comp), 0); + if (ret) +@@ -1463,6 +1465,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, + + if (range->compress_type == 
BTRFS_COMPRESS_LZO) { + btrfs_set_fs_incompat(fs_info, COMPRESS_LZO); ++ } else if (range->compress_type == BTRFS_COMPRESS_ZSTD) { ++ btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD); + } + + ret = defrag_count; +diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c +index d6cb155..162105f 100644 +--- a/fs/btrfs/props.c ++++ b/fs/btrfs/props.c +@@ -383,6 +383,8 @@ static int prop_compression_validate(const char *value, size_t len) + return 0; + else if (!strncmp("zlib", value, len)) + return 0; ++ else if (!strncmp("zstd", value, len)) ++ return 0; + + return -EINVAL; + } +@@ -405,6 +407,8 @@ static int prop_compression_apply(struct inode *inode, + type = BTRFS_COMPRESS_LZO; + else if (!strncmp("zlib", value, len)) + type = BTRFS_COMPRESS_ZLIB; ++ else if (!strncmp("zstd", value, len)) ++ type = BTRFS_COMPRESS_ZSTD; + else + return -EINVAL; + +@@ -422,6 +426,8 @@ static const char *prop_compression_extract(struct inode *inode) + return "zlib"; + case BTRFS_COMPRESS_LZO: + return "lzo"; ++ case BTRFS_COMPRESS_ZSTD: ++ return "zstd"; + } + + return NULL; +diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c +index da687dc..b064456 100644 +--- a/fs/btrfs/super.c ++++ b/fs/btrfs/super.c +@@ -513,6 +513,14 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, + btrfs_clear_opt(info->mount_opt, NODATASUM); + btrfs_set_fs_incompat(info, COMPRESS_LZO); + no_compress = 0; ++ } else if (strcmp(args[0].from, "zstd") == 0) { ++ compress_type = "zstd"; ++ info->compress_type = BTRFS_COMPRESS_ZSTD; ++ btrfs_set_opt(info->mount_opt, COMPRESS); ++ btrfs_clear_opt(info->mount_opt, NODATACOW); ++ btrfs_clear_opt(info->mount_opt, NODATASUM); ++ btrfs_set_fs_incompat(info, COMPRESS_ZSTD); ++ no_compress = 0; + } else if (strncmp(args[0].from, "no", 2) == 0) { + compress_type = "no"; + btrfs_clear_opt(info->mount_opt, COMPRESS); +@@ -1230,8 +1238,10 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) + if (btrfs_test_opt(info, COMPRESS)) { + if 
(info->compress_type == BTRFS_COMPRESS_ZLIB) + compress_type = "zlib"; +- else ++ else if (info->compress_type == BTRFS_COMPRESS_LZO) + compress_type = "lzo"; ++ else ++ compress_type = "zstd"; + if (btrfs_test_opt(info, FORCE_COMPRESS)) + seq_printf(seq, ",compress-force=%s", compress_type); + else +diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c +index 1f157fb..b0dec90 100644 +--- a/fs/btrfs/sysfs.c ++++ b/fs/btrfs/sysfs.c +@@ -200,6 +200,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(mixed_backref, MIXED_BACKREF); + BTRFS_FEAT_ATTR_INCOMPAT(default_subvol, DEFAULT_SUBVOL); + BTRFS_FEAT_ATTR_INCOMPAT(mixed_groups, MIXED_GROUPS); + BTRFS_FEAT_ATTR_INCOMPAT(compress_lzo, COMPRESS_LZO); ++BTRFS_FEAT_ATTR_INCOMPAT(compress_zstd, COMPRESS_ZSTD); + BTRFS_FEAT_ATTR_INCOMPAT(big_metadata, BIG_METADATA); + BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF); + BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56); +@@ -212,6 +213,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = { + BTRFS_FEAT_ATTR_PTR(default_subvol), + BTRFS_FEAT_ATTR_PTR(mixed_groups), + BTRFS_FEAT_ATTR_PTR(compress_lzo), ++ BTRFS_FEAT_ATTR_PTR(compress_zstd), + BTRFS_FEAT_ATTR_PTR(big_metadata), + BTRFS_FEAT_ATTR_PTR(extended_iref), + BTRFS_FEAT_ATTR_PTR(raid56), +diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c +new file mode 100644 +index 0000000..b7f319e +--- /dev/null ++++ b/fs/btrfs/zstd.c +@@ -0,0 +1,415 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "compression.h" ++ ++#define ZSTD_BTRFS_MAX_WINDOWLOG 17 ++#define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG) ++ ++static ZSTD_parameters zstd_get_btrfs_parameters(size_t src_len) ++{ ++ ZSTD_parameters params = ZSTD_getParams(3, src_len, 0); ++ ++ if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG) ++ params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG; ++ WARN_ON(src_len > ZSTD_BTRFS_MAX_INPUT); ++ return params; ++} ++ ++struct workspace { ++ void *mem; ++ size_t size; ++ char 
*buf; ++ struct list_head list; ++}; ++ ++static void zstd_free_workspace(struct list_head *ws) ++{ ++ struct workspace *workspace = list_entry(ws, struct workspace, list); ++ ++ vfree(workspace->mem); ++ kfree(workspace->buf); ++ kfree(workspace); ++} ++ ++static struct list_head *zstd_alloc_workspace(void) ++{ ++ ZSTD_parameters params = ++ zstd_get_btrfs_parameters(ZSTD_BTRFS_MAX_INPUT); ++ struct workspace *workspace; ++ ++ workspace = kzalloc(sizeof(*workspace), GFP_NOFS); ++ if (!workspace) ++ return ERR_PTR(-ENOMEM); ++ ++ workspace->size = max_t(size_t, ++ ZSTD_CStreamWorkspaceBound(params.cParams), ++ ZSTD_DStreamWorkspaceBound(ZSTD_BTRFS_MAX_INPUT)); ++ workspace->mem = vmalloc(workspace->size); ++ workspace->buf = kmalloc(PAGE_SIZE, GFP_NOFS); ++ if (!workspace->mem || !workspace->buf) ++ goto fail; ++ ++ INIT_LIST_HEAD(&workspace->list); ++ ++ return &workspace->list; ++fail: ++ zstd_free_workspace(&workspace->list); ++ return ERR_PTR(-ENOMEM); ++} ++ ++static int zstd_compress_pages(struct list_head *ws, ++ struct address_space *mapping, ++ u64 start, ++ struct page **pages, ++ unsigned long *out_pages, ++ unsigned long *total_in, ++ unsigned long *total_out) ++{ ++ struct workspace *workspace = list_entry(ws, struct workspace, list); ++ ZSTD_CStream *stream; ++ int ret = 0; ++ int nr_pages = 0; ++ struct page *in_page = NULL; /* The current page to read */ ++ struct page *out_page = NULL; /* The current page to write to */ ++ ZSTD_inBuffer in_buf = { NULL, 0, 0 }; ++ ZSTD_outBuffer out_buf = { NULL, 0, 0 }; ++ unsigned long tot_in = 0; ++ unsigned long tot_out = 0; ++ unsigned long len = *total_out; ++ const unsigned long nr_dest_pages = *out_pages; ++ unsigned long max_out = nr_dest_pages * PAGE_SIZE; ++ ZSTD_parameters params = zstd_get_btrfs_parameters(len); ++ ++ *out_pages = 0; ++ *total_out = 0; ++ *total_in = 0; ++ ++ /* Initialize the stream */ ++ stream = ZSTD_createCStream(params, len, workspace->mem, ++ workspace->size); ++ if (!stream) { 
++ pr_warn("BTRFS: ZSTD_createStream failed\n"); ++ ret = -EIO; ++ goto out; ++ } ++ ++ /* map in the first page of input data */ ++ in_page = find_get_page(mapping, start >> PAGE_SHIFT); ++ in_buf.src = kmap(in_page); ++ in_buf.pos = 0; ++ in_buf.size = min_t(size_t, len, PAGE_SIZE); ++ ++ ++ /* Allocate and map in the output buffer */ ++ out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); ++ if (out_page == NULL) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ pages[nr_pages++] = out_page; ++ out_buf.dst = kmap(out_page); ++ out_buf.pos = 0; ++ out_buf.size = min_t(size_t, max_out, PAGE_SIZE); ++ ++ while (1) { ++ size_t ret2; ++ ++ ret2 = ZSTD_compressStream(stream, &out_buf, &in_buf); ++ if (ZSTD_isError(ret2)) { ++ pr_debug("BTRFS: ZSTD_compressStream returned %d\n", ++ ZSTD_getErrorCode(ret2)); ++ ret = -EIO; ++ goto out; ++ } ++ ++ /* Check to see if we are making it bigger */ ++ if (tot_in + in_buf.pos > 8192 && ++ tot_in + in_buf.pos < ++ tot_out + out_buf.pos) { ++ ret = -E2BIG; ++ goto out; ++ } ++ ++ /* We've reached the end of our output range */ ++ if (out_buf.pos >= max_out) { ++ tot_out += out_buf.pos; ++ ret = -E2BIG; ++ goto out; ++ } ++ ++ /* Check if we need more output space */ ++ if (out_buf.pos == out_buf.size) { ++ tot_out += PAGE_SIZE; ++ max_out -= PAGE_SIZE; ++ kunmap(out_page); ++ if (nr_pages == nr_dest_pages) { ++ out_page = NULL; ++ ret = -E2BIG; ++ goto out; ++ } ++ out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); ++ if (out_page == NULL) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ pages[nr_pages++] = out_page; ++ out_buf.dst = kmap(out_page); ++ out_buf.pos = 0; ++ out_buf.size = min_t(size_t, max_out, PAGE_SIZE); ++ } ++ ++ /* We've reached the end of the input */ ++ if (in_buf.pos >= len) { ++ tot_in += in_buf.pos; ++ break; ++ } ++ ++ /* Check if we need more input */ ++ if (in_buf.pos == in_buf.size) { ++ tot_in += PAGE_SIZE; ++ kunmap(in_page); ++ put_page(in_page); ++ ++ start += PAGE_SIZE; ++ len -= PAGE_SIZE; ++ in_page = 
find_get_page(mapping, start >> PAGE_SHIFT); ++ in_buf.src = kmap(in_page); ++ in_buf.pos = 0; ++ in_buf.size = min_t(size_t, len, PAGE_SIZE); ++ } ++ } ++ while (1) { ++ size_t ret2; ++ ++ ret2 = ZSTD_endStream(stream, &out_buf); ++ if (ZSTD_isError(ret2)) { ++ pr_debug("BTRFS: ZSTD_endStream returned %d\n", ++ ZSTD_getErrorCode(ret2)); ++ ret = -EIO; ++ goto out; ++ } ++ if (ret2 == 0) { ++ tot_out += out_buf.pos; ++ break; ++ } ++ if (out_buf.pos >= max_out) { ++ tot_out += out_buf.pos; ++ ret = -E2BIG; ++ goto out; ++ } ++ ++ tot_out += PAGE_SIZE; ++ max_out -= PAGE_SIZE; ++ kunmap(out_page); ++ if (nr_pages == nr_dest_pages) { ++ out_page = NULL; ++ ret = -E2BIG; ++ goto out; ++ } ++ out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); ++ if (out_page == NULL) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ pages[nr_pages++] = out_page; ++ out_buf.dst = kmap(out_page); ++ out_buf.pos = 0; ++ out_buf.size = min_t(size_t, max_out, PAGE_SIZE); ++ } ++ ++ if (tot_out >= tot_in) { ++ ret = -E2BIG; ++ goto out; ++ } ++ ++ ret = 0; ++ *total_in = tot_in; ++ *total_out = tot_out; ++out: ++ *out_pages = nr_pages; ++ /* Cleanup */ ++ if (in_page) { ++ kunmap(in_page); ++ put_page(in_page); ++ } ++ if (out_page) ++ kunmap(out_page); ++ return ret; ++} ++ ++static int zstd_decompress_bio(struct list_head *ws, struct page **pages_in, ++ u64 disk_start, ++ struct bio *orig_bio, ++ size_t srclen) ++{ ++ struct workspace *workspace = list_entry(ws, struct workspace, list); ++ ZSTD_DStream *stream; ++ int ret = 0; ++ unsigned long page_in_index = 0; ++ unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE); ++ unsigned long buf_start; ++ unsigned long total_out = 0; ++ ZSTD_inBuffer in_buf = { NULL, 0, 0 }; ++ ZSTD_outBuffer out_buf = { NULL, 0, 0 }; ++ ++ stream = ZSTD_createDStream( ++ ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size); ++ if (!stream) { ++ pr_debug("BTRFS: ZSTD_createDStream failed\n"); ++ ret = -EIO; ++ goto done; ++ } ++ ++ in_buf.src = 
kmap(pages_in[page_in_index]); ++ in_buf.pos = 0; ++ in_buf.size = min_t(size_t, srclen, PAGE_SIZE); ++ ++ out_buf.dst = workspace->buf; ++ out_buf.pos = 0; ++ out_buf.size = PAGE_SIZE; ++ ++ while (1) { ++ size_t ret2; ++ ++ ret2 = ZSTD_decompressStream(stream, &out_buf, &in_buf); ++ if (ZSTD_isError(ret2)) { ++ pr_debug("BTRFS: ZSTD_decompressStream returned %d\n", ++ ZSTD_getErrorCode(ret2)); ++ ret = -EIO; ++ goto done; ++ } ++ buf_start = total_out; ++ total_out += out_buf.pos; ++ out_buf.pos = 0; ++ ++ ret = btrfs_decompress_buf2page(out_buf.dst, buf_start, ++ total_out, disk_start, orig_bio); ++ if (ret == 0) ++ break; ++ ++ if (in_buf.pos >= srclen) ++ break; ++ ++ /* Check if we've hit the end of a frame */ ++ if (ret2 == 0) ++ break; ++ ++ if (in_buf.pos == in_buf.size) { ++ kunmap(pages_in[page_in_index++]); ++ if (page_in_index >= total_pages_in) { ++ in_buf.src = NULL; ++ ret = -EIO; ++ goto done; ++ } ++ srclen -= PAGE_SIZE; ++ in_buf.src = kmap(pages_in[page_in_index]); ++ in_buf.pos = 0; ++ in_buf.size = min_t(size_t, srclen, PAGE_SIZE); ++ } ++ } ++ ret = 0; ++ zero_fill_bio(orig_bio); ++done: ++ if (in_buf.src) ++ kunmap(pages_in[page_in_index]); ++ return ret; ++} ++ ++static int zstd_decompress(struct list_head *ws, unsigned char *data_in, ++ struct page *dest_page, ++ unsigned long start_byte, ++ size_t srclen, size_t destlen) ++{ ++ struct workspace *workspace = list_entry(ws, struct workspace, list); ++ ZSTD_DStream *stream; ++ int ret = 0; ++ size_t ret2; ++ ZSTD_inBuffer in_buf = { NULL, 0, 0 }; ++ ZSTD_outBuffer out_buf = { NULL, 0, 0 }; ++ unsigned long total_out = 0; ++ unsigned long pg_offset = 0; ++ char *kaddr; ++ ++ stream = ZSTD_createDStream( ++ ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size); ++ if (!stream) { ++ pr_warn("BTRFS: ZSTD_createDStream failed\n"); ++ ret = -EIO; ++ goto finish; ++ } ++ ++ destlen = min_t(size_t, destlen, PAGE_SIZE); ++ ++ in_buf.src = data_in; ++ in_buf.pos = 0; ++ in_buf.size = srclen; ++ ++ 
out_buf.dst = workspace->buf; ++ out_buf.pos = 0; ++ out_buf.size = PAGE_SIZE; ++ ++ ret2 = 1; ++ while (pg_offset < destlen && in_buf.pos < in_buf.size) { ++ unsigned long buf_start; ++ unsigned long buf_offset; ++ unsigned long bytes; ++ ++ /* Check if the frame is over and we still need more input */ ++ if (ret2 == 0) { ++ pr_debug("BTRFS: ZSTD_decompressStream ended early\n"); ++ ret = -EIO; ++ goto finish; ++ } ++ ret2 = ZSTD_decompressStream(stream, &out_buf, &in_buf); ++ if (ZSTD_isError(ret2)) { ++ pr_debug("BTRFS: ZSTD_decompressStream returned %d\n", ++ ZSTD_getErrorCode(ret2)); ++ ret = -EIO; ++ goto finish; ++ } ++ ++ buf_start = total_out; ++ total_out += out_buf.pos; ++ out_buf.pos = 0; ++ ++ if (total_out <= start_byte) ++ continue; ++ ++ if (total_out > start_byte && buf_start < start_byte) ++ buf_offset = start_byte - buf_start; ++ else ++ buf_offset = 0; ++ ++ bytes = min_t(unsigned long, destlen - pg_offset, ++ out_buf.size - buf_offset); ++ ++ kaddr = kmap_atomic(dest_page); ++ memcpy(kaddr + pg_offset, out_buf.dst + buf_offset, bytes); ++ kunmap_atomic(kaddr); ++ ++ pg_offset += bytes; ++ } ++ ret = 0; ++finish: ++ if (pg_offset < destlen) { ++ kaddr = kmap_atomic(dest_page); ++ memset(kaddr + pg_offset, 0, destlen - pg_offset); ++ kunmap_atomic(kaddr); ++ } ++ return ret; ++} ++ ++const struct btrfs_compress_op btrfs_zstd_compress = { ++ .alloc_workspace = zstd_alloc_workspace, ++ .free_workspace = zstd_free_workspace, ++ .compress_pages = zstd_compress_pages, ++ .decompress_bio = zstd_decompress_bio, ++ .decompress = zstd_decompress, ++}; +diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h +index db4c253..f26c34f 100644 +--- a/include/uapi/linux/btrfs.h ++++ b/include/uapi/linux/btrfs.h +@@ -255,13 +255,7 @@ struct btrfs_ioctl_fs_info_args { + #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) + #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) + #define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) +-/* +- * 
some patches floated around with a second compression method +- * lets save that incompat here for when they do get in +- * Note we don't actually support it, we're just reserving the +- * number +- */ +-#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZOv2 (1ULL << 4) ++#define BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD (1ULL << 4) + + /* + * older kernels tried to do bigger metadata blocks, but the diff --git a/contrib/linux-kernel/fs/btrfs/zstd.c b/contrib/linux-kernel/fs/btrfs/zstd.c index 23a3692a..b7f319e7 100644 --- a/contrib/linux-kernel/fs/btrfs/zstd.c +++ b/contrib/linux-kernel/fs/btrfs/zstd.c @@ -15,9 +15,10 @@ static ZSTD_parameters zstd_get_btrfs_parameters(size_t src_len) { ZSTD_parameters params = ZSTD_getParams(3, src_len, 0); - BUG_ON(src_len > ZSTD_BTRFS_MAX_INPUT); - BUG_ON(params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG); - params.fParams.checksumFlag = 1; + + if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG) + params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG; + WARN_ON(src_len > ZSTD_BTRFS_MAX_INPUT); return params; } @@ -39,17 +40,21 @@ static void zstd_free_workspace(struct list_head *ws) static struct list_head *zstd_alloc_workspace(void) { - ZSTD_parameters params = zstd_get_btrfs_parameters(ZSTD_BTRFS_MAX_INPUT); + ZSTD_parameters params = + zstd_get_btrfs_parameters(ZSTD_BTRFS_MAX_INPUT); struct workspace *workspace; workspace = kzalloc(sizeof(*workspace), GFP_NOFS); - if (!workspace) return ERR_PTR(-ENOMEM); + if (!workspace) + return ERR_PTR(-ENOMEM); - workspace->size = max_t(size_t, ZSTD_CStreamWorkspaceBound(params.cParams), + workspace->size = max_t(size_t, + ZSTD_CStreamWorkspaceBound(params.cParams), ZSTD_DStreamWorkspaceBound(ZSTD_BTRFS_MAX_INPUT)); workspace->mem = vmalloc(workspace->size); workspace->buf = kmalloc(PAGE_SIZE, GFP_NOFS); - if (!workspace->mem || !workspace->buf) goto fail; + if (!workspace->mem || !workspace->buf) + goto fail; INIT_LIST_HEAD(&workspace->list); @@ -61,16 +66,13 @@ fail: static int 
zstd_compress_pages(struct list_head *ws, struct address_space *mapping, - u64 start, unsigned long len, + u64 start, struct page **pages, - unsigned long nr_dest_pages, unsigned long *out_pages, unsigned long *total_in, - unsigned long *total_out, - unsigned long max_out) + unsigned long *total_out) { struct workspace *workspace = list_entry(ws, struct workspace, list); - ZSTD_parameters params = zstd_get_btrfs_parameters(len); ZSTD_CStream *stream; int ret = 0; int nr_pages = 0; @@ -80,13 +82,18 @@ static int zstd_compress_pages(struct list_head *ws, ZSTD_outBuffer out_buf = { NULL, 0, 0 }; unsigned long tot_in = 0; unsigned long tot_out = 0; + unsigned long len = *total_out; + const unsigned long nr_dest_pages = *out_pages; + unsigned long max_out = nr_dest_pages * PAGE_SIZE; + ZSTD_parameters params = zstd_get_btrfs_parameters(len); *out_pages = 0; *total_out = 0; *total_in = 0; /* Initialize the stream */ - stream = ZSTD_createCStream(params, len, workspace->mem, workspace->size); + stream = ZSTD_createCStream(params, len, workspace->mem, + workspace->size); if (!stream) { pr_warn("BTRFS: ZSTD_createStream failed\n"); ret = -EIO; @@ -112,10 +119,12 @@ static int zstd_compress_pages(struct list_head *ws, out_buf.size = min_t(size_t, max_out, PAGE_SIZE); while (1) { - const size_t rc = ZSTD_compressStream(stream, &out_buf, &in_buf); - if (ZSTD_isError(rc)) { + size_t ret2; + + ret2 = ZSTD_compressStream(stream, &out_buf, &in_buf); + if (ZSTD_isError(ret2)) { pr_debug("BTRFS: ZSTD_compressStream returned %d\n", - ZSTD_getErrorCode(rc)); + ZSTD_getErrorCode(ret2)); ret = -EIO; goto out; } @@ -177,14 +186,16 @@ static int zstd_compress_pages(struct list_head *ws, } } while (1) { - const size_t rc = ZSTD_endStream(stream, &out_buf); - if (ZSTD_isError(rc)) { + size_t ret2; + + ret2 = ZSTD_endStream(stream, &out_buf); + if (ZSTD_isError(ret2)) { pr_debug("BTRFS: ZSTD_endStream returned %d\n", - ZSTD_getErrorCode(rc)); + ZSTD_getErrorCode(ret2)); ret = -EIO; goto out; 
} - if (rc == 0) { + if (ret2 == 0) { tot_out += out_buf.pos; break; } @@ -228,24 +239,22 @@ out: kunmap(in_page); put_page(in_page); } - if (out_page) { kunmap(out_page); } + if (out_page) + kunmap(out_page); return ret; } -static int zstd_decompress_biovec(struct list_head *ws, struct page **pages_in, +static int zstd_decompress_bio(struct list_head *ws, struct page **pages_in, u64 disk_start, - struct bio_vec *bvec, - int vcnt, + struct bio *orig_bio, size_t srclen) { struct workspace *workspace = list_entry(ws, struct workspace, list); ZSTD_DStream *stream; int ret = 0; unsigned long page_in_index = 0; - unsigned long page_out_index = 0; unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE); unsigned long buf_start; - unsigned long pg_offset; unsigned long total_out = 0; ZSTD_inBuffer in_buf = { NULL, 0, 0 }; ZSTD_outBuffer out_buf = { NULL, 0, 0 }; @@ -266,13 +275,13 @@ static int zstd_decompress_biovec(struct list_head *ws, struct page **pages_in, out_buf.pos = 0; out_buf.size = PAGE_SIZE; - pg_offset = 0; - while (1) { - const size_t rc = ZSTD_decompressStream(stream, &out_buf, &in_buf); - if (ZSTD_isError(rc)) { + size_t ret2; + + ret2 = ZSTD_decompressStream(stream, &out_buf, &in_buf); + if (ZSTD_isError(ret2)) { pr_debug("BTRFS: ZSTD_decompressStream returned %d\n", - ZSTD_getErrorCode(rc)); + ZSTD_getErrorCode(ret2)); ret = -EIO; goto done; } @@ -280,23 +289,17 @@ static int zstd_decompress_biovec(struct list_head *ws, struct page **pages_in, total_out += out_buf.pos; out_buf.pos = 0; - { - int ret2 = btrfs_decompress_buf2page(out_buf.dst, buf_start, - total_out, disk_start, bvec, vcnt, - &page_out_index, &pg_offset); - if (ret2 == 0) { - break; - } - } - - if (in_buf.pos >= srclen) { + ret = btrfs_decompress_buf2page(out_buf.dst, buf_start, + total_out, disk_start, orig_bio); + if (ret == 0) + break; + + if (in_buf.pos >= srclen) break; - } /* Check if we've hit the end of a frame */ - if (rc == 0) { + if (ret2 == 0) break; - } if (in_buf.pos 
== in_buf.size) { kunmap(pages_in[page_in_index++]); @@ -311,10 +314,11 @@ static int zstd_decompress_biovec(struct list_head *ws, struct page **pages_in, in_buf.size = min_t(size_t, srclen, PAGE_SIZE); } } - btrfs_clear_biovec_end(bvec, vcnt, page_out_index, pg_offset); ret = 0; + zero_fill_bio(orig_bio); done: - if (in_buf.src) { kunmap(pages_in[page_in_index]); } + if (in_buf.src) + kunmap(pages_in[page_in_index]); return ret; } @@ -326,6 +330,7 @@ static int zstd_decompress(struct list_head *ws, unsigned char *data_in, struct workspace *workspace = list_entry(ws, struct workspace, list); ZSTD_DStream *stream; int ret = 0; + size_t ret2; ZSTD_inBuffer in_buf = { NULL, 0, 0 }; ZSTD_outBuffer out_buf = { NULL, 0, 0 }; unsigned long total_out = 0; @@ -350,41 +355,37 @@ static int zstd_decompress(struct list_head *ws, unsigned char *data_in, out_buf.pos = 0; out_buf.size = PAGE_SIZE; - ret = 1; + ret2 = 1; while (pg_offset < destlen && in_buf.pos < in_buf.size) { unsigned long buf_start; unsigned long buf_offset; unsigned long bytes; /* Check if the frame is over and we still need more input */ - if (ret == 0) { - pr_debug("BTRFS: ZSTD_decompressStream frame ended to early\n"); + if (ret2 == 0) { + pr_debug("BTRFS: ZSTD_decompressStream ended early\n"); ret = -EIO; goto finish; } - { - const size_t rc = ZSTD_decompressStream(stream, &out_buf, &in_buf); - if (ZSTD_isError(rc)) { - pr_debug("BTRFS: ZSTD_decompressStream returned %d\n", - ZSTD_getErrorCode(rc)); - ret = -EIO; - goto finish; - } - ret = rc > 0; + ret2 = ZSTD_decompressStream(stream, &out_buf, &in_buf); + if (ZSTD_isError(ret2)) { + pr_debug("BTRFS: ZSTD_decompressStream returned %d\n", + ZSTD_getErrorCode(ret2)); + ret = -EIO; + goto finish; } + buf_start = total_out; total_out += out_buf.pos; out_buf.pos = 0; - if (total_out <= start_byte) { + if (total_out <= start_byte) continue; - } - if (total_out > start_byte && buf_start < start_byte) { + if (total_out > start_byte && buf_start < start_byte) 
buf_offset = start_byte - buf_start; - } else { + else buf_offset = 0; - } bytes = min_t(unsigned long, destlen - pg_offset, out_buf.size - buf_offset); @@ -409,6 +410,6 @@ const struct btrfs_compress_op btrfs_zstd_compress = { .alloc_workspace = zstd_alloc_workspace, .free_workspace = zstd_free_workspace, .compress_pages = zstd_compress_pages, - .decompress_biovec = zstd_decompress_biovec, + .decompress_bio = zstd_decompress_bio, .decompress = zstd_decompress, }; From b633377d0e6e2857e2ad2ffaa57f3015b7bc0b8f Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 11 Apr 2017 12:40:53 -0700 Subject: [PATCH 4/4] Add BtrFS benchmarks --- contrib/linux-kernel/README.md | 26 ++++++ contrib/linux-kernel/btrfs-benchmark.sh | 104 ++++++++++++++++++++++++ 2 files changed, 130 insertions(+) create mode 100755 contrib/linux-kernel/btrfs-benchmark.sh diff --git a/contrib/linux-kernel/README.md b/contrib/linux-kernel/README.md index 1cc74cca..a6283851 100644 --- a/contrib/linux-kernel/README.md +++ b/contrib/linux-kernel/README.md @@ -24,3 +24,29 @@ The patches are based off of the linux kernel master branch (version 4.10). * Additionally `fs/btrfs/zstd.c` is provided as a source for convenience. * The patch seems to be working, it doesn't crash the kernel, and compresses at speeds and ratios athat are expected. It can still use some more testing for fringe features, like printing options. + +### Benchmarks + +Benchmarks run on an Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM. +The VM is running on a Macbook Pro with a 3.1 GHz Intel Core i7 processor, +16 GB of RAM, and an SSD. + +The compression benchmark is copying 10 copies of the +unzipped [silesia corpus](http://mattmahoney.net/dc/silesia.html) into a BtrFS +filesystem mounted with `-o compress-force={none, lzo, zlib, zstd}`. +The decompression benchmark is timing how long it takes to `tar` all 10 copies +into `/dev/null`. +The compression ratio is measured by comparing the output of `df` and `du`.
+See `btrfs-benchmark.sh` for details. + +| Algorithm | Compression ratio | Compression speed | Decompression speed | +|-----------|-------------------|-------------------|---------------------| +| None | 0.99 | 504 MB/s | 686 MB/s | +| lzo | 1.66 | 398 MB/s | 442 MB/s | +| zlib | 2.58 | 65 MB/s | 241 MB/s | +| zstd 1 | 2.57 | 260 MB/s | 383 MB/s | +| zstd 3 | 2.71 | 174 MB/s | 408 MB/s | +| zstd 6 | 2.87 | 70 MB/s | 398 MB/s | +| zstd 9 | 2.92 | 43 MB/s | 406 MB/s | +| zstd 12 | 2.93 | 21 MB/s | 408 MB/s | +| zstd 15 | 3.01 | 11 MB/s | 354 MB/s | diff --git a/contrib/linux-kernel/btrfs-benchmark.sh b/contrib/linux-kernel/btrfs-benchmark.sh new file mode 100755 index 00000000..5e28da9c --- /dev/null +++ b/contrib/linux-kernel/btrfs-benchmark.sh @@ -0,0 +1,104 @@ +# !/bin/sh +set -e + +# Benchmarks run on a Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM. +# The VM is running on a Macbook Pro with a 3.1 GHz Intel Core i7 processor and +# 16 GB of RAM and an SSD. + +# silesia is a directory that can be downloaded from +# http://mattmahoney.net/dc/silesia.html +# ls -l silesia/ +# total 203M +# -rwxr-xr-x 1 terrelln 9.8M Apr 12 2002 dickens +# -rwxr-xr-x 1 terrelln 49M May 31 2002 mozilla +# -rwxr-xr-x 1 terrelln 9.6M Mar 20 2003 mr +# -rwxr-xr-x 1 terrelln 32M Apr 2 2002 nci +# -rwxr-xr-x 1 terrelln 5.9M Jul 4 2002 ooffice +# -rwxr-xr-x 1 terrelln 9.7M Apr 11 2002 osdb +# -rwxr-xr-x 1 terrelln 6.4M Apr 2 2002 reymont +# -rwxr-xr-x 1 terrelln 21M Mar 25 2002 samba +# -rwxr-xr-x 1 terrelln 7.0M Mar 24 2002 sao +# -rwxr-xr-x 1 terrelln 40M Mar 25 2002 webster +# -rwxr-xr-x 1 terrelln 8.1M Apr 4 2002 x-ray +# -rwxr-xr-x 1 terrelln 5.1M Nov 30 2000 xml + +# $HOME is on a ext4 filesystem +BENCHMARK_DIR="$HOME/silesia/" +N=10 + +# Normalize the environment +sudo umount /mnt/btrfs 2> /dev/null > /dev/null || true +sudo mount -t btrfs $@ /dev/sda3 /mnt/btrfs +sudo rm -rf /mnt/btrfs/* +sync +sudo umount /mnt/btrfs +sudo mount -t btrfs $@ /dev/sda3 /mnt/btrfs + +# Run the 
benchmark +echo "Compression" +time sh -c "for i in \$(seq $N); do sudo cp -r $BENCHMARK_DIR /mnt/btrfs/\$i; done; sync" + +echo "Approximate compression ratio" +printf "%d / %d\n" \ + $(df /mnt/btrfs --output=used -B 1 | tail -n 1) \ + $(sudo du /mnt/btrfs -b -d 0 | tr '\t' '\n' | head -n 1); + +# Unmount and remount to avoid any caching +sudo umount /mnt/btrfs +sudo mount -t btrfs $@ /dev/sda3 /mnt/btrfs + +echo "Decompression" +time sudo tar -c /mnt/btrfs 2> /dev/null | wc -c > /dev/null + +sudo rm -rf /mnt/btrfs/* +sudo umount /mnt/btrfs + +# Run for each of -o compress-force={none, lzo, zlib, zstd} 5 times and take the +# min time and ratio. +# Ran zstd with compression levels {1, 3, 6, 9, 12, 15}. +# Original size: 2119415342 B (using du /mnt/btrfs) + +# none +# compress: 4.205 s +# decompress: 3.090 s +# ratio: 0.99 + +# lzo +# compress: 5.328 s +# decompress: 4.793 s +# ratio: 1.66 + +# zlib +# compress: 32.588 s +# decompress: 8.791 s +# ratio : 2.58 + +# zstd 1 +# compress: 8.147 s +# decompress: 5.527 s +# ratio : 2.57 + +# zstd 3 +# compress: 12.207 s +# decompress: 5.195 s +# ratio : 2.71 + +# zstd 6 +# compress: 30.253 s +# decompress: 5.324 s +# ratio : 2.87 + +# zstd 9 +# compress: 49.659 s +# decompress: 5.220 s +# ratio : 2.92 + +# zstd 12 +# compress: 99.245 s +# decompress: 5.193 s +# ratio : 2.93 + +# zstd 15 +# compress: 196.997 s +# decompress: 5.992 s +# ratio : 3.01