zstd/lib/decompress/zstd_decompress.c

1740 lines
70 KiB
C
Raw Normal View History

/*
2016-08-30 10:04:33 -07:00
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
2016-08-30 10:04:33 -07:00
*/
/* ***************************************************************
* Tuning parameters
*****************************************************************/
/*!
* HEAPMODE :
* Select how default decompression function ZSTD_decompress() allocates its context,
* on stack (0), or into heap (1, default; requires malloc()).
* Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected.
*/
#ifndef ZSTD_HEAPMODE
# define ZSTD_HEAPMODE 1
#endif
/*!
* LEGACY_SUPPORT :
* if set to 1+, ZSTD_decompress() can decode older formats (v0.1+)
*/
#ifndef ZSTD_LEGACY_SUPPORT
# define ZSTD_LEGACY_SUPPORT 0
#endif
/*!
* MAXWINDOWSIZE_DEFAULT :
* maximum window size accepted by DStream __by default__.
* Frames requiring more memory will be rejected.
* It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize().
*/
2016-08-23 07:58:10 -07:00
#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
# define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1)
#endif
/*!
* NO_FORWARD_PROGRESS_MAX :
* maximum allowed nb of calls to ZSTD_decompressStream()
* without any forward progress
* (defined as: no byte read from input, and no byte flushed to output)
* before triggering an error.
*/
#ifndef ZSTD_NO_FORWARD_PROGRESS_MAX
# define ZSTD_NO_FORWARD_PROGRESS_MAX 16
#endif
2016-01-27 15:18:06 -08:00
/*-*******************************************************
2016-02-04 06:28:14 -08:00
* Dependencies
*********************************************************/
2016-06-06 10:52:35 -07:00
#include <string.h> /* memcpy, memmove, memset */
#include "cpu.h" /* bmi2 */
#include "mem.h" /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "huf.h"
#include "zstd_internal.h" /* blockProperties_t */
#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
#include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
# include "zstd_legacy.h"
#endif
/*-*************************************************************
* Context management
***************************************************************/
size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx)
{
if (dctx==NULL) return 0; /* support sizeof NULL */
return sizeof(*dctx)
+ ZSTD_sizeof_DDict(dctx->ddictLocal)
+ dctx->inBuffSize + dctx->outBuffSize;
}
2016-01-27 15:18:06 -08:00
2016-07-11 04:46:25 -07:00
size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }
static size_t ZSTD_startingInputLength(ZSTD_format_e format)
{
size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ?
ZSTD_FRAMEHEADERSIZE_PREFIX - ZSTD_FRAMEIDSIZE :
ZSTD_FRAMEHEADERSIZE_PREFIX;
ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE);
2017-09-26 15:36:57 -07:00
/* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
return startingInputLength;
}
2017-05-24 17:41:41 -07:00
static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
2016-05-23 07:24:52 -07:00
{
dctx->format = ZSTD_f_zstd1; /* ZSTD_decompressBegin() invokes ZSTD_startingInputLength() with argument dctx->format */
dctx->staticSize = 0;
dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
dctx->ddict = NULL;
dctx->ddictLocal = NULL;
dctx->dictEnd = NULL;
dctx->ddictIsCold = 0;
dctx->inBuff = NULL;
dctx->inBuffSize = 0;
dctx->outBuffSize = 0;
2017-05-24 17:41:41 -07:00
dctx->streamStage = zdss_init;
dctx->legacyContext = NULL;
dctx->previousLegacyVersion = 0;
dctx->noForwardProgress = 0;
2018-02-02 18:03:09 -08:00
dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
}
ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
{
ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace;
if ((size_t)workspace & 7) return NULL; /* 8-aligned */
if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL; /* minimum size */
ZSTD_initDCtx_internal(dctx);
dctx->staticSize = workspaceSize;
dctx->inBuff = (char*)(dctx+1);
return dctx;
2017-05-24 17:41:41 -07:00
}
2016-05-23 07:24:52 -07:00
ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
{
if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
2016-05-23 07:24:52 -07:00
{ ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(*dctx), customMem);
if (!dctx) return NULL;
dctx->customMem = customMem;
ZSTD_initDCtx_internal(dctx);
return dctx;
}
2017-05-24 17:41:41 -07:00
}
ZSTD_DCtx* ZSTD_createDCtx(void)
{
DEBUGLOG(3, "ZSTD_createDCtx");
return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
}
size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
{
if (dctx==NULL) return 0; /* support free on NULL */
RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx");
{ ZSTD_customMem const cMem = dctx->customMem;
ZSTD_freeDDict(dctx->ddictLocal);
dctx->ddictLocal = NULL;
ZSTD_free(dctx->inBuff, cMem);
dctx->inBuff = NULL;
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
if (dctx->legacyContext)
ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion);
#endif
ZSTD_free(dctx, cMem);
return 0;
}
}
/* no longer useful */
2016-01-26 06:58:49 -08:00
void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
{
2017-05-24 17:41:41 -07:00
size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx);
memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */
2016-01-26 06:58:49 -08:00
}
/*-*************************************************************
* Frame header decoding
***************************************************************/
2016-01-23 10:28:41 -08:00
/*! ZSTD_isFrame() :
* Tells if the content of `buffer` starts with a valid Frame Identifier.
* Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
* Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
* Note 3 : Skippable Frame Identifiers are considered valid. */
unsigned ZSTD_isFrame(const void* buffer, size_t size)
{
if (size < ZSTD_FRAMEIDSIZE) return 0;
{ U32 const magic = MEM_readLE32(buffer);
if (magic == ZSTD_MAGICNUMBER) return 1;
if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
}
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
if (ZSTD_isLegacy(buffer, size)) return 1;
#endif
return 0;
}
/** ZSTD_frameHeaderSize_internal() :
* srcSize must be large enough to reach header size fields.
* note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless.
* @return : size of the Frame Header
* or an error code, which can be tested with ZSTD_isError() */
static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format)
2015-11-25 05:42:45 -08:00
{
size_t const minInputSize = ZSTD_startingInputLength(format);
RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong);
{ BYTE const fhd = ((const BYTE*)src)[minInputSize-1];
U32 const dictID= fhd & 3;
U32 const singleSegment = (fhd >> 5) & 1;
U32 const fcsId = fhd >> 6;
return minInputSize + !singleSegment
+ ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId]
+ (singleSegment && !fcsId);
2016-05-28 20:01:04 -07:00
}
2015-11-25 05:42:45 -08:00
}
/** ZSTD_frameHeaderSize() :
* srcSize must be >= ZSTD_frameHeaderSize_prefix.
* @return : size of the Frame Header,
* or an error code (if srcSize is too small) */
size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
{
return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1);
}
2015-11-25 05:42:45 -08:00
/** ZSTD_getFrameHeader_advanced() :
* decode Frame Header, or require larger `srcSize`.
* note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
* @return : 0, `zfhPtr` is correctly filled,
* >0, `srcSize` is too small, value is wanted `srcSize` amount,
* or an error code, which can be tested using ZSTD_isError() */
size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
2015-11-25 05:42:45 -08:00
{
const BYTE* ip = (const BYTE*)src;
size_t const minInputSize = ZSTD_startingInputLength(format);
2017-05-09 16:18:17 -07:00
memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */
if (srcSize < minInputSize) return minInputSize;
RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter");
if ( (format != ZSTD_f_zstd1_magicless)
&& (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
2017-05-09 16:18:17 -07:00
/* skippable frame */
if (srcSize < ZSTD_SKIPPABLEHEADERSIZE)
return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */
memset(zfhPtr, 0, sizeof(*zfhPtr));
zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE);
zfhPtr->frameType = ZSTD_skippableFrame;
2016-05-31 03:43:46 -07:00
return 0;
}
RETURN_ERROR(prefix_unknown);
2016-05-31 03:43:46 -07:00
}
2016-03-14 17:33:36 -07:00
/* ensure there is enough `srcSize` to fully read/decode frame header */
{ size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format);
2017-07-07 15:51:24 -07:00
if (srcSize < fhsize) return fhsize;
zfhPtr->headerSize = (U32)fhsize;
}
{ BYTE const fhdByte = ip[minInputSize-1];
size_t pos = minInputSize;
U32 const dictIDSizeCode = fhdByte&3;
U32 const checksumFlag = (fhdByte>>2)&1;
U32 const singleSegment = (fhdByte>>5)&1;
U32 const fcsID = fhdByte>>6;
U64 windowSize = 0;
U32 dictID = 0;
U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN;
RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported,
"reserved bits, must be zero");
if (!singleSegment) {
BYTE const wlByte = ip[pos++];
U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge);
2017-07-07 16:09:47 -07:00
windowSize = (1ULL << windowLog);
windowSize += (windowSize >> 3) * (wlByte&7);
}
switch(dictIDSizeCode)
2016-05-28 20:01:04 -07:00
{
default: assert(0); /* impossible */
case 0 : break;
case 1 : dictID = ip[pos]; pos++; break;
case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break;
case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break;
2016-05-28 20:01:04 -07:00
}
switch(fcsID)
2016-03-14 17:33:36 -07:00
{
default: assert(0); /* impossible */
case 0 : if (singleSegment) frameContentSize = ip[pos]; break;
case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break;
case 2 : frameContentSize = MEM_readLE32(ip+pos); break;
case 3 : frameContentSize = MEM_readLE64(ip+pos); break;
}
if (singleSegment) windowSize = frameContentSize;
zfhPtr->frameType = ZSTD_frame;
zfhPtr->frameContentSize = frameContentSize;
zfhPtr->windowSize = windowSize;
zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
zfhPtr->dictID = dictID;
zfhPtr->checksumFlag = checksumFlag;
}
2015-11-25 05:42:45 -08:00
return 0;
}
/** ZSTD_getFrameHeader() :
* decode Frame Header, or require larger `srcSize`.
* note : this function does not consume input, it only reads it.
* @return : 0, `zfhPtr` is correctly filled,
* >0, `srcSize` is too small, value is wanted `srcSize` amount,
* or an error code, which can be tested using ZSTD_isError() */
size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize)
{
return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1);
}
/** ZSTD_getFrameContentSize() :
* compatible with legacy mode
* @return : decompressed size of the single frame pointed to be `src` if known, otherwise
* - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
* - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */
unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize)
{
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
if (ZSTD_isLegacy(src, srcSize)) {
unsigned long long const ret = ZSTD_getDecompressedSize_legacy(src, srcSize);
return ret == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : ret;
}
#endif
{ ZSTD_frameHeader zfh;
if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0)
return ZSTD_CONTENTSIZE_ERROR;
if (zfh.frameType == ZSTD_skippableFrame) {
return 0;
} else {
return zfh.frameContentSize;
} }
}
static size_t readSkippableFrameSize(void const* src, size_t srcSize)
{
size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE;
U32 sizeU32;
RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong);
sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
frameParameter_unsupported);
return skippableHeaderSize + sizeU32;
}
/** ZSTD_findDecompressedSize() :
* compatible with legacy mode
* `srcSize` must be the exact length of some number of ZSTD compressed and/or
* skippable frames
* @return : decompressed size of the frames contained */
unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
{
unsigned long long totalDstSize = 0;
while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
U32 const magicNumber = MEM_readLE32(src);
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
size_t const skippableSize = readSkippableFrameSize(src, srcSize);
if (ZSTD_isError(skippableSize))
return skippableSize;
if (srcSize < skippableSize) {
return ZSTD_CONTENTSIZE_ERROR;
}
src = (const BYTE *)src + skippableSize;
srcSize -= skippableSize;
continue;
}
{ unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret;
/* check for overflow */
if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR;
totalDstSize += ret;
}
{ size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
if (ZSTD_isError(frameSrcSize)) {
return ZSTD_CONTENTSIZE_ERROR;
}
src = (const BYTE *)src + frameSrcSize;
srcSize -= frameSrcSize;
}
} /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
if (srcSize) return ZSTD_CONTENTSIZE_ERROR;
return totalDstSize;
}
2016-07-07 04:14:21 -07:00
/** ZSTD_getDecompressedSize() :
* compatible with legacy mode
* @return : decompressed size if known, 0 otherwise
note : 0 can mean any of the following :
- frame content is empty
- decompressed size field is not present in frame header
2016-07-07 04:14:21 -07:00
- frame header unknown / not supported
- frame header not complete (`srcSize` too small) */
unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize)
{
unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN);
return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 0 : ret;
2016-07-07 04:14:21 -07:00
}
/** ZSTD_decodeFrameHeader() :
* `headerSize` must be the size provided by ZSTD_frameHeaderSize().
* @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize)
2015-11-26 03:43:28 -08:00
{
size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format);
if (ZSTD_isError(result)) return result; /* invalid header */
RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small");
RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID),
dictionary_wrong);
2016-05-31 09:13:56 -07:00
if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0);
return 0;
2015-11-26 03:43:28 -08:00
}
2019-02-28 22:55:18 -08:00
/**
* Contains the compressed frame size and an upper-bound for the decompressed frame size.
2019-03-01 00:03:50 -08:00
* Note: before using `compressedSize` you must check for errors using ZSTD_isError().
* similarly, before using `decompressedBound`, you must check for errors using:
* `decompressedBound` != ZSTD_CONTENTSIZE_UNKNOWN
2019-02-28 22:55:18 -08:00
*/
typedef struct {
size_t compressedSize;
2019-03-01 00:03:50 -08:00
unsigned long long decompressedBound;
2019-02-28 22:55:18 -08:00
} ZSTD_frameSizeInfo;
2019-03-01 18:29:35 -08:00
static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret)
{
ZSTD_frameSizeInfo frameSizeInfo;
frameSizeInfo.compressedSize = ret;
frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_UNKNOWN;
return frameSizeInfo;
}
2019-02-28 22:55:18 -08:00
static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize)
{
2019-02-28 22:55:18 -08:00
ZSTD_frameSizeInfo frameSizeInfo;
memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo));
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
2019-03-01 18:29:35 -08:00
if (ZSTD_isLegacy(src, srcSize))
return ZSTD_errorFrameSizeInfo(ZSTD_findFrameCompressedSizeLegacy(src, srcSize));
#endif
2019-02-28 22:55:18 -08:00
if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
&& (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
return frameSizeInfo;
} else {
const BYTE* ip = (const BYTE*)src;
const BYTE* const ipstart = ip;
size_t remainingSize = srcSize;
size_t nbBlocks = 0;
2019-02-28 22:55:18 -08:00
ZSTD_frameHeader zfh;
2017-07-07 15:51:24 -07:00
/* Extract Frame Header */
2019-03-01 23:11:15 -08:00
{ size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);
2019-03-01 18:29:35 -08:00
if (ZSTD_isError(ret))
return ZSTD_errorFrameSizeInfo(ret);
if (ret > 0)
return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
}
2017-07-07 15:51:24 -07:00
ip += zfh.headerSize;
remainingSize -= zfh.headerSize;
2019-02-28 22:55:18 -08:00
/* Iterate over each block */
while (1) {
blockProperties_t blockProperties;
size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
2019-03-01 18:29:35 -08:00
if (ZSTD_isError(cBlockSize))
return ZSTD_errorFrameSizeInfo(cBlockSize);
2019-03-01 18:29:35 -08:00
if (ZSTD_blockHeaderSize + cBlockSize > remainingSize)
return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
ip += ZSTD_blockHeaderSize + cBlockSize;
remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
nbBlocks++;
if (blockProperties.lastBlock) break;
}
2019-02-28 22:55:18 -08:00
/* Final frame content checksum */
if (zfh.checksumFlag) {
2019-03-01 18:29:35 -08:00
if (remainingSize < 4)
return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
ip += 4;
}
2019-02-28 22:55:18 -08:00
frameSizeInfo.compressedSize = ip - ipstart;
frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
? zfh.frameContentSize
: nbBlocks * zfh.blockSizeMax;
return frameSizeInfo;
}
}
2016-06-06 10:52:35 -07:00
/** ZSTD_findFrameCompressedSize() :
* compatible with legacy mode
* `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
* `srcSize` must be at least as large as the frame contained
* @return : the compressed size of the frame starting at `src` */
size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
{
2019-03-01 18:29:35 -08:00
ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
2019-02-28 22:55:18 -08:00
return frameSizeInfo.compressedSize;
}
/** ZSTD_decompressBound() :
* currently incompatible with legacy mode
* `src` must point to the start of a ZSTD frame or a skippeable frame
* `srcSize` must be at least as large as the frame contained
2019-03-01 00:03:50 -08:00
* @return : the maximum decompressed size of the compressed source
*/
2019-03-01 00:03:50 -08:00
unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
{
2019-03-01 00:03:50 -08:00
unsigned long long bound = 0;
2019-02-28 22:55:18 -08:00
/* Iterate over each frame */
while (srcSize > 0) {
2019-03-01 18:29:35 -08:00
ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
size_t const compressedSize = frameSizeInfo.compressedSize;
unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
2019-03-01 23:11:15 -08:00
if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
2019-03-01 00:03:50 -08:00
return ZSTD_CONTENTSIZE_ERROR;
2019-02-28 22:55:18 -08:00
src = (const BYTE*)src + compressedSize;
srcSize -= compressedSize;
bound += decompressedBound;
}
return bound;
}
/*-*************************************************************
* Frame decoding
***************************************************************/
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
{
if (dst != dctx->previousDstEnd) { /* not contiguous */
dctx->dictEnd = dctx->previousDstEnd;
dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
dctx->prefixStart = dst;
dctx->previousDstEnd = dst;
}
}
/** ZSTD_insertBlock() :
insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize)
{
ZSTD_checkContinuity(dctx, blockStart);
dctx->previousDstEnd = (const char*)blockStart + blockSize;
return blockSize;
}
static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
const void* src, size_t srcSize)
{
DEBUGLOG(5, "ZSTD_copyRawBlock");
if (dst == NULL) {
if (srcSize == 0) return 0;
RETURN_ERROR(dstBuffer_null);
}
RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall);
memcpy(dst, src, srcSize);
return srcSize;
}
static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
BYTE b,
size_t regenSize)
{
if (dst == NULL) {
if (regenSize == 0) return 0;
RETURN_ERROR(dstBuffer_null);
}
RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall);
memset(dst, b, regenSize);
return regenSize;
}
/*! ZSTD_decompressFrame() :
* @dctx must be properly initialized
* will update *srcPtr and *srcSizePtr,
* to make *srcPtr progress by one frame. */
2016-03-11 12:58:04 -08:00
static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
2017-07-05 18:10:07 -07:00
void* dst, size_t dstCapacity,
const void** srcPtr, size_t *srcSizePtr)
{
const BYTE* ip = (const BYTE*)(*srcPtr);
BYTE* const ostart = (BYTE* const)dst;
2016-03-19 06:14:31 -07:00
BYTE* const oend = ostart + dstCapacity;
2016-06-03 06:41:51 -07:00
BYTE* op = ostart;
size_t remainingSrcSize = *srcSizePtr;
DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr);
2016-03-19 06:14:31 -07:00
/* check */
RETURN_ERROR_IF(
remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize,
srcSize_wrong);
2016-03-19 06:14:31 -07:00
/* Frame Header */
{ size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_FRAMEHEADERSIZE_PREFIX);
2015-11-25 05:42:45 -08:00
if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
srcSize_wrong);
FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) );
ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
2015-11-25 05:42:45 -08:00
}
/* Loop on each block */
2016-03-19 06:14:31 -07:00
while (1) {
2016-06-10 15:23:43 -07:00
size_t decodedSize;
2016-06-03 06:41:51 -07:00
blockProperties_t blockProperties;
size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties);
if (ZSTD_isError(cBlockSize)) return cBlockSize;
ip += ZSTD_blockHeaderSize;
remainingSrcSize -= ZSTD_blockHeaderSize;
RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong);
switch(blockProperties.blockType)
{
case bt_compressed:
decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize, /* frame */ 1);
break;
case bt_raw :
2015-11-12 07:19:30 -08:00
decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
break;
case bt_rle :
decodedSize = ZSTD_setRleBlock(op, oend-op, *ip, blockProperties.origSize);
break;
2016-07-27 15:55:43 -07:00
case bt_reserved :
default:
RETURN_ERROR(corruption_detected);
}
if (ZSTD_isError(decodedSize)) return decodedSize;
2017-07-05 18:10:07 -07:00
if (dctx->fParams.checksumFlag)
XXH64_update(&dctx->xxhState, op, decodedSize);
op += decodedSize;
ip += cBlockSize;
remainingSrcSize -= cBlockSize;
2016-07-27 15:55:43 -07:00
if (blockProperties.lastBlock) break;
}
if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize,
corruption_detected);
}
2017-07-05 18:10:07 -07:00
if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
2016-07-27 15:55:43 -07:00
U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState);
U32 checkRead;
RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong);
2016-07-27 15:55:43 -07:00
checkRead = MEM_readLE32(ip);
RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong);
ip += 4;
remainingSrcSize -= 4;
2016-07-27 15:55:43 -07:00
}
2017-02-28 01:15:28 -08:00
/* Allow caller to get size read */
*srcPtr = ip;
*srcSizePtr = remainingSrcSize;
return op-ostart;
}
static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
2017-07-05 18:10:07 -07:00
const void* dict, size_t dictSize,
const ZSTD_DDict* ddict)
{
void* const dststart = dst;
int moreThan1Frame = 0;
DEBUGLOG(5, "ZSTD_decompressMultiFrame");
2017-07-05 18:10:07 -07:00
assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */
if (ddict) {
dict = ZSTD_DDict_dictContent(ddict);
dictSize = ZSTD_DDict_dictSize(ddict);
}
while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
if (ZSTD_isLegacy(src, srcSize)) {
size_t decodedSize;
size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
if (ZSTD_isError(frameSize)) return frameSize;
RETURN_ERROR_IF(dctx->staticSize, memory_allocation,
"legacy support is not compatible with static dctx");
decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
if (ZSTD_isError(decodedSize)) return decodedSize;
assert(decodedSize <=- dstCapacity);
dst = (BYTE*)dst + decodedSize;
dstCapacity -= decodedSize;
src = (const BYTE*)src + frameSize;
srcSize -= frameSize;
continue;
}
#endif
{ U32 const magicNumber = MEM_readLE32(src);
DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
(unsigned)magicNumber, ZSTD_MAGICNUMBER);
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
size_t const skippableSize = readSkippableFrameSize(src, srcSize);
if (ZSTD_isError(skippableSize))
return skippableSize;
RETURN_ERROR_IF(srcSize < skippableSize, srcSize_wrong);
src = (const BYTE *)src + skippableSize;
srcSize -= skippableSize;
continue;
} }
if (ddict) {
/* we were called from ZSTD_decompress_usingDDict */
FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict));
} else {
/* this will initialize correctly with no dict if dict == NULL, so
* use this in all cases but ddict */
FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
}
ZSTD_checkContinuity(dctx, dst);
{ const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
&src, &srcSize);
RETURN_ERROR_IF(
(ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
&& (moreThan1Frame==1),
srcSize_wrong,
"at least one frame successfully completed, but following "
"bytes are garbage: it's more likely to be a srcSize error, "
"specifying more bytes than compressed size of frame(s). This "
"error message replaces ERROR(prefix_unknown), which would be "
"confusing, as the first header is actually correct. Note that "
"one could be unlucky, it might be a corruption error instead, "
"happening right at the place where we expect zstd magic "
"bytes. But this is _much_ less likely than a srcSize field "
"error.");
if (ZSTD_isError(res)) return res;
assert(res <= dstCapacity);
dst = (BYTE*)dst + res;
dstCapacity -= res;
}
moreThan1Frame = 1;
2017-07-05 18:10:07 -07:00
} /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed");
return (BYTE*)dst - (BYTE*)dststart;
}
2015-12-17 16:26:48 -08:00
2016-01-26 06:58:49 -08:00
size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
2016-03-19 06:14:31 -07:00
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const void* dict, size_t dictSize)
2016-01-26 06:58:49 -08:00
{
return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL);
2016-01-26 06:58:49 -08:00
}
2016-03-19 06:14:31 -07:00
size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
2015-12-17 16:26:48 -08:00
{
2016-03-19 06:14:31 -07:00
return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0);
2015-12-17 16:26:48 -08:00
}
2016-03-11 12:58:04 -08:00
2016-03-19 06:14:31 -07:00
size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1)
size_t regenSize;
2016-06-03 06:41:51 -07:00
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
RETURN_ERROR_IF(dctx==NULL, memory_allocation);
2016-03-19 06:14:31 -07:00
regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
ZSTD_freeDCtx(dctx);
return regenSize;
#else /* stack mode */
2015-12-17 16:26:48 -08:00
ZSTD_DCtx dctx;
ZSTD_initDCtx_internal(&dctx);
2016-03-19 06:14:31 -07:00
return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize);
2016-01-21 07:04:35 -08:00
#endif
}
2016-08-22 15:30:31 -07:00
/*-**************************************
* Advanced Streaming Decompression API
* Bufferless and synchronous
****************************************/
2016-07-28 10:55:09 -07:00
size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; }
ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
switch(dctx->stage)
{
default: /* should not happen */
assert(0);
case ZSTDds_getFrameHeaderSize:
case ZSTDds_decodeFrameHeader:
return ZSTDnit_frameHeader;
case ZSTDds_decodeBlockHeader:
return ZSTDnit_blockHeader;
case ZSTDds_decompressBlock:
return ZSTDnit_block;
case ZSTDds_decompressLastBlock:
return ZSTDnit_lastBlock;
case ZSTDds_checkChecksum:
return ZSTDnit_checksum;
case ZSTDds_decodeSkippableHeader:
case ZSTDds_skipFrame:
return ZSTDnit_skippableFrame;
}
}
static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; }
2016-05-31 03:43:46 -07:00
2016-05-31 09:13:56 -07:00
/** ZSTD_decompressContinue() :
* srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress())
* @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity)
* or an error code, which can be tested using ZSTD_isError() */
size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize);
/* Sanity check */
RETURN_ERROR_IF(srcSize != dctx->expected, srcSize_wrong, "not allowed");
if (dstCapacity) ZSTD_checkContinuity(dctx, dst);
2016-01-25 07:54:05 -08:00
switch (dctx->stage)
{
2015-11-25 05:42:45 -08:00
case ZSTDds_getFrameHeaderSize :
assert(src != NULL);
2017-09-25 15:12:09 -07:00
if (dctx->format == ZSTD_f_zstd1) { /* allows header */
assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */
if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
2017-09-25 15:12:09 -07:00
memcpy(dctx->headerBuffer, src, srcSize);
dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize; /* remaining to load to get full skippable frame header */
2017-09-25 15:12:09 -07:00
dctx->stage = ZSTDds_decodeSkippableHeader;
return 0;
} }
dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format);
if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize;
2017-09-25 15:12:09 -07:00
memcpy(dctx->headerBuffer, src, srcSize);
dctx->expected = dctx->headerSize - srcSize;
dctx->stage = ZSTDds_decodeFrameHeader;
return 0;
2015-11-25 05:42:45 -08:00
case ZSTDds_decodeFrameHeader:
assert(src != NULL);
2017-09-25 15:12:09 -07:00
memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize);
FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize));
2016-09-06 06:36:19 -07:00
dctx->expected = ZSTD_blockHeaderSize;
dctx->stage = ZSTDds_decodeBlockHeader;
return 0;
2015-11-25 05:42:45 -08:00
case ZSTDds_decodeBlockHeader:
{ blockProperties_t bp;
2016-03-23 17:27:55 -07:00
size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
if (ZSTD_isError(cBlockSize)) return cBlockSize;
2016-07-27 15:55:43 -07:00
dctx->expected = cBlockSize;
dctx->bType = bp.blockType;
dctx->rleSize = bp.origSize;
if (cBlockSize) {
dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock;
return 0;
}
/* empty block */
if (bp.lastBlock) {
if (dctx->fParams.checksumFlag) {
2016-07-27 15:55:43 -07:00
dctx->expected = 4;
dctx->stage = ZSTDds_checkChecksum;
} else {
dctx->expected = 0; /* end of frame */
dctx->stage = ZSTDds_getFrameHeaderSize;
}
2016-03-23 17:27:55 -07:00
} else {
dctx->expected = ZSTD_blockHeaderSize; /* jump to next header */
2016-07-27 15:55:43 -07:00
dctx->stage = ZSTDds_decodeBlockHeader;
2015-11-25 05:42:45 -08:00
}
return 0;
}
2017-09-25 15:12:09 -07:00
2016-07-27 15:55:43 -07:00
case ZSTDds_decompressLastBlock:
case ZSTDds_decompressBlock:
DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock");
{ size_t rSize;
2016-01-25 07:54:05 -08:00
switch(dctx->bType)
2015-11-25 05:42:45 -08:00
{
case bt_compressed:
DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1);
2015-11-25 05:42:45 -08:00
break;
case bt_raw :
rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize);
2015-11-25 05:42:45 -08:00
break;
case bt_rle :
rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize);
2015-11-25 05:42:45 -08:00
break;
2016-07-27 15:55:43 -07:00
case bt_reserved : /* should never happen */
2015-11-25 05:42:45 -08:00
default:
RETURN_ERROR(corruption_detected);
2015-11-25 05:42:45 -08:00
}
2016-05-31 15:44:36 -07:00
if (ZSTD_isError(rSize)) return rSize;
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
dctx->decodedSize += rSize;
if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);
2016-07-27 15:55:43 -07:00
if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize);
RETURN_ERROR_IF(
dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
&& dctx->decodedSize != dctx->fParams.frameContentSize,
corruption_detected);
2016-07-27 15:55:43 -07:00
if (dctx->fParams.checksumFlag) { /* another round for frame checksum */
2016-07-28 10:55:09 -07:00
dctx->expected = 4;
2016-07-27 15:55:43 -07:00
dctx->stage = ZSTDds_checkChecksum;
2016-07-28 10:55:09 -07:00
} else {
dctx->expected = 0; /* ends here */
dctx->stage = ZSTDds_getFrameHeaderSize;
2016-07-27 15:55:43 -07:00
}
} else {
dctx->stage = ZSTDds_decodeBlockHeader;
dctx->expected = ZSTD_blockHeaderSize;
dctx->previousDstEnd = (char*)dst + rSize;
}
2015-11-25 05:42:45 -08:00
return rSize;
}
2017-09-25 15:12:09 -07:00
2016-07-27 15:55:43 -07:00
case ZSTDds_checkChecksum:
assert(srcSize == 4); /* guaranteed by dctx->expected */
2016-07-27 15:55:43 -07:00
{ U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
U32 const check32 = MEM_readLE32(src);
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32);
RETURN_ERROR_IF(check32 != h32, checksum_wrong);
2016-07-27 15:55:43 -07:00
dctx->expected = 0;
dctx->stage = ZSTDds_getFrameHeaderSize;
return 0;
}
2017-09-25 15:12:09 -07:00
2016-05-31 03:43:46 -07:00
case ZSTDds_decodeSkippableHeader:
assert(src != NULL);
assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */
dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */
dctx->stage = ZSTDds_skipFrame;
return 0;
2017-09-25 15:12:09 -07:00
2016-05-31 03:43:46 -07:00
case ZSTDds_skipFrame:
dctx->expected = 0;
dctx->stage = ZSTDds_getFrameHeaderSize;
return 0;
2017-09-25 15:12:09 -07:00
2015-11-25 05:42:45 -08:00
default:
assert(0); /* impossible */
RETURN_ERROR(GENERIC); /* some compiler require default to do something */
}
}
static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
{
2016-01-25 07:54:05 -08:00
dctx->dictEnd = dctx->previousDstEnd;
dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
dctx->prefixStart = dict;
2016-01-25 07:54:05 -08:00
dctx->previousDstEnd = (const char*)dict + dictSize;
return 0;
}
2016-01-25 18:14:20 -08:00
/*! ZSTD_loadDEntropy() :
* dict : must point at beginning of a valid zstd dictionary.
* @return : size of entropy tables read */
size_t
ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
const void* const dict, size_t const dictSize)
2016-01-25 18:14:20 -08:00
{
const BYTE* dictPtr = (const BYTE*)dict;
2016-06-15 16:41:50 -07:00
const BYTE* const dictEnd = dictPtr + dictSize;
2016-01-27 15:18:06 -08:00
RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted);
assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */
dictPtr += 8; /* skip header = magic + dictID */
ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable));
ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable));
ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE);
{ void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */
size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable);
#ifdef HUF_FORCE_DECOMPRESS_X1
2018-11-16 15:28:53 -08:00
/* in minimal huffman, we always use X1 variants */
size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable,
dictPtr, dictEnd - dictPtr,
workspace, workspaceSize);
#else
size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
dictPtr, dictEnd - dictPtr,
workspace, workspaceSize);
2018-11-16 15:28:53 -08:00
#endif
RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted);
dictPtr += hSize;
}
2016-01-27 15:18:06 -08:00
{ short offcodeNCount[MaxOff+1];
fix confusion between unsigned <-> U32 as suggested in #1441. generally U32 and unsigned are the same thing, except when they are not ... case : 32-bit compilation for MIPS (uint32_t == unsigned long) A vast majority of transformation consists in transforming U32 into unsigned. In rare cases, it's the other way around (typically for internal code, such as seeds). Among a few issues this patches solves : - some parameters were declared with type `unsigned` in *.h, but with type `U32` in their implementation *.c . - some parameters have type unsigned*, but the caller user a pointer to U32 instead. These fixes are useful. However, the bulk of changes is about %u formating, which requires unsigned type, but generally receives U32 values instead, often just for brevity (U32 is shorter than unsigned). These changes are generally minor, or even annoying. As a consequence, the amount of code changed is larger than I would expect for such a patch. Testing is also a pain : it requires manually modifying `mem.h`, in order to lie about `U32` and force it to be an `unsigned long` typically. On a 64-bit system, this will break the equivalence unsigned == U32. Unfortunately, it will also break a few static_assert(), controlling structure sizes. So it also requires modifying `debug.h` to make `static_assert()` a noop. And then reverting these changes. So it's inconvenient, and as a consequence, this property is currently not checked during CI tests. Therefore, these problems can emerge again in the future. I wonder if it is worth ensuring proper distinction of U32 != unsigned in CI tests. It's another restriction for coding, adding more frustration during merge tests, since most platforms don't need this distinction (hence contributor will not see it), and while this can matter in theory, the number of platforms impacted seems minimal. Thoughts ?
2018-12-21 16:19:44 -08:00
unsigned offcodeMaxValue = MaxOff, offcodeLog;
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted);
RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted);
RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted);
ZSTD_buildFSETable( entropy->OFTable,
offcodeNCount, offcodeMaxValue,
OF_base, OF_bits,
offcodeLog);
dictPtr += offcodeHeaderSize;
}
{ short matchlengthNCount[MaxML+1];
unsigned matchlengthMaxValue = MaxML, matchlengthLog;
size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted);
RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted);
RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted);
ZSTD_buildFSETable( entropy->MLTable,
matchlengthNCount, matchlengthMaxValue,
ML_base, ML_bits,
matchlengthLog);
dictPtr += matchlengthHeaderSize;
}
{ short litlengthNCount[MaxLL+1];
unsigned litlengthMaxValue = MaxLL, litlengthLog;
size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted);
RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted);
RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted);
ZSTD_buildFSETable( entropy->LLTable,
litlengthNCount, litlengthMaxValue,
LL_base, LL_bits,
litlengthLog);
dictPtr += litlengthHeaderSize;
}
2016-01-27 15:18:06 -08:00
RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted);
2017-02-25 18:33:31 -08:00
{ int i;
size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
2017-02-25 18:33:31 -08:00
for (i=0; i<3; i++) {
U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
RETURN_ERROR_IF(rep==0 || rep >= dictContentSize,
dictionary_corrupted);
entropy->rep[i] = rep;
2017-02-25 18:33:31 -08:00
} }
return dictPtr - (const BYTE*)dict;
2016-01-25 18:14:20 -08:00
}
2016-01-26 06:58:49 -08:00
static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
2016-01-25 18:14:20 -08:00
{
if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize);
2016-05-28 20:01:04 -07:00
{ U32 const magic = MEM_readLE32(dict);
if (magic != ZSTD_MAGIC_DICTIONARY) {
return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */
} }
dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
/* load entropy tables */
{ size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize);
RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted);
dict = (const char*)dict + eSize;
dictSize -= eSize;
2016-01-25 18:14:20 -08:00
}
2017-02-25 18:33:31 -08:00
dctx->litEntropy = dctx->fseEntropy = 1;
/* reference dictionary content */
return ZSTD_refDictContent(dctx, dict, dictSize);
2016-01-25 18:14:20 -08:00
}
size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
{
assert(dctx != NULL);
dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */
dctx->stage = ZSTDds_getFrameHeaderSize;
dctx->decodedSize = 0;
dctx->previousDstEnd = NULL;
dctx->prefixStart = NULL;
dctx->virtualStart = NULL;
dctx->dictEnd = NULL;
dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
dctx->litEntropy = dctx->fseEntropy = 0;
dctx->dictID = 0;
ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */
dctx->LLTptr = dctx->entropy.LLTable;
dctx->MLTptr = dctx->entropy.MLTable;
dctx->OFTptr = dctx->entropy.OFTable;
dctx->HUFptr = dctx->entropy.hufTable;
return 0;
}
2016-01-26 06:58:49 -08:00
size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
{
FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) );
2017-07-05 18:10:07 -07:00
if (dict && dictSize)
RETURN_ERROR_IF(
ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)),
dictionary_corrupted);
2016-01-26 06:58:49 -08:00
return 0;
}
/* ====== ZSTD_DDict ====== */
size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
{
DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict");
assert(dctx != NULL);
if (ddict) {
const char* const dictStart = (const char*)ZSTD_DDict_dictContent(ddict);
size_t const dictSize = ZSTD_DDict_dictSize(ddict);
const void* const dictEnd = dictStart + dictSize;
dctx->ddictIsCold = (dctx->dictEnd != dictEnd);
2018-09-12 16:40:28 -07:00
DEBUGLOG(4, "DDict is %s",
dctx->ddictIsCold ? "~cold~" : "hot!");
}
FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) );
if (ddict) { /* NULL ddict is equivalent to no dictionary */
ZSTD_copyDDictParameters(dctx, ddict);
2017-05-25 15:44:06 -07:00
}
return 0;
}
/*! ZSTD_getDictID_fromDict() :
* Provides the dictID stored within dictionary.
* if @return == 0, the dictionary is not conformant with Zstandard specification.
* It can still be loaded, but as a content-only dictionary. */
unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
{
if (dictSize < 8) return 0;
if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0;
return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
}
/*! ZSTD_getDictID_fromFrame() :
2017-05-01 11:22:24 -07:00
* Provides the dictID required to decompresse frame stored within `src`.
* If @return == 0, the dictID could not be decoded.
* This could for one of the following reasons :
2017-05-01 11:22:24 -07:00
* - The frame does not require a dictionary (most common case).
* - The frame was built with dictID intentionally removed.
* Needed dictionary is a hidden information.
* Note : this use case also happens when using a non-conformant dictionary.
2017-05-01 11:22:24 -07:00
* - `srcSize` is too small, and as a result, frame header could not be decoded.
* Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`.
* - This is not a Zstandard frame.
2017-05-01 11:22:24 -07:00
* When identifying the exact failure cause, it's possible to use
* ZSTD_getFrameHeader(), which will provide a more precise error code. */
unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize)
{
ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 };
size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize);
if (ZSTD_isError(hError)) return 0;
return zfp.dictID;
}
/*! ZSTD_decompress_usingDDict() :
* Decompression using a pre-digested Dictionary
2016-06-09 15:12:26 -07:00
* Use dictionary without significant overhead. */
size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const ZSTD_DDict* ddict)
{
/* pass content and size in case legacy frames are encountered */
return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize,
NULL, 0,
ddict);
}
2016-08-12 04:04:27 -07:00
/*=====================================
* Streaming decompression
*====================================*/
ZSTD_DStream* ZSTD_createDStream(void)
{
DEBUGLOG(3, "ZSTD_createDStream");
return ZSTD_createDStream_advanced(ZSTD_defaultCMem);
2016-08-12 04:04:27 -07:00
}
ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)
{
return ZSTD_initStaticDCtx(workspace, workspaceSize);
}
2016-08-12 04:04:27 -07:00
ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem)
{
return ZSTD_createDCtx_advanced(customMem);
2016-08-12 04:04:27 -07:00
}
size_t ZSTD_freeDStream(ZSTD_DStream* zds)
{
return ZSTD_freeDCtx(zds);
2016-08-12 04:04:27 -07:00
}
/* *** Initialization *** */
2016-08-12 04:04:27 -07:00
size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; }
size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
2016-08-12 04:04:27 -07:00
size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
const void* dict, size_t dictSize,
ZSTD_dictLoadMethod_e dictLoadMethod,
ZSTD_dictContentType_e dictContentType)
{
RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
ZSTD_freeDDict(dctx->ddictLocal);
if (dict && dictSize >= 8) {
dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation);
} else {
dctx->ddictLocal = NULL;
}
dctx->ddict = dctx->ddictLocal;
return 0;
}
size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
{
return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
}
size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
{
return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
}
2018-03-20 15:45:56 -07:00
size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
{
return ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType);
}
size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize)
{
return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent);
}
2018-03-20 15:43:49 -07:00
/* ZSTD_initDStream_usingDict() :
* return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
2018-03-20 15:43:49 -07:00
* this function cannot fail */
2016-08-12 04:04:27 -07:00
size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
{
DEBUGLOG(4, "ZSTD_initDStream_usingDict");
2018-03-20 15:43:49 -07:00
zds->streamStage = zdss_init;
zds->noForwardProgress = 0;
FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
return ZSTD_FRAMEHEADERSIZE_PREFIX;
2016-08-12 04:04:27 -07:00
}
2017-09-25 16:21:17 -07:00
/* note : this variant can't fail */
2016-08-12 04:04:27 -07:00
size_t ZSTD_initDStream(ZSTD_DStream* zds)
{
DEBUGLOG(4, "ZSTD_initDStream");
2016-08-12 04:04:27 -07:00
return ZSTD_initDStream_usingDict(zds, NULL, 0);
}
2017-05-01 11:22:24 -07:00
/* ZSTD_initDStream_usingDDict() :
2017-09-25 16:21:17 -07:00
* ddict will just be referenced, and must outlive decompression session
* this function cannot fail */
size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
{
size_t const initResult = ZSTD_initDStream(dctx);
dctx->ddict = ddict;
return initResult;
}
2018-03-20 15:43:49 -07:00
/* ZSTD_resetDStream() :
* return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
2018-03-20 15:43:49 -07:00
* this function cannot fail */
size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
{
DEBUGLOG(4, "ZSTD_resetDStream");
dctx->streamStage = zdss_loadHeader;
dctx->lhSize = dctx->inPos = dctx->outStart = dctx->outEnd = 0;
dctx->legacyVersion = 0;
dctx->hostageByte = 0;
return ZSTD_FRAMEHEADERSIZE_PREFIX;
}
2016-08-23 07:58:10 -07:00
size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
{
RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
dctx->ddict = ddict;
return 0;
}
2018-12-04 17:06:48 -08:00
/* ZSTD_DCtx_setMaxWindowSize() :
* note : no direct equivalence in ZSTD_DCtx_setParameter,
* since this version sets windowSize, and the other sets windowLog */
size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
{
2018-12-04 17:06:48 -08:00
ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax);
size_t const min = (size_t)1 << bounds.lowerBound;
size_t const max = (size_t)1 << bounds.upperBound;
RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound);
RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound);
dctx->maxWindowSize = maxWindowSize;
return 0;
}
size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format)
{
2018-12-04 17:06:48 -08:00
return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, format);
}
ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
{
ZSTD_bounds bounds = { 0, 0, 0 };
switch(dParam) {
case ZSTD_d_windowLogMax:
bounds.lowerBound = ZSTD_WINDOWLOG_ABSOLUTEMIN;
bounds.upperBound = ZSTD_WINDOWLOG_MAX;
return bounds;
case ZSTD_d_format:
bounds.lowerBound = (int)ZSTD_f_zstd1;
bounds.upperBound = (int)ZSTD_f_zstd1_magicless;
ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
return bounds;
default:;
}
bounds.error = ERROR(parameter_unsupported);
return bounds;
}
/* ZSTD_dParam_withinBounds:
* @return 1 if value is within dParam bounds,
* 0 otherwise */
static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value)
{
ZSTD_bounds const bounds = ZSTD_dParam_getBounds(dParam);
if (ZSTD_isError(bounds.error)) return 0;
if (value < bounds.lowerBound) return 0;
if (value > bounds.upperBound) return 0;
return 1;
}
#define CHECK_DBOUNDS(p,v) { \
RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound); \
}
size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value)
{
RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
switch(dParam) {
case ZSTD_d_windowLogMax:
CHECK_DBOUNDS(ZSTD_d_windowLogMax, value);
2018-12-04 15:57:16 -08:00
dctx->maxWindowSize = ((size_t)1) << value;
return 0;
case ZSTD_d_format:
CHECK_DBOUNDS(ZSTD_d_format, value);
dctx->format = (ZSTD_format_e)value;
return 0;
default:;
}
RETURN_ERROR(parameter_unsupported);
}
size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
{
if ( (reset == ZSTD_reset_session_only)
|| (reset == ZSTD_reset_session_and_parameters) ) {
(void)ZSTD_initDStream(dctx);
}
if ( (reset == ZSTD_reset_parameters)
|| (reset == ZSTD_reset_session_and_parameters) ) {
RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
dctx->format = ZSTD_f_zstd1;
dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
}
return 0;
}
2016-08-23 07:58:10 -07:00
size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
2016-08-22 15:30:31 -07:00
{
return ZSTD_sizeof_DCtx(dctx);
2016-08-22 15:30:31 -07:00
}
size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
{
size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2);
unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
size_t const minRBSize = (size_t) neededSize;
RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
frameParameter_windowTooLarge);
return minRBSize;
}
size_t ZSTD_estimateDStreamSize(size_t windowSize)
2017-05-09 16:18:17 -07:00
{
size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
2017-05-09 16:18:17 -07:00
size_t const inBuffSize = blockSize; /* no block can be larger */
size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN);
return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize;
2017-05-09 16:18:17 -07:00
}
size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize)
{
U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable, but requires an additional parameter (or a dctx) */
ZSTD_frameHeader zfh;
size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize);
if (ZSTD_isError(err)) return err;
RETURN_ERROR_IF(err>0, srcSize_wrong);
RETURN_ERROR_IF(zfh.windowSize > windowSizeMax,
frameParameter_windowTooLarge);
2017-07-07 17:13:12 -07:00
return ZSTD_estimateDStreamSize((size_t)zfh.windowSize);
}
2016-08-12 04:04:27 -07:00
/* ***** Decompression ***** */
2016-08-12 04:04:27 -07:00
MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
size_t const length = MIN(dstCapacity, srcSize);
memcpy(dst, src, length);
return length;
}
2016-08-16 16:39:22 -07:00
size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
2016-08-12 04:04:27 -07:00
{
2016-08-16 16:39:22 -07:00
const char* const istart = (const char*)(input->src) + input->pos;
const char* const iend = (const char*)(input->src) + input->size;
2016-08-12 04:04:27 -07:00
const char* ip = istart;
2016-08-16 16:39:22 -07:00
char* const ostart = (char*)(output->dst) + output->pos;
char* const oend = (char*)(output->dst) + output->size;
2016-08-12 04:04:27 -07:00
char* op = ostart;
U32 someMoreWork = 1;
DEBUGLOG(5, "ZSTD_decompressStream");
RETURN_ERROR_IF(
input->pos > input->size,
srcSize_wrong,
"forbidden. in: pos: %u vs size: %u",
(U32)input->pos, (U32)input->size);
RETURN_ERROR_IF(
output->pos > output->size,
dstSize_tooSmall,
"forbidden. out: pos: %u vs size: %u",
(U32)output->pos, (U32)output->size);
DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos));
2016-08-12 04:04:27 -07:00
while (someMoreWork) {
switch(zds->streamStage)
2016-08-12 04:04:27 -07:00
{
case zdss_init :
2018-03-20 17:48:22 -07:00
DEBUGLOG(5, "stage zdss_init => transparent reset ");
ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */
/* fall-through */
2016-08-12 04:04:27 -07:00
case zdss_loadHeader :
2017-09-25 15:12:09 -07:00
DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip));
2018-03-20 17:48:22 -07:00
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
if (zds->legacyVersion) {
RETURN_ERROR_IF(zds->staticSize, memory_allocation,
"legacy support is incompatible with static dctx");
2018-03-20 17:48:22 -07:00
{ size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
if (hint==0) zds->streamStage = zdss_init;
return hint;
} }
#endif
{ size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format);
2017-09-25 15:12:09 -07:00
DEBUGLOG(5, "header size : %u", (U32)hSize);
if (ZSTD_isError(hSize)) {
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
if (legacyVersion) {
const void* const dict = zds->ddict ? ZSTD_DDict_dictContent(zds->ddict) : NULL;
size_t const dictSize = zds->ddict ? ZSTD_DDict_dictSize(zds->ddict) : 0;
2018-03-20 17:48:22 -07:00
DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion);
RETURN_ERROR_IF(zds->staticSize, memory_allocation,
"legacy support is incompatible with static dctx");
FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext,
zds->previousLegacyVersion, legacyVersion,
dict, dictSize));
2016-08-28 08:19:47 -07:00
zds->legacyVersion = zds->previousLegacyVersion = legacyVersion;
2018-03-20 17:48:22 -07:00
{ size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input);
if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */
return hint;
} }
#endif
return hSize; /* error */
}
2016-08-12 04:04:27 -07:00
if (hSize != 0) { /* need more input */
size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */
size_t const remainingInput = (size_t)(iend-ip);
assert(iend >= ip);
if (toLoad > remainingInput) { /* not enough input to load full header */
if (remainingInput > 0) {
memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput);
zds->lhSize += remainingInput;
}
2016-08-16 16:39:22 -07:00
input->pos = input->size;
return (MAX(ZSTD_FRAMEHEADERSIZE_MIN, hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
2016-08-12 04:04:27 -07:00
}
assert(ip != NULL);
2016-08-12 04:04:27 -07:00
memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
break;
} }
/* check for single-pass mode opportunity */
2017-02-28 01:15:28 -08:00
if (zds->fParams.frameContentSize && zds->fParams.windowSize /* skippable frame if == 0 */
&& (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) {
size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart);
if (cSize <= (size_t)(iend-istart)) {
/* shortcut : using single-pass mode */
size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, zds->ddict);
if (ZSTD_isError(decompressedSize)) return decompressedSize;
DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
2017-02-28 02:12:42 -08:00
ip = istart + cSize;
op += decompressedSize;
zds->expected = 0;
zds->streamStage = zdss_init;
someMoreWork = 0;
break;
} }
2017-06-20 17:44:55 -07:00
/* Consume header (see ZSTDds_decodeFrameHeader) */
DEBUGLOG(4, "Consume header");
FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, zds->ddict));
if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
zds->stage = ZSTDds_skipFrame;
} else {
FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize));
zds->expected = ZSTD_blockHeaderSize;
zds->stage = ZSTDds_decodeBlockHeader;
}
2016-08-12 04:04:27 -07:00
2017-06-20 17:44:55 -07:00
/* control buffer memory usage */
DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)",
(U32)(zds->fParams.windowSize >>10),
(U32)(zds->maxWindowSize >> 10) );
2016-08-12 04:04:27 -07:00
zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize,
frameParameter_windowTooLarge);
2016-08-12 04:04:27 -07:00
/* Adapt buffer sizes to frame header instructions */
{ size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
size_t const neededOutBuffSize = ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize);
if ((zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize)) {
size_t const bufferSize = neededInBuffSize + neededOutBuffSize;
DEBUGLOG(4, "inBuff : from %u to %u",
(U32)zds->inBuffSize, (U32)neededInBuffSize);
DEBUGLOG(4, "outBuff : from %u to %u",
(U32)zds->outBuffSize, (U32)neededOutBuffSize);
2017-05-24 17:41:41 -07:00
if (zds->staticSize) { /* static DCtx */
DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize);
assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */
RETURN_ERROR_IF(
bufferSize > zds->staticSize - sizeof(ZSTD_DCtx),
memory_allocation);
2017-05-24 17:41:41 -07:00
} else {
ZSTD_free(zds->inBuff, zds->customMem);
zds->inBuffSize = 0;
2017-05-24 17:41:41 -07:00
zds->outBuffSize = 0;
zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem);
RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation);
}
zds->inBuffSize = neededInBuffSize;
zds->outBuff = zds->inBuff + zds->inBuffSize;
zds->outBuffSize = neededOutBuffSize;
2016-08-12 04:04:27 -07:00
} }
zds->streamStage = zdss_read;
2017-08-08 12:32:26 -07:00
/* fall-through */
2016-08-12 04:04:27 -07:00
case zdss_read:
DEBUGLOG(5, "stage zdss_read");
{ size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize);
2016-08-12 04:04:27 -07:00
if (neededInSize==0) { /* end of frame */
zds->streamStage = zdss_init;
2016-08-12 04:04:27 -07:00
someMoreWork = 0;
break;
}
if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */
int const isSkipFrame = ZSTD_isSkipFrame(zds);
size_t const decodedSize = ZSTD_decompressContinue(zds,
2016-08-12 04:04:27 -07:00
zds->outBuff + zds->outStart, (isSkipFrame ? 0 : zds->outBuffSize - zds->outStart),
ip, neededInSize);
if (ZSTD_isError(decodedSize)) return decodedSize;
ip += neededInSize;
if (!decodedSize && !isSkipFrame) break; /* this was just a header */
zds->outEnd = zds->outStart + decodedSize;
zds->streamStage = zdss_flush;
2016-08-12 04:04:27 -07:00
break;
2017-05-24 17:41:41 -07:00
} }
if (ip==iend) { someMoreWork = 0; break; } /* no more input */
zds->streamStage = zdss_load;
2017-08-08 12:32:26 -07:00
/* fall-through */
2018-03-20 17:48:22 -07:00
2016-08-12 04:04:27 -07:00
case zdss_load:
{ size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
2017-11-01 12:49:51 -07:00
size_t const toLoad = neededInSize - zds->inPos;
int const isSkipFrame = ZSTD_isSkipFrame(zds);
2016-08-12 04:04:27 -07:00
size_t loadedSize;
2017-11-01 12:49:51 -07:00
if (isSkipFrame) {
loadedSize = MIN(toLoad, (size_t)(iend-ip));
} else {
RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos,
corruption_detected,
"should never happen");
2017-11-01 12:49:51 -07:00
loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip);
}
2016-08-12 04:04:27 -07:00
ip += loadedSize;
zds->inPos += loadedSize;
if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */
/* decode loaded input */
2017-11-01 12:49:51 -07:00
{ size_t const decodedSize = ZSTD_decompressContinue(zds,
2016-08-12 04:04:27 -07:00
zds->outBuff + zds->outStart, zds->outBuffSize - zds->outStart,
zds->inBuff, neededInSize);
if (ZSTD_isError(decodedSize)) return decodedSize;
zds->inPos = 0; /* input is consumed */
if (!decodedSize && !isSkipFrame) { zds->streamStage = zdss_read; break; } /* this was just a header */
2016-08-12 04:04:27 -07:00
zds->outEnd = zds->outStart + decodedSize;
} }
2017-05-24 17:41:41 -07:00
zds->streamStage = zdss_flush;
2017-08-08 12:32:26 -07:00
/* fall-through */
2018-03-20 17:48:22 -07:00
2016-08-12 04:04:27 -07:00
case zdss_flush:
{ size_t const toFlushSize = zds->outEnd - zds->outStart;
size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize);
op += flushedSize;
zds->outStart += flushedSize;
if (flushedSize == toFlushSize) { /* flush completed */
zds->streamStage = zdss_read;
if ( (zds->outBuffSize < zds->fParams.frameContentSize)
&& (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) {
DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)",
(int)(zds->outBuffSize - zds->outStart),
(U32)zds->fParams.blockSizeMax);
2016-08-12 04:04:27 -07:00
zds->outStart = zds->outEnd = 0;
}
2016-08-12 04:04:27 -07:00
break;
2017-05-24 17:41:41 -07:00
} }
/* cannot complete flush */
someMoreWork = 0;
break;
default:
assert(0); /* impossible */
RETURN_ERROR(GENERIC); /* some compiler require default to do something */
2016-08-12 04:04:27 -07:00
} }
/* result */
input->pos = (size_t)(ip - (const char*)(input->src));
output->pos = (size_t)(op - (char*)(output->dst));
if ((ip==istart) && (op==ostart)) { /* no forward progress */
zds->noForwardProgress ++;
if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) {
RETURN_ERROR_IF(op==oend, dstSize_tooSmall);
RETURN_ERROR_IF(ip==iend, srcSize_wrong);
assert(0);
}
} else {
zds->noForwardProgress = 0;
}
{ size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds);
if (!nextSrcSizeHint) { /* frame fully decoded */
if (zds->outEnd == zds->outStart) { /* output fully flushed */
if (zds->hostageByte) {
if (input->pos >= input->size) {
/* can't release hostage (not present) */
zds->streamStage = zdss_read;
return 1;
}
input->pos++; /* release hostage */
2017-05-24 17:41:41 -07:00
} /* zds->hostageByte */
return 0;
2017-05-24 17:41:41 -07:00
} /* zds->outEnd == zds->outStart */
if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */
input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */
zds->hostageByte=1;
}
return 1;
2017-05-24 17:41:41 -07:00
} /* nextSrcSizeHint==0 */
nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */
assert(zds->inPos <= nextSrcSizeHint);
nextSrcSizeHint -= zds->inPos; /* part already loaded*/
2016-08-12 04:04:27 -07:00
return nextSrcSizeHint;
}
}
size_t ZSTD_decompressStream_simpleArgs (
ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity, size_t* dstPos,
const void* src, size_t srcSize, size_t* srcPos)
{
ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
ZSTD_inBuffer input = { src, srcSize, *srcPos };
/* ZSTD_compress_generic() will check validity of dstPos and srcPos */
size_t const cErr = ZSTD_decompressStream(dctx, &output, &input);
*dstPos = output.pos;
*srcPos = input.pos;
return cErr;
}