Merge pull request #1011 from terrelln/bmi2

[decompress] Support BMI2
dev
Yann Collet 2018-02-15 11:40:34 -08:00 committed by GitHub
commit 7117ea8bec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 1135 additions and 693 deletions

View File

@ -25,6 +25,9 @@ cxx_library(
name='decompress',
header_namespace='',
visibility=['PUBLIC'],
headers=subdir_glob([
('decompress', '*_impl.h'),
]),
srcs=glob(['decompress/zstd*.c']),
deps=[
':common',
@ -80,6 +83,15 @@ cxx_library(
]),
)
cxx_library(
name='cpu',
header_namespace='',
visibility=['PUBLIC'],
exported_headers=subdir_glob([
('common', 'cpu.h'),
]),
)
cxx_library(
name='bitstream',
header_namespace='',
@ -196,6 +208,7 @@ cxx_library(
deps=[
':bitstream',
':compiler',
':cpu',
':entropy',
':errors',
':mem',

View File

@ -63,6 +63,24 @@
# endif
#endif
/* target attribute */
#if defined(__GNUC__)
# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
#else
# define TARGET_ATTRIBUTE(target)
#endif
/* Enable runtime BMI2 dispatch based on the CPU.
* Enabled for clang/gcc on x86 when BMI2 isn't enabled by default.
*/
#ifndef DYNAMIC_BMI2
#if defined(__GNUC__) && (defined(__x86_64__) || defined(_M_X86)) && !defined(__BMI2__)
# define DYNAMIC_BMI2 1
#else
# define DYNAMIC_BMI2 0
#endif
#endif
/* prefetch */
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */

216
lib/common/cpu.h Normal file
View File

@ -0,0 +1,216 @@
/*
* Copyright (c) 2018-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_COMMON_CPU_H
#define ZSTD_COMMON_CPU_H
/**
* Implementation taken from folly/CpuId.h
* https://github.com/facebook/folly/blob/master/folly/CpuId.h
*/
#include <string.h>
#include "mem.h"
#ifdef _MSC_VER
#include <intrin.h>
#endif
typedef struct {
U32 f1c;
U32 f1d;
U32 f7b;
U32 f7c;
} ZSTD_cpuid_t;
MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
U32 f1c = 0;
U32 f1d = 0;
U32 f7b = 0;
U32 f7c = 0;
#ifdef _MSC_VER
int reg[4];
__cpuid((int*)reg, 0);
{
int const n = reg[0];
if (n >= 1) {
__cpuid((int*)reg, 1);
f1c = (U32)reg[2];
f1d = (U32)reg[3];
}
if (n >= 7) {
__cpuidex((int*)reg, 7, 0);
f7b = (U32)reg[1];
f7c = (U32)reg[2];
}
}
#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
/* The following block like the normal cpuid branch below, but gcc
* reserves ebx for use of its pic register so we must specially
* handle the save and restore to avoid clobbering the register
*/
U32 n;
__asm__(
"pushl %%ebx\n\t"
"cpuid\n\t"
"popl %%ebx\n\t"
: "=a"(n)
: "a"(0)
: "ecx", "edx");
if (n >= 1) {
U32 f1a;
__asm__(
"pushl %%ebx\n\t"
"cpuid\n\t"
"popl %%ebx\n\t"
: "=a"(f1a), "=c"(f1c), "=d"(f1d)
: "a"(1)
:);
}
if (n >= 7) {
__asm__(
"pushl %%ebx\n\t"
"cpuid\n\t"
"movl %%ebx, %%eax\n\r"
"popl %%ebx"
: "=a"(f7b), "=c"(f7c)
: "a"(7), "c"(0)
: "edx");
}
#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
U32 n;
__asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
if (n >= 1) {
U32 f1a;
__asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
}
if (n >= 7) {
U32 f7a;
__asm__("cpuid"
: "=a"(f7a), "=b"(f7b), "=c"(f7c)
: "a"(7), "c"(0)
: "edx");
}
#endif
{
ZSTD_cpuid_t cpuid;
cpuid.f1c = f1c;
cpuid.f1d = f1d;
cpuid.f7b = f7b;
cpuid.f7c = f7c;
return cpuid;
}
}
#define X(name, r, bit) \
MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \
return ((cpuid.r) & (1U << bit)) != 0; \
}
/* cpuid(1): Processor Info and Feature Bits. */
#define C(name, bit) X(name, f1c, bit)
C(sse3, 0)
C(pclmuldq, 1)
C(dtes64, 2)
C(monitor, 3)
C(dscpl, 4)
C(vmx, 5)
C(smx, 6)
C(eist, 7)
C(tm2, 8)
C(ssse3, 9)
C(cnxtid, 10)
C(fma, 12)
C(cx16, 13)
C(xtpr, 14)
C(pdcm, 15)
C(pcid, 17)
C(dca, 18)
C(sse41, 19)
C(sse42, 20)
C(x2apic, 21)
C(movbe, 22)
C(popcnt, 23)
C(tscdeadline, 24)
C(aes, 25)
C(xsave, 26)
C(osxsave, 27)
C(avx, 28)
C(f16c, 29)
C(rdrand, 30)
#undef C
#define D(name, bit) X(name, f1d, bit)
D(fpu, 0)
D(vme, 1)
D(de, 2)
D(pse, 3)
D(tsc, 4)
D(msr, 5)
D(pae, 6)
D(mce, 7)
D(cx8, 8)
D(apic, 9)
D(sep, 11)
D(mtrr, 12)
D(pge, 13)
D(mca, 14)
D(cmov, 15)
D(pat, 16)
D(pse36, 17)
D(psn, 18)
D(clfsh, 19)
D(ds, 21)
D(acpi, 22)
D(mmx, 23)
D(fxsr, 24)
D(sse, 25)
D(sse2, 26)
D(ss, 27)
D(htt, 28)
D(tm, 29)
D(pbe, 31)
#undef D
/* cpuid(7): Extended Features. */
#define B(name, bit) X(name, f7b, bit)
B(bmi1, 3)
B(hle, 4)
B(avx2, 5)
B(smep, 7)
B(bmi2, 8)
B(erms, 9)
B(invpcid, 10)
B(rtm, 11)
B(mpx, 14)
B(avx512f, 16)
B(avx512dq, 17)
B(rdseed, 18)
B(adx, 19)
B(smap, 20)
B(avx512ifma, 21)
B(pcommit, 22)
B(clflushopt, 23)
B(clwb, 24)
B(avx512pf, 26)
B(avx512er, 27)
B(avx512cd, 28)
B(sha, 29)
B(avx512bw, 30)
B(avx512vl, 31)
#undef B
#define C(name, bit) X(name, f7c, bit)
C(prefetchwt1, 0)
C(avx512vbmi, 1)
#undef C
#undef X
#endif /* ZSTD_COMMON_CPU_H */

View File

@ -295,6 +295,14 @@ size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cS
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
/* BMI2 variants.
* If the CPU has BMI2 support pass bmi2=1, otherwise pass bmi2=0.
*/
size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
#endif /* HUF_STATIC_LINKING_ONLY */
#if defined (__cplusplus)

View File

@ -144,72 +144,54 @@ size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
workSpace, sizeof(workSpace));
}
typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
{
size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
BYTE const c = dt[val].byte;
BIT_skipBits(Dstream, dt[val].nbBits);
return c;
}
#define FUNCTION(fn) fn##_default
#define TARGET
#include "huf_decompress_impl.h"
#undef TARGET
#undef FUNCTION
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
*ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
#if DYNAMIC_BMI2
#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
#define FUNCTION(fn) fn##_bmi2
#define TARGET TARGET_ATTRIBUTE("bmi2")
#include "huf_decompress_impl.h"
#undef TARGET
#undef FUNCTION
#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
if (MEM_64bits()) \
HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
#endif
HINT_INLINE size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
{
BYTE* const pStart = p;
/* up to 4 symbols at a time */
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4)) {
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
const void *cSrc,
size_t cSrcSize,
const HUF_DTable *DTable);
#if DYNAMIC_BMI2
#define X(fn) \
static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
{ \
if (bmi2) { \
return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
} \
return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
}
#else
#define X(fn) \
static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
{ \
(void)bmi2; \
return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
}
#endif
/* closer to the end */
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
X(HUF_decompress1X2_usingDTable_internal)
X(HUF_decompress4X2_usingDTable_internal)
X(HUF_decompress1X4_usingDTable_internal)
X(HUF_decompress4X4_usingDTable_internal)
/* no more data to retrieve from bitstream, hence no need to reload */
while (p < pEnd)
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
return pEnd-pStart;
}
static size_t HUF_decompress1X2_usingDTable_internal(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
const HUF_DTable* DTable)
{
BYTE* op = (BYTE*)dst;
BYTE* const oend = op + dstSize;
const void* dtPtr = DTable + 1;
const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
BIT_DStream_t bitD;
DTableDesc const dtd = HUF_getDTableDesc(DTable);
U32 const dtLog = dtd.tableLog;
{ size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
if (HUF_isError(errorCode)) return errorCode; }
HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
/* check */
if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
return dstSize;
}
#undef X
size_t HUF_decompress1X2_usingDTable(
void* dst, size_t dstSize,
@ -218,7 +200,7 @@ size_t HUF_decompress1X2_usingDTable(
{
DTableDesc dtd = HUF_getDTableDesc(DTable);
if (dtd.tableType != 0) return ERROR(GENERIC);
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
@ -232,7 +214,7 @@ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize; cSrcSize -= hSize;
return HUF_decompress1X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
}
@ -250,100 +232,6 @@ size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cS
return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
}
static size_t HUF_decompress4X2_usingDTable_internal(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
const HUF_DTable* DTable)
{
/* Check */
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
{ const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
const void* const dtPtr = DTable + 1;
const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
/* Init */
BIT_DStream_t bitD1;
BIT_DStream_t bitD2;
BIT_DStream_t bitD3;
BIT_DStream_t bitD4;
size_t const length1 = MEM_readLE16(istart);
size_t const length2 = MEM_readLE16(istart+2);
size_t const length3 = MEM_readLE16(istart+4);
size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
const BYTE* const istart1 = istart + 6; /* jumpTable */
const BYTE* const istart2 = istart1 + length1;
const BYTE* const istart3 = istart2 + length2;
const BYTE* const istart4 = istart3 + length3;
const size_t segmentSize = (dstSize+3) / 4;
BYTE* const opStart2 = ostart + segmentSize;
BYTE* const opStart3 = opStart2 + segmentSize;
BYTE* const opStart4 = opStart3 + segmentSize;
BYTE* op1 = ostart;
BYTE* op2 = opStart2;
BYTE* op3 = opStart3;
BYTE* op4 = opStart4;
U32 endSignal;
DTableDesc const dtd = HUF_getDTableDesc(DTable);
U32 const dtLog = dtd.tableLog;
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
{ size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
if (HUF_isError(errorCode)) return errorCode; }
{ size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
if (HUF_isError(errorCode)) return errorCode; }
{ size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
if (HUF_isError(errorCode)) return errorCode; }
{ size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
if (HUF_isError(errorCode)) return errorCode; }
/* 16-32 symbols per loop (4-8 symbols per stream) */
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) {
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
}
/* check corruption */
if (op1 > opStart2) return ERROR(corruption_detected);
if (op2 > opStart3) return ERROR(corruption_detected);
if (op3 > opStart4) return ERROR(corruption_detected);
/* note : op4 supposed already verified within main loop */
/* finish bitStreams one by one */
HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
/* check */
endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
if (!endSignal) return ERROR(corruption_detected);
/* decoded size */
return dstSize;
}
}
size_t HUF_decompress4X2_usingDTable(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
@ -351,13 +239,12 @@ size_t HUF_decompress4X2_usingDTable(
{
DTableDesc dtd = HUF_getDTableDesc(DTable);
if (dtd.tableType != 0) return ERROR(GENERIC);
return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
void* workSpace, size_t wkspSize)
void* workSpace, size_t wkspSize, int bmi2)
{
const BYTE* ip = (const BYTE*) cSrc;
@ -367,7 +254,14 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize; cSrcSize -= hSize;
return HUF_decompress4X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, dctx);
return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
}
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
void* workSpace, size_t wkspSize)
{
return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
}
@ -387,8 +281,6 @@ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cS
/* *************************/
/* double-symbols decoding */
/* *************************/
typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
/* HUF_fillDTableX4Level2() :
@ -588,95 +480,6 @@ size_t HUF_readDTableX4(HUF_DTable* DTable, const void* src, size_t srcSize)
workSpace, sizeof(workSpace));
}
static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
{
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
memcpy(op, dt+val, 2);
BIT_skipBits(DStream, dt[val].nbBits);
return dt[val].length;
}
static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
{
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
memcpy(op, dt+val, 1);
if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
else {
if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
BIT_skipBits(DStream, dt[val].nbBits);
if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
/* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
} }
return 1;
}
#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
if (MEM_64bits()) \
ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
HINT_INLINE size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
{
BYTE* const pStart = p;
/* up to 8 symbols at a time */
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
}
/* closer to end : up to 2 symbols at a time */
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
while (p <= pEnd-2)
HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
if (p < pEnd)
p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
return p-pStart;
}
static size_t HUF_decompress1X4_usingDTable_internal(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
const HUF_DTable* DTable)
{
BIT_DStream_t bitD;
/* Init */
{ size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
if (HUF_isError(errorCode)) return errorCode;
}
/* decode */
{ BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
DTableDesc const dtd = HUF_getDTableDesc(DTable);
HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
}
/* check */
if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
/* decoded size */
return dstSize;
}
size_t HUF_decompress1X4_usingDTable(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
@ -684,7 +487,7 @@ size_t HUF_decompress1X4_usingDTable(
{
DTableDesc dtd = HUF_getDTableDesc(DTable);
if (dtd.tableType != 1) return ERROR(GENERIC);
return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}
size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
@ -699,7 +502,7 @@ size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize; cSrcSize -= hSize;
return HUF_decompress1X4_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
}
@ -717,99 +520,6 @@ size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cS
return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
}
static size_t HUF_decompress4X4_usingDTable_internal(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
const HUF_DTable* DTable)
{
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
{ const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
const void* const dtPtr = DTable+1;
const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
/* Init */
BIT_DStream_t bitD1;
BIT_DStream_t bitD2;
BIT_DStream_t bitD3;
BIT_DStream_t bitD4;
size_t const length1 = MEM_readLE16(istart);
size_t const length2 = MEM_readLE16(istart+2);
size_t const length3 = MEM_readLE16(istart+4);
size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
const BYTE* const istart1 = istart + 6; /* jumpTable */
const BYTE* const istart2 = istart1 + length1;
const BYTE* const istart3 = istart2 + length2;
const BYTE* const istart4 = istart3 + length3;
size_t const segmentSize = (dstSize+3) / 4;
BYTE* const opStart2 = ostart + segmentSize;
BYTE* const opStart3 = opStart2 + segmentSize;
BYTE* const opStart4 = opStart3 + segmentSize;
BYTE* op1 = ostart;
BYTE* op2 = opStart2;
BYTE* op3 = opStart3;
BYTE* op4 = opStart4;
U32 endSignal;
DTableDesc const dtd = HUF_getDTableDesc(DTable);
U32 const dtLog = dtd.tableLog;
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
{ size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
if (HUF_isError(errorCode)) return errorCode; }
{ size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
if (HUF_isError(errorCode)) return errorCode; }
{ size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
if (HUF_isError(errorCode)) return errorCode; }
{ size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
if (HUF_isError(errorCode)) return errorCode; }
/* 16-32 symbols per loop (4-8 symbols per stream) */
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
}
/* check corruption */
if (op1 > opStart2) return ERROR(corruption_detected);
if (op2 > opStart3) return ERROR(corruption_detected);
if (op3 > opStart4) return ERROR(corruption_detected);
/* note : op4 already verified within main loop */
/* finish bitStreams one by one */
HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
/* check */
{ U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
if (!endCheck) return ERROR(corruption_detected); }
/* decoded size */
return dstSize;
}
}
size_t HUF_decompress4X4_usingDTable(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
@ -817,13 +527,12 @@ size_t HUF_decompress4X4_usingDTable(
{
DTableDesc dtd = HUF_getDTableDesc(DTable);
if (dtd.tableType != 1) return ERROR(GENERIC);
return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}
size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
static size_t HUF_decompress4X4_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
void* workSpace, size_t wkspSize)
void* workSpace, size_t wkspSize, int bmi2)
{
const BYTE* ip = (const BYTE*) cSrc;
@ -833,7 +542,14 @@ size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize; cSrcSize -= hSize;
return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
}
size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
void* workSpace, size_t wkspSize)
{
return HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
}
@ -861,8 +577,8 @@ size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
const HUF_DTable* DTable)
{
DTableDesc const dtd = HUF_getDTableDesc(DTable);
return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) :
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
@ -870,8 +586,8 @@ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
const HUF_DTable* DTable)
{
DTableDesc const dtd = HUF_getDTableDesc(DTable);
return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) :
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}
@ -994,3 +710,42 @@ size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
workSpace, sizeof(workSpace));
}
size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
{
DTableDesc const dtd = HUF_getDTableDesc(DTable);
return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
}
size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
{
const BYTE* ip = (const BYTE*) cSrc;
size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
if (HUF_isError(hSize)) return hSize;
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize; cSrcSize -= hSize;
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
}
size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
{
DTableDesc const dtd = HUF_getDTableDesc(DTable);
return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
}
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
{
/* validation checks */
if (dstSize == 0) return ERROR(dstSize_tooSmall);
if (cSrcSize == 0) return ERROR(corruption_detected);
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
return algoNb ? HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
}
}

View File

@ -0,0 +1,365 @@
/*
* Copyright (c) 2018-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef FUNCTION
# error "FUNCTION(name) must be defined"
#endif
#ifndef TARGET
# error "TARGET must be defined"
#endif
static TARGET BYTE FUNCTION(HUF_decodeSymbolX2)(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
{
size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
BYTE const c = dt[val].byte;
BIT_skipBits(Dstream, dt[val].nbBits);
return c;
}
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
*ptr++ = FUNCTION(HUF_decodeSymbolX2)(DStreamPtr, dt, dtLog)
#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
if (MEM_64bits()) \
HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
HINT_INLINE TARGET size_t FUNCTION(HUF_decodeStreamX2)(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
{
BYTE* const pStart = p;
/* up to 4 symbols at a time */
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4)) {
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
}
/* closer to the end */
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
/* no more data to retrieve from bitstream, hence no need to reload */
while (p < pEnd)
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
return pEnd-pStart;
}
static TARGET size_t FUNCTION(HUF_decompress1X2_usingDTable_internal)(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
const HUF_DTable* DTable)
{
BYTE* op = (BYTE*)dst;
BYTE* const oend = op + dstSize;
const void* dtPtr = DTable + 1;
const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
BIT_DStream_t bitD;
DTableDesc const dtd = HUF_getDTableDesc(DTable);
U32 const dtLog = dtd.tableLog;
{ size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
if (HUF_isError(errorCode)) return errorCode; }
FUNCTION(HUF_decodeStreamX2)(op, &bitD, oend, dt, dtLog);
/* check */
if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
return dstSize;
}
static TARGET size_t
FUNCTION(HUF_decompress4X2_usingDTable_internal)(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
const HUF_DTable* DTable)
{
/* Check */
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
{ const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
const void* const dtPtr = DTable + 1;
const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
/* Init */
BIT_DStream_t bitD1;
BIT_DStream_t bitD2;
BIT_DStream_t bitD3;
BIT_DStream_t bitD4;
size_t const length1 = MEM_readLE16(istart);
size_t const length2 = MEM_readLE16(istart+2);
size_t const length3 = MEM_readLE16(istart+4);
size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
const BYTE* const istart1 = istart + 6; /* jumpTable */
const BYTE* const istart2 = istart1 + length1;
const BYTE* const istart3 = istart2 + length2;
const BYTE* const istart4 = istart3 + length3;
const size_t segmentSize = (dstSize+3) / 4;
BYTE* const opStart2 = ostart + segmentSize;
BYTE* const opStart3 = opStart2 + segmentSize;
BYTE* const opStart4 = opStart3 + segmentSize;
BYTE* op1 = ostart;
BYTE* op2 = opStart2;
BYTE* op3 = opStart3;
BYTE* op4 = opStart4;
U32 endSignal;
DTableDesc const dtd = HUF_getDTableDesc(DTable);
U32 const dtLog = dtd.tableLog;
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
{ size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
if (HUF_isError(errorCode)) return errorCode; }
{ size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
if (HUF_isError(errorCode)) return errorCode; }
{ size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
if (HUF_isError(errorCode)) return errorCode; }
{ size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
if (HUF_isError(errorCode)) return errorCode; }
/* 16-32 symbols per loop (4-8 symbols per stream) */
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) {
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
}
/* check corruption */
if (op1 > opStart2) return ERROR(corruption_detected);
if (op2 > opStart3) return ERROR(corruption_detected);
if (op3 > opStart4) return ERROR(corruption_detected);
/* note : op4 supposed already verified within main loop */
/* finish bitStreams one by one */
FUNCTION(HUF_decodeStreamX2)(op1, &bitD1, opStart2, dt, dtLog);
FUNCTION(HUF_decodeStreamX2)(op2, &bitD2, opStart3, dt, dtLog);
FUNCTION(HUF_decodeStreamX2)(op3, &bitD3, opStart4, dt, dtLog);
FUNCTION(HUF_decodeStreamX2)(op4, &bitD4, oend, dt, dtLog);
/* check */
endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
if (!endSignal) return ERROR(corruption_detected);
/* decoded size */
return dstSize;
}
}
#undef HUF_DECODE_SYMBOLX2_0
#undef HUF_DECODE_SYMBOLX2_1
#undef HUF_DECODE_SYMBOLX2_2
static TARGET U32 FUNCTION(HUF_decodeSymbolX4)(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
{
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
memcpy(op, dt+val, 2);
BIT_skipBits(DStream, dt[val].nbBits);
return dt[val].length;
}
static TARGET U32 FUNCTION(HUF_decodeLastSymbolX4)(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
{
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
memcpy(op, dt+val, 1);
if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
else {
if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
BIT_skipBits(DStream, dt[val].nbBits);
if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
/* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
} }
return 1;
}
#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
ptr += FUNCTION(HUF_decodeSymbolX4)(ptr, DStreamPtr, dt, dtLog)
#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
ptr += FUNCTION(HUF_decodeSymbolX4)(ptr, DStreamPtr, dt, dtLog)
#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
if (MEM_64bits()) \
ptr += FUNCTION(HUF_decodeSymbolX4)(ptr, DStreamPtr, dt, dtLog)
HINT_INLINE TARGET size_t FUNCTION(HUF_decodeStreamX4)(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
{
BYTE* const pStart = p;
/* up to 8 symbols at a time */
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
}
/* closer to end : up to 2 symbols at a time */
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
while (p <= pEnd-2)
HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
if (p < pEnd)
p += FUNCTION(HUF_decodeLastSymbolX4)(p, bitDPtr, dt, dtLog);
return p-pStart;
}
static TARGET size_t FUNCTION(HUF_decompress1X4_usingDTable_internal)(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
const HUF_DTable* DTable)
{
BIT_DStream_t bitD;
/* Init */
{ size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
if (HUF_isError(errorCode)) return errorCode;
}
/* decode */
{ BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
DTableDesc const dtd = HUF_getDTableDesc(DTable);
FUNCTION(HUF_decodeStreamX4)(ostart, &bitD, oend, dt, dtd.tableLog);
}
/* check */
if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
/* decoded size */
return dstSize;
}
static TARGET size_t FUNCTION(HUF_decompress4X4_usingDTable_internal)(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
const HUF_DTable* DTable)
{
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
{ const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
const void* const dtPtr = DTable+1;
const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
/* Init */
BIT_DStream_t bitD1;
BIT_DStream_t bitD2;
BIT_DStream_t bitD3;
BIT_DStream_t bitD4;
size_t const length1 = MEM_readLE16(istart);
size_t const length2 = MEM_readLE16(istart+2);
size_t const length3 = MEM_readLE16(istart+4);
size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
const BYTE* const istart1 = istart + 6; /* jumpTable */
const BYTE* const istart2 = istart1 + length1;
const BYTE* const istart3 = istart2 + length2;
const BYTE* const istart4 = istart3 + length3;
size_t const segmentSize = (dstSize+3) / 4;
BYTE* const opStart2 = ostart + segmentSize;
BYTE* const opStart3 = opStart2 + segmentSize;
BYTE* const opStart4 = opStart3 + segmentSize;
BYTE* op1 = ostart;
BYTE* op2 = opStart2;
BYTE* op3 = opStart3;
BYTE* op4 = opStart4;
U32 endSignal;
DTableDesc const dtd = HUF_getDTableDesc(DTable);
U32 const dtLog = dtd.tableLog;
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
{ size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
if (HUF_isError(errorCode)) return errorCode; }
{ size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
if (HUF_isError(errorCode)) return errorCode; }
{ size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
if (HUF_isError(errorCode)) return errorCode; }
{ size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
if (HUF_isError(errorCode)) return errorCode; }
/* 16-32 symbols per loop (4-8 symbols per stream) */
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
}
/* check corruption */
if (op1 > opStart2) return ERROR(corruption_detected);
if (op2 > opStart3) return ERROR(corruption_detected);
if (op3 > opStart4) return ERROR(corruption_detected);
/* note : op4 already verified within main loop */
/* finish bitStreams one by one */
FUNCTION(HUF_decodeStreamX4)(op1, &bitD1, opStart2, dt, dtLog);
FUNCTION(HUF_decodeStreamX4)(op2, &bitD2, opStart3, dt, dtLog);
FUNCTION(HUF_decodeStreamX4)(op3, &bitD3, opStart4, dt, dtLog);
FUNCTION(HUF_decodeStreamX4)(op4, &bitD4, oend, dt, dtLog);
/* check */
{ U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
if (!endCheck) return ERROR(corruption_detected); }
/* decoded size */
return dstSize;
}
}
#undef HUF_DECODE_SYMBOLX4_0
#undef HUF_DECODE_SYMBOLX4_1
#undef HUF_DECODE_SYMBOLX4_2

View File

@ -43,6 +43,7 @@
* Dependencies
*********************************************************/
#include <string.h> /* memcpy, memmove, memset */
#include "cpu.h"
#include "mem.h" /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
@ -131,6 +132,7 @@ struct ZSTD_DCtx_s
size_t litSize;
size_t rleSize;
size_t staticSize;
int bmi2;
/* streaming */
ZSTD_DDict* ddictLocal;
@ -188,6 +190,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
dctx->inBuffSize = 0;
dctx->outBuffSize = 0;
dctx->streamStage = zdss_init;
dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
}
ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
@ -586,13 +589,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
if (HUF_isError((litEncType==set_repeat) ?
( singleStream ?
HUF_decompress1X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) :
HUF_decompress4X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) ) :
HUF_decompress1X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) :
HUF_decompress4X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) ) :
( singleStream ?
HUF_decompress1X2_DCtx_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
dctx->entropy.workspace, sizeof(dctx->entropy.workspace)) :
HUF_decompress4X_hufOnly_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
dctx->entropy.workspace, sizeof(dctx->entropy.workspace)))))
HUF_decompress1X2_DCtx_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
dctx->entropy.workspace, sizeof(dctx->entropy.workspace), dctx->bmi2) :
HUF_decompress4X_hufOnly_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
dctx->entropy.workspace, sizeof(dctx->entropy.workspace), dctx->bmi2))))
return ERROR(corruption_detected);
dctx->litPtr = dctx->litBuffer;
@ -1063,105 +1066,6 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
}
static void
ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
{
ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
U32 const nbBits = DInfo.nbBits;
size_t const lowBits = BIT_readBits(bitD, nbBits);
DStatePtr->state = DInfo.nextState + lowBits;
}
typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
* offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
* bits before reloading. This value is the maximum number of bytes we read
* after reloading when we are decoding long offets.
*/
#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
(ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
: 0)
static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
{
seq_t seq;
U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
U32 const totalBits = llBits+mlBits+ofBits;
U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
/* sequence */
{ size_t offset;
if (!ofBits)
offset = 0;
else {
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
assert(ofBits <= MaxOff);
if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
BIT_reloadDStream(&seqState->DStream);
if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
} else {
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
}
}
if (ofBits <= 1) {
offset += (llBase==0);
if (offset) {
size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
seqState->prevOffset[1] = seqState->prevOffset[0];
seqState->prevOffset[0] = offset = temp;
} else { /* offset == 0 */
offset = seqState->prevOffset[0];
}
} else {
seqState->prevOffset[2] = seqState->prevOffset[1];
seqState->prevOffset[1] = seqState->prevOffset[0];
seqState->prevOffset[0] = offset;
}
seq.offset = offset;
}
seq.matchLength = mlBase
+ ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
BIT_reloadDStream(&seqState->DStream);
if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
BIT_reloadDStream(&seqState->DStream);
/* Ensure there are enough bits to read the rest of data in 64-bit mode. */
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
seq.litLength = llBase
+ ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */
if (MEM_32bits())
BIT_reloadDStream(&seqState->DStream);
DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
/* ANS state update */
ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
return seq;
}
HINT_INLINE
size_t ZSTD_execSequence(BYTE* op,
BYTE* const oend, seq_t sequence,
@ -1243,162 +1147,6 @@ size_t ZSTD_execSequence(BYTE* op,
}
static void
ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
{
const void* ptr = dt;
const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
(U32)DStatePtr->state, DTableH->tableLog);
BIT_reloadDStream(bitD);
DStatePtr->table = dt + 1;
}
static size_t ZSTD_decompressSequences(
ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize,
const ZSTD_longOffset_e isLongOffset)
{
const BYTE* ip = (const BYTE*)seqStart;
const BYTE* const iend = ip + seqSize;
BYTE* const ostart = (BYTE* const)dst;
BYTE* const oend = ostart + maxDstSize;
BYTE* op = ostart;
const BYTE* litPtr = dctx->litPtr;
const BYTE* const litEnd = litPtr + dctx->litSize;
const BYTE* const base = (const BYTE*) (dctx->base);
const BYTE* const vBase = (const BYTE*) (dctx->vBase);
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
int nbSeq;
DEBUGLOG(5, "ZSTD_decompressSequences");
/* Build Decoding Tables */
{ size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
DEBUGLOG(5, "ZSTD_decodeSeqHeaders: size=%u, nbSeq=%i",
(U32)seqHSize, nbSeq);
if (ZSTD_isError(seqHSize)) return seqHSize;
ip += seqHSize;
}
/* Regen sequences */
if (nbSeq) {
seqState_t seqState;
dctx->fseEntropy = 1;
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
nbSeq--;
{ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
op += oneSeqSize;
} }
/* check if reached exact end */
DEBUGLOG(5, "ZSTD_decompressSequences: after decode loop, remaining nbSeq : %i", nbSeq);
if (nbSeq) return ERROR(corruption_detected);
/* save reps for next block */
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
}
/* last literal segment */
{ size_t const lastLLSize = litEnd - litPtr;
if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
memcpy(op, litPtr, lastLLSize);
op += lastLLSize;
}
return op-ostart;
}
HINT_INLINE
seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
{
seq_t seq;
U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
U32 const totalBits = llBits+mlBits+ofBits;
U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
/* sequence */
{ size_t offset;
if (!ofBits)
offset = 0;
else {
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
assert(ofBits <= MaxOff);
if (MEM_32bits() && longOffsets) {
U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
} else {
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
}
}
if (ofBits <= 1) {
offset += (llBase==0);
if (offset) {
size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
seqState->prevOffset[1] = seqState->prevOffset[0];
seqState->prevOffset[0] = offset = temp;
} else {
offset = seqState->prevOffset[0];
}
} else {
seqState->prevOffset[2] = seqState->prevOffset[1];
seqState->prevOffset[1] = seqState->prevOffset[0];
seqState->prevOffset[0] = offset;
}
seq.offset = offset;
}
seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
BIT_reloadDStream(&seqState->DStream);
if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
BIT_reloadDStream(&seqState->DStream);
/* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
if (MEM_32bits())
BIT_reloadDStream(&seqState->DStream);
{ size_t const pos = seqState->pos + seq.litLength;
const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
* No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */
seqState->pos = pos + seq.matchLength;
}
/* ANS state update */
ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
return seq;
}
HINT_INLINE
size_t ZSTD_execSequenceLong(BYTE* op,
BYTE* const oend, seq_t sequence,
@ -1478,88 +1226,51 @@ size_t ZSTD_execSequenceLong(BYTE* op,
return sequenceLength;
}
static size_t ZSTD_decompressSequencesLong(
ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize,
const ZSTD_longOffset_e isLongOffset)
typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
#define FUNCTION(fn) fn##_default
#define TARGET
#include "zstd_decompress_impl.h"
#undef TARGET
#undef FUNCTION
#if DYNAMIC_BMI2
#define FUNCTION(fn) fn##_bmi2
#define TARGET TARGET_ATTRIBUTE("bmi2")
#include "zstd_decompress_impl.h"
#undef TARGET
#undef FUNCTION
#endif
typedef size_t (*ZSTD_decompressSequences_t)(
ZSTD_DCtx *dctx, void *dst, size_t maxDstSize, const void *seqStart,
size_t seqSize, const ZSTD_longOffset_e isLongOffset);
static size_t ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize,
const ZSTD_longOffset_e isLongOffset)
{
const BYTE* ip = (const BYTE*)seqStart;
const BYTE* const iend = ip + seqSize;
BYTE* const ostart = (BYTE* const)dst;
BYTE* const oend = ostart + maxDstSize;
BYTE* op = ostart;
const BYTE* litPtr = dctx->litPtr;
const BYTE* const litEnd = litPtr + dctx->litSize;
const BYTE* const prefixStart = (const BYTE*) (dctx->base);
const BYTE* const dictStart = (const BYTE*) (dctx->vBase);
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
int nbSeq;
/* Build Decoding Tables */
{ size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
if (ZSTD_isError(seqHSize)) return seqHSize;
ip += seqHSize;
#if DYNAMIC_BMI2
if (dctx->bmi2) {
return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, isLongOffset);
}
/* Regen sequences */
if (nbSeq) {
#define STORED_SEQS 4
#define STOSEQ_MASK (STORED_SEQS-1)
#define ADVANCED_SEQS 4
seq_t sequences[STORED_SEQS];
int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
seqState_t seqState;
int seqNb;
dctx->fseEntropy = 1;
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
seqState.prefixStart = prefixStart;
seqState.pos = (size_t)(op-prefixStart);
seqState.dictEnd = dictEnd;
CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
/* prepare in advance */
for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
}
if (seqNb<seqAdvance) return ERROR(corruption_detected);
/* decode and decompress */
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
PREFETCH(sequence.match); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
sequences[seqNb&STOSEQ_MASK] = sequence;
op += oneSeqSize;
}
if (seqNb<nbSeq) return ERROR(corruption_detected);
/* finish queue */
seqNb -= seqAdvance;
for ( ; seqNb<nbSeq ; seqNb++) {
size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STOSEQ_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
op += oneSeqSize;
}
/* save reps for next block */
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
}
/* last literal segment */
{ size_t const lastLLSize = litEnd - litPtr;
if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
memcpy(op, litPtr, lastLLSize);
op += lastLLSize;
}
return op-ostart;
#endif
return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, isLongOffset);
}
static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize,
const ZSTD_longOffset_e isLongOffset)
{
#if DYNAMIC_BMI2
if (dctx->bmi2) {
return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, isLongOffset);
}
#endif
return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, isLongOffset);
}
static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,

View File

@ -0,0 +1,356 @@
/*
* Copyright (c) 2018-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef FUNCTION
# error "FUNCTION(name) must be defined"
#endif
#ifndef TARGET
# error "TARGET must be defined"
#endif
static TARGET void
FUNCTION(ZSTD_updateFseState)(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
{
ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
U32 const nbBits = DInfo.nbBits;
size_t const lowBits = BIT_readBits(bitD, nbBits);
DStatePtr->state = DInfo.nextState + lowBits;
}
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
* offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
* bits before reloading. This value is the maximum number of bytes we read
* after reloading when we are decoding long offets.
*/
#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
(ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
: 0)
static TARGET seq_t
FUNCTION(ZSTD_decodeSequence)(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
{
seq_t seq;
U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
U32 const totalBits = llBits+mlBits+ofBits;
U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
/* sequence */
{ size_t offset;
if (!ofBits)
offset = 0;
else {
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
assert(ofBits <= MaxOff);
if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
BIT_reloadDStream(&seqState->DStream);
if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
} else {
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
}
}
if (ofBits <= 1) {
offset += (llBase==0);
if (offset) {
size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
seqState->prevOffset[1] = seqState->prevOffset[0];
seqState->prevOffset[0] = offset = temp;
} else { /* offset == 0 */
offset = seqState->prevOffset[0];
}
} else {
seqState->prevOffset[2] = seqState->prevOffset[1];
seqState->prevOffset[1] = seqState->prevOffset[0];
seqState->prevOffset[0] = offset;
}
seq.offset = offset;
}
seq.matchLength = mlBase
+ ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
BIT_reloadDStream(&seqState->DStream);
if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
BIT_reloadDStream(&seqState->DStream);
/* Ensure there are enough bits to read the rest of data in 64-bit mode. */
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
seq.litLength = llBase
+ ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */
if (MEM_32bits())
BIT_reloadDStream(&seqState->DStream);
DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
/* ANS state update */
FUNCTION(ZSTD_updateFseState)(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
FUNCTION(ZSTD_updateFseState)(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
FUNCTION(ZSTD_updateFseState)(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
return seq;
}
HINT_INLINE seq_t
FUNCTION(ZSTD_decodeSequenceLong)(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
{
seq_t seq;
U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
U32 const totalBits = llBits+mlBits+ofBits;
U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
/* sequence */
{ size_t offset;
if (!ofBits)
offset = 0;
else {
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
assert(ofBits <= MaxOff);
if (MEM_32bits() && longOffsets) {
U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
} else {
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
}
}
if (ofBits <= 1) {
offset += (llBase==0);
if (offset) {
size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
seqState->prevOffset[1] = seqState->prevOffset[0];
seqState->prevOffset[0] = offset = temp;
} else {
offset = seqState->prevOffset[0];
}
} else {
seqState->prevOffset[2] = seqState->prevOffset[1];
seqState->prevOffset[1] = seqState->prevOffset[0];
seqState->prevOffset[0] = offset;
}
seq.offset = offset;
}
seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
BIT_reloadDStream(&seqState->DStream);
if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
BIT_reloadDStream(&seqState->DStream);
/* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
if (MEM_32bits())
BIT_reloadDStream(&seqState->DStream);
{ size_t const pos = seqState->pos + seq.litLength;
const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
* No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */
seqState->pos = pos + seq.matchLength;
}
/* ANS state update */
FUNCTION(ZSTD_updateFseState)(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
FUNCTION(ZSTD_updateFseState)(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
FUNCTION(ZSTD_updateFseState)(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
return seq;
}
static TARGET void
FUNCTION(ZSTD_initFseState)(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
{
const void* ptr = dt;
const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
(U32)DStatePtr->state, DTableH->tableLog);
BIT_reloadDStream(bitD);
DStatePtr->table = dt + 1;
}
static TARGET
size_t FUNCTION(ZSTD_decompressSequences)(
ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize,
const ZSTD_longOffset_e isLongOffset)
{
const BYTE* ip = (const BYTE*)seqStart;
const BYTE* const iend = ip + seqSize;
BYTE* const ostart = (BYTE* const)dst;
BYTE* const oend = ostart + maxDstSize;
BYTE* op = ostart;
const BYTE* litPtr = dctx->litPtr;
const BYTE* const litEnd = litPtr + dctx->litSize;
const BYTE* const base = (const BYTE*) (dctx->base);
const BYTE* const vBase = (const BYTE*) (dctx->vBase);
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
int nbSeq;
DEBUGLOG(5, "ZSTD_decompressSequences");
/* Build Decoding Tables */
{ size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
DEBUGLOG(5, "ZSTD_decodeSeqHeaders: size=%u, nbSeq=%i",
(U32)seqHSize, nbSeq);
if (ZSTD_isError(seqHSize)) return seqHSize;
ip += seqHSize;
}
/* Regen sequences */
if (nbSeq) {
seqState_t seqState;
dctx->fseEntropy = 1;
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
FUNCTION(ZSTD_initFseState)(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
FUNCTION(ZSTD_initFseState)(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
FUNCTION(ZSTD_initFseState)(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
nbSeq--;
{ seq_t const sequence = FUNCTION(ZSTD_decodeSequence)(&seqState, isLongOffset);
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
op += oneSeqSize;
} }
/* check if reached exact end */
DEBUGLOG(5, "ZSTD_decompressSequences: after decode loop, remaining nbSeq : %i", nbSeq);
if (nbSeq) return ERROR(corruption_detected);
/* save reps for next block */
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
}
/* last literal segment */
{ size_t const lastLLSize = litEnd - litPtr;
if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
memcpy(op, litPtr, lastLLSize);
op += lastLLSize;
}
return op-ostart;
}
static TARGET
size_t FUNCTION(ZSTD_decompressSequencesLong)(
ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize,
const ZSTD_longOffset_e isLongOffset)
{
const BYTE* ip = (const BYTE*)seqStart;
const BYTE* const iend = ip + seqSize;
BYTE* const ostart = (BYTE* const)dst;
BYTE* const oend = ostart + maxDstSize;
BYTE* op = ostart;
const BYTE* litPtr = dctx->litPtr;
const BYTE* const litEnd = litPtr + dctx->litSize;
const BYTE* const prefixStart = (const BYTE*) (dctx->base);
const BYTE* const dictStart = (const BYTE*) (dctx->vBase);
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
int nbSeq;
/* Build Decoding Tables */
{ size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
if (ZSTD_isError(seqHSize)) return seqHSize;
ip += seqHSize;
}
/* Regen sequences */
if (nbSeq) {
#define STORED_SEQS 4
#define STOSEQ_MASK (STORED_SEQS-1)
#define ADVANCED_SEQS 4
seq_t sequences[STORED_SEQS];
int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
seqState_t seqState;
int seqNb;
dctx->fseEntropy = 1;
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
seqState.prefixStart = prefixStart;
seqState.pos = (size_t)(op-prefixStart);
seqState.dictEnd = dictEnd;
CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
FUNCTION(ZSTD_initFseState)(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
FUNCTION(ZSTD_initFseState)(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
FUNCTION(ZSTD_initFseState)(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
/* prepare in advance */
for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
sequences[seqNb] = FUNCTION(ZSTD_decodeSequenceLong)(&seqState, isLongOffset);
}
if (seqNb<seqAdvance) return ERROR(corruption_detected);
/* decode and decompress */
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
seq_t const sequence = FUNCTION(ZSTD_decodeSequenceLong)(&seqState, isLongOffset);
size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
PREFETCH(sequence.match); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
sequences[seqNb&STOSEQ_MASK] = sequence;
op += oneSeqSize;
}
if (seqNb<nbSeq) return ERROR(corruption_detected);
/* finish queue */
seqNb -= seqAdvance;
for ( ; seqNb<nbSeq ; seqNb++) {
size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STOSEQ_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
op += oneSeqSize;
}
/* save reps for next block */
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
#undef STORED_SEQS
#undef STOSEQ_MASK
#undef ADVANCED_SEQS
}
/* last literal segment */
{ size_t const lastLLSize = litEnd - litPtr;
if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
memcpy(op, litPtr, lastLLSize);
op += lastLLSize;
}
return op-ostart;
}