unified encoding types

dev
Yann Collet 2016-07-23 16:31:49 +02:00
parent 571a59034a
commit f8e7b5363f
5 changed files with 61 additions and 71 deletions

View File

@ -88,13 +88,13 @@ static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
typedef enum { bt_raw, bt_rle, bt_compressed, bt_end } blockType_t;
typedef enum { bt_raw, bt_rle, bt_compressed, bt_end } blockType_e;
#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
#define HufLog 12
typedef enum { lbt_huffman, lbt_repeat, lbt_raw, lbt_rle } litBlockType_t;
typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
#define LONGNBSEQ 0x7F00
@ -111,11 +111,6 @@ typedef enum { lbt_huffman, lbt_repeat, lbt_raw, lbt_rle } litBlockType_t;
#define LLFSELog 9
#define OffFSELog 8
#define FSE_ENCODING_RAW 0
#define FSE_ENCODING_RLE 1
#define FSE_ENCODING_STATIC 2
#define FSE_ENCODING_DYNAMIC 3
static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12,
13,14,15,16 };

View File

@ -444,14 +444,14 @@ static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void
switch(flSize)
{
case 1: /* 2 - 1 - 5 */
ostart[0] = (BYTE)((U32)lbt_raw + (srcSize<<3));
ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
break;
case 2: /* 2 - 2 - 12 */
MEM_writeLE16(ostart, (U16)((U32)lbt_raw + (1<<2) + (srcSize<<4)));
MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
break;
default: /*note : should not be necessary : flSize is within {1,2,3} */
case 3: /* 2 - 2 - 20 */
MEM_writeLE32(ostart, (U32)((U32)lbt_raw + (3<<2) + (srcSize<<4)));
MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
break;
}
@ -469,14 +469,14 @@ static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, cons
switch(flSize)
{
case 1: /* 2 - 1 - 5 */
ostart[0] = (BYTE)((U32)lbt_rle + (srcSize<<3));
ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
break;
case 2: /* 2 - 2 - 12 */
MEM_writeLE16(ostart, (U16)((U32)lbt_rle + (1<<2) + (srcSize<<4)));
MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
break;
default: /*note : should not be necessary : flSize is necessarily within {1,2,3} */
case 3: /* 2 - 2 - 20 */
MEM_writeLE32(ostart, (U32)((U32)lbt_rle + (3<<2) + (srcSize<<4)));
MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
break;
}
@ -495,7 +495,7 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
BYTE* const ostart = (BYTE*)dst;
U32 singleStream = srcSize < 256;
litBlockType_t hType = lbt_huffman;
symbolEncodingType_e hType = set_compressed;
size_t cLitSize;
@ -507,7 +507,7 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */
if (zc->flagStaticTables && (lhSize==3)) {
hType = lbt_repeat;
hType = set_repeat;
singleStream = 1;
cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable);
} else {
@ -652,12 +652,12 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
*op++ = llCodeTable[0];
FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
LLtype = FSE_ENCODING_RLE;
LLtype = set_rle;
} else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
LLtype = FSE_ENCODING_STATIC;
LLtype = set_repeat;
} else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) {
FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog);
LLtype = FSE_ENCODING_RAW;
LLtype = set_basic;
} else {
size_t nbSeq_1 = nbSeq;
const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
@ -667,7 +667,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
if (FSE_isError(NCountSize)) return ERROR(GENERIC);
op += NCountSize; }
FSE_buildCTable(CTable_LitLength, norm, max, tableLog);
LLtype = FSE_ENCODING_DYNAMIC;
LLtype = set_compressed;
} }
/* CTable for Offsets */
@ -676,12 +676,12 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
*op++ = ofCodeTable[0];
FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
Offtype = FSE_ENCODING_RLE;
Offtype = set_rle;
} else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
Offtype = FSE_ENCODING_STATIC;
Offtype = set_repeat;
} else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) {
FSE_buildCTable(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog);
Offtype = FSE_ENCODING_RAW;
Offtype = set_basic;
} else {
size_t nbSeq_1 = nbSeq;
const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
@ -691,7 +691,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
if (FSE_isError(NCountSize)) return ERROR(GENERIC);
op += NCountSize; }
FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog);
Offtype = FSE_ENCODING_DYNAMIC;
Offtype = set_compressed;
} }
/* CTable for MatchLengths */
@ -700,12 +700,12 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
*op++ = *mlCodeTable;
FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
MLtype = FSE_ENCODING_RLE;
MLtype = set_rle;
} else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
MLtype = FSE_ENCODING_STATIC;
MLtype = set_repeat;
} else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) {
FSE_buildCTable(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog);
MLtype = FSE_ENCODING_RAW;
MLtype = set_basic;
} else {
size_t nbSeq_1 = nbSeq;
const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
@ -715,7 +715,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
if (FSE_isError(NCountSize)) return ERROR(GENERIC);
op += NCountSize; }
FSE_buildCTable(CTable_MatchLength, norm, max, tableLog);
MLtype = FSE_ENCODING_DYNAMIC;
MLtype = set_compressed;
} }
*seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));

View File

@ -120,7 +120,7 @@ struct ZSTD_DCtx_s
size_t expected;
U32 rep[3];
ZSTD_frameParams fParams;
blockType_t bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
blockType_e bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
ZSTD_dStage stage;
U32 litEntropy;
U32 fseEntropy;
@ -427,7 +427,7 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t sr
typedef struct
{
blockType_t blockType;
blockType_e blockType;
U32 origSize;
} blockProperties_t;
@ -438,7 +438,7 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bp
if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
{ U32 const cBlockHeader = MEM_readLE24(src);
U32 const cSize = cBlockHeader >> 2;
bpPtr->blockType = (blockType_t)(cBlockHeader & 3);
bpPtr->blockType = (blockType_e)(cBlockHeader & 3);
bpPtr->origSize = cSize; /* only useful for RLE */
if (bpPtr->blockType == bt_end) return 0;
if (bpPtr->blockType == bt_rle) return 1;
@ -463,14 +463,14 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
{ const BYTE* const istart = (const BYTE*) src;
litBlockType_t const litBlockType = (litBlockType_t)(istart[0] & 3);
symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
switch(litBlockType)
switch(litEncType)
{
case lbt_repeat:
case set_repeat:
if (dctx->litEntropy==0) return ERROR(dictionary_corrupted);
/* fall-through */
case lbt_huffman:
case set_compressed:
if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
{ size_t lhSize, litSize, litCSize;
U32 singleStream=0;
@ -504,7 +504,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(corruption_detected);
if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
if (HUF_isError((litBlockType==lbt_repeat) ?
if (HUF_isError((litEncType==set_repeat) ?
( singleStream ?
HUF_decompress1X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTable) :
HUF_decompress4X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTable) ) :
@ -520,7 +520,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
return litCSize + lhSize;
}
case lbt_raw:
case set_basic:
{ size_t litSize, lhSize;
U32 const lhlCode = ((istart[0]) >> 2) & 3;
switch(lhlCode)
@ -554,7 +554,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
return lhSize+litSize;
}
case lbt_rle:
case set_rle:
{ U32 const lhlCode = ((istart[0]) >> 2) & 3;
size_t litSize, lhSize;
switch(lhlCode)
@ -592,25 +592,25 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
@return : nb bytes read from src,
or an error code if it fails, testable with ZSTD_isError()
*/
FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, U32 type, U32 max, U32 maxLog,
FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, symbolEncodingType_e type, U32 max, U32 maxLog,
const void* src, size_t srcSize,
const S16* defaultNorm, U32 defaultLog, U32 flagRepeatTable)
{
switch(type)
{
case FSE_ENCODING_RLE :
case set_rle :
if (!srcSize) return ERROR(srcSize_wrong);
if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
FSE_buildDTable_rle(DTable, *(const BYTE*)src); /* if *src > max, data is corrupted */
return 1;
case FSE_ENCODING_RAW :
case set_basic :
FSE_buildDTable(DTable, defaultNorm, max, defaultLog);
return 0;
case FSE_ENCODING_STATIC:
case set_repeat:
if (!flagRepeatTable) return ERROR(corruption_detected);
return 0;
default : /* impossible */
case FSE_ENCODING_DYNAMIC :
case set_compressed :
{ U32 tableLog;
S16 norm[MaxSeq+1];
size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
@ -646,9 +646,9 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeqPtr,
}
/* FSE table descriptors */
{ U32 const LLtype = *ip >> 6;
U32 const OFtype = (*ip >> 4) & 3;
U32 const MLtype = (*ip >> 2) & 3;
{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
ip++;
/* check */

View File

@ -31,8 +31,9 @@
#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC */
#include "mem.h"
#include "zstd_internal.h" /* ZSTD_blockHeaderSize, blockType_e, KB, MB */
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressBegin, ZSTD_compressContinue, etc. */
#include "zstd.h" /* ZSTD_VERSION_STRING */
#include "zstd.h" /* ZSTD_VERSION_STRING */
#define FSE_STATIC_LINKING_ONLY /* FSE_DTABLE_SIZE_U32 */
#include "fse.h"
#include "zbuff.h"
@ -46,10 +47,6 @@
#define AUTHOR "Yann Collet"
#define WELCOME_MESSAGE "*** %s %s %i-bits, by %s (%s) ***\n", PROGRAM_DESCRIPTION, ZSTD_VERSION_STRING, (int)(sizeof(void*)*8), AUTHOR, __DATE__
#define KB *(1<<10)
#define MB *(1<<20)
#define NBLOOPS 6
#define TIMELOOP_S 2
@ -110,9 +107,8 @@ static size_t BMK_findMaxMem(U64 requiredMem)
/*_*******************************************************
* Benchmark wrappers
*********************************************************/
typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
typedef struct {
blockType_t blockType;
blockType_e blockType;
U32 unusedBits;
U32 origSize;
} blockProperties_t;
@ -214,8 +210,8 @@ size_t local_ZSTD_decompressContinue(void* dst, size_t dstCapacity, void* buff2,
static size_t benchMem(const void* src, size_t srcSize, U32 benchNb)
{
BYTE* dstBuff;
size_t dstBuffSize;
BYTE* buff2;
size_t const dstBuffSize = ZSTD_compressBound(srcSize);
void* buff2;
const char* benchName;
size_t (*benchFunction)(void* dst, size_t dstSize, void* verifBuff, const void* src, size_t srcSize);
double bestTime = 100000000.;
@ -252,9 +248,8 @@ static size_t benchMem(const void* src, size_t srcSize, U32 benchNb)
}
/* Allocation */
dstBuffSize = ZSTD_compressBound(srcSize);
dstBuff = (BYTE*)malloc(dstBuffSize);
buff2 = (BYTE*)malloc(dstBuffSize);
buff2 = malloc(dstBuffSize);
if ((!dstBuff) || (!buff2)) {
DISPLAY("\nError: not enough memory!\n");
free(dstBuff); free(buff2);
@ -287,7 +282,7 @@ static size_t benchMem(const void* src, size_t srcSize, U32 benchNb)
DISPLAY("ZSTD_decodeLiteralsBlock : impossible to test on this sample (not compressible)\n");
goto _cleanOut;
}
skippedSize = frameHeaderSize + 3 /* ZSTD_blockHeaderSize */;
skippedSize = frameHeaderSize + ZSTD_blockHeaderSize;
memcpy(buff2, dstBuff+skippedSize, g_cSize-skippedSize);
srcSize = srcSize > 128 KB ? 128 KB : srcSize; /* speed relative to block */
break;
@ -309,9 +304,9 @@ static size_t benchMem(const void* src, size_t srcSize, U32 benchNb)
DISPLAY("ZSTD_decodeSeqHeaders : impossible to test on this sample (not compressible)\n");
goto _cleanOut;
}
iend = ip + 3 /* ZSTD_blockHeaderSize */ + cBlockSize; /* End of first block */
ip += 3 /* ZSTD_blockHeaderSize */; /* skip block header */
ip += ZSTD_decodeLiteralsBlock(g_zdc, ip, iend-ip); /* skip literal segment */
iend = ip + ZSTD_blockHeaderSize + cBlockSize; /* End of first block */
ip += ZSTD_blockHeaderSize; /* skip block header */
ip += ZSTD_decodeLiteralsBlock(g_zdc, ip, iend-ip); /* skip literal segment */
g_cSize = iend-ip;
memcpy(buff2, ip, g_cSize); /* copy rest of block (it starts by SeqHeader) */
srcSize = srcSize > 128 KB ? 128 KB : srcSize; /* speed relative to block */

View File

@ -437,19 +437,19 @@ Header is in charge of describing how literals are packed.
It's a byte-aligned variable-size bitfield, ranging from 1 to 5 bytes,
using little-endian convention.
| BlockType | sizes format | regenerated size | [compressed size] |
| --------- | ------------ | ---------------- | ----------------- |
| 2 bits | 1 - 2 bits | 5 - 20 bits | 0 - 18 bits |
| EncodingType | sizes format | regenerated size | [compressed size] |
| ------------ | ------------ | ---------------- | ----------------- |
| 2 bits | 1 - 2 bits | 5 - 20 bits | 0 - 18 bits |
In this representation, bits on the left are smallest bits.
__Block Type__ :
__Encoding Type__ :
This field uses 2 lowest bits of first byte, describing 4 different block types :
| Value | 0 | 1 | 2 | 3 |
| ---------- | ---------- | ------ | --- | ------- |
| Block Type | Compressed | Repeat | Raw | RLE |
| Value | 0 | 1 | 2 | 3 |
| ---------- | --- | --- | ---------- | ----------- |
| Block Type | Raw | RLE | Compressed | RepeatStats |
- Compressed : This is a standard huffman-compressed block,
starting with a huffman tree description.
@ -764,14 +764,14 @@ Literal Lengths, Offsets and Match Lengths respectively.
They follow the same enumeration :
| Value | 0 | 1 | 2 | 3 |
| ---------------- | ------ | --- | ------ | --- |
| Compression Mode | predef | RLE | Repeat | FSE |
| Value | 0 | 1 | 2 | 3 |
| ---------------- | ------ | --- | ---------- | ------ |
| Compression Mode | predef | RLE | Compressed | Repeat |
- "predef" : uses a pre-defined distribution table.
- "RLE" : it's a single code, repeated `nbSeqs` times.
- "Repeat" : re-use distribution table from previous compressed block.
- "FSE" : standard FSE compression.
- "Compressed" : standard FSE compression.
A distribution table will be present.
It will be described in [next part](#distribution-tables).