From be391438ff62d5d41088db1770093cea9c33ee7c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 22 Mar 2016 23:19:28 +0100 Subject: [PATCH] first working version with both encoder and decode alternate LL + ML coding scheme. decompression speed highly impacted --- lib/zstd_compress.c | 44 ++++------------- lib/zstd_decompress.c | 107 ++++++++++++++++++++++-------------------- lib/zstd_internal.h | 7 +-- lib/zstd_opt.h | 2 +- programs/bench.c | 6 +-- 5 files changed, 71 insertions(+), 95 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index d0cd50ea..81db040c 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -80,7 +80,6 @@ static void ZSTD_resetSeqStore(seqStore_t* ssPtr) ssPtr->lit = ssPtr->litStart; ssPtr->litLength = ssPtr->litLengthStart; ssPtr->matchLength = ssPtr->matchLengthStart; - ssPtr->dumps = ssPtr->dumpsStart; } @@ -184,14 +183,14 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, const size_t blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params.windowLog); const U32 divider = (params.searchLength==3) ? 3 : 4; const size_t maxNbSeq = blockSize / divider; - const size_t tokenSpace = blockSize + 12*maxNbSeq; + const size_t tokenSpace = blockSize + 11*maxNbSeq; const size_t contentSize = (params.strategy == ZSTD_fast) ? 0 : (1 << params.contentLog); const size_t hSize = 1 << params.hashLog; const size_t h3Size = (params.searchLength==3) ? 
(1 << HASHLOG3) : 0; const size_t tableSpace = (contentSize + hSize + h3Size) * sizeof(U32); /* Check if workSpace is large enough, alloc a new one if needed */ - { size_t const optSpace = ((1<seqStore.mlCodeStart = zc->seqStore.llCodeStart + maxNbSeq; zc->seqStore.offCodeStart = zc->seqStore.mlCodeStart + maxNbSeq; zc->seqStore.litStart = zc->seqStore.offCodeStart + maxNbSeq; - zc->seqStore.dumpsStart = zc->seqStore.litStart + maxNbSeq; if (params.strategy == ZSTD_btopt) { - zc->seqStore.litFreq = (U32*)((void*)(zc->seqStore.dumpsStart + maxNbSeq)); + zc->seqStore.litFreq = (U32*)((void*)(zc->seqStore.litStart + blockSize)); zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1); - zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (1<seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1); zc->seqStore.matchTable = (ZSTD_match_t*)((void*)(zc->seqStore.offCodeFreq + (1<seqStore.priceTable = (ZSTD_optimal_t*)((void*)(zc->seqStore.matchTable + ZSTD_OPT_NUM+1)); zc->seqStore.litLengthSum = 0; @@ -599,12 +597,6 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, size_t const nbSeq = offsetTableEnd - offsetTable; BYTE* seqHead; - static U32 blockNb = 0; - blockNb++; - - if (blockNb==79) - blockNb += !nbSeq; - /* Compress literals */ { const BYTE* const literals = seqStorePtr->litStart; size_t const litSize = seqStorePtr->lit - literals; @@ -620,25 +612,8 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; if (nbSeq==0) goto _check_compressibility; - /* dumps : contains rests of large lengths */ - if ((oend-op) < 3 /* dumps */ + 1 /*seqHead*/) - return ERROR(dstSize_tooSmall); - seqHead = op; - { size_t const dumpsLength = seqStorePtr->dumps - seqStorePtr->dumpsStart; - if (dumpsLength < 512) { - op[0] = (BYTE)(dumpsLength >> 8); - op[1] = (BYTE)(dumpsLength); - op += 2; - } else { - op[0] = 2; - op[1] = (BYTE)(dumpsLength>>8); 
- op[2] = (BYTE)(dumpsLength); - op += 3; - } - if ((size_t)(oend-op) < dumpsLength+6) return ERROR(dstSize_tooSmall); - memcpy(op, seqStorePtr->dumpsStart, dumpsLength); - op += dumpsLength; - } + /* seqHead : flags for FSE encoding type */ + seqHead = op++; #define MIN_SEQ_FOR_DYNAMIC_FSE 64 #define MAX_SEQ_FOR_STATIC_FSE 1000 @@ -714,7 +689,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, /* ML codes */ { static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 20, 31, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, @@ -753,7 +728,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, MLtype = FSE_ENCODING_DYNAMIC; } } - seqHead[0] += (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); zc->flagStaticTables = 0; /* Encoding Sequences */ @@ -791,6 +766,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, BIT_addBits(&blockStream, mlTable[n], ML_bits[MLCode]); BIT_addBits(&blockStream, offset, nbBits); /* 31 */ /* 61 */ /* 24 bits max in 32-bits mode */ BIT_addBits(&blockStream, llTable[n], LL_bits[LLCode]); + //if (blockStream.bitPos > 63) printf("pb : blockStream.bitPos == %u > 63 \n", blockStream.bitPos); BIT_flushBits(&blockStream); /* 7 */ /* 7 */ } } @@ -824,7 +800,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B static const BYTE* g_start = NULL; const U32 pos = (U32)(literals - g_start); if (g_start==NULL) g_start = literals; - if ((pos > 198618400) && (pos < 198618500)) + if ((pos > 10354000) && (pos < 10355000)) printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n", pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); #endif diff --git 
a/lib/zstd_decompress.c b/lib/zstd_decompress.c index b1f51561..96b8846c 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -559,9 +559,37 @@ FORCE_INLINE size_t ZSTD_buildSeqTableLL(FSE_DTable* DTable, U32 type, U32 max, } -size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr, - FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, - const void* src, size_t srcSize) +FORCE_INLINE size_t ZSTD_buildSeqTableML(FSE_DTable* DTable, U32 type, U32 max, U32 maxLog, + const void* src, size_t srcSize) +{ + switch(type) + { + case FSE_ENCODING_RLE : + if (!srcSize) return ERROR(srcSize_wrong); + if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected); + FSE_buildDTable_rle(DTable, *(const BYTE*)src); /* if *src > max, data is corrupted */ + return 1; + case FSE_ENCODING_RAW : + FSE_buildDTable(DTable, ML_defaultNorm, max, ML_defaultNormLog); + return 0; + case FSE_ENCODING_STATIC: + return 0; + default : /* impossible */ + case FSE_ENCODING_DYNAMIC : + { U32 tableLog; + S16 norm[MaxSeq+1]; + size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); + if (FSE_isError(headerSize)) return ERROR(corruption_detected); + if (tableLog > maxLog) return ERROR(corruption_detected); + FSE_buildDTable(DTable, norm, max, tableLog); + return headerSize; + } } +} + + +size_t ZSTD_decodeSeqHeaders(int* nbSeq, + FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, + const void* src, size_t srcSize) { const BYTE* const istart = (const BYTE* const)src; const BYTE* ip = istart; @@ -585,26 +613,13 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLen LLtype = *ip >> 6; Offtype = (*ip >> 4) & 3; MLtype = (*ip >> 2) & 3; - { size_t dumpsLength; - if (*ip & 2) { - dumpsLength = ip[2]; - dumpsLength += ip[1] << 8; - ip += 3; - } else { - dumpsLength = ip[1]; - dumpsLength += (ip[0] & 1) << 8; - ip += 2; - } - *dumpsPtr = ip; - ip += dumpsLength; - *dumpsLengthPtr 
= dumpsLength; - } + ip++; /* check */ if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ /* Build DTables */ - { size_t const bhSize = ZSTD_buildSeqTableLL(DTableLL, LLtype, 35, LLFSELog, ip, iend-ip); + { size_t const bhSize = ZSTD_buildSeqTableLL(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip); if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); ip += bhSize; } @@ -612,7 +627,7 @@ size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLen if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); ip += bhSize; } - { size_t const bhSize = ZSTD_buildSeqTable(DTableML, MLtype, MLbits, MLFSELog, ip, iend-ip); + { size_t const bhSize = ZSTD_buildSeqTableML(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip); if (ZSTD_isError(bhSize)) return ERROR(corruption_detected); ip += bhSize; } @@ -633,8 +648,6 @@ typedef struct { FSE_DState_t stateOffb; FSE_DState_t stateML; size_t prevOffset; - const BYTE* dumps; - const BYTE* dumpsEnd; } seqState_t; @@ -662,31 +675,26 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState, const U32 mls) if (offsetCode | !litCode) seqState->prevOffset = seq->offset; /* cmove */ seq->offset = offset; if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); - FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* update */ } - /* Literal length update */ + { static const U32 ML_base[MaxML+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 34, 36, 38, 40, 44, 48, 56, 64, 80, 96, 0x80, 0x100, 0x200, 0x400, 0x800, + 0x1000, 0x2000, 0x4000, 0x8000, 0x10000 }; + U32 const mlCode = FSE_peakSymbol(&(seqState->stateML)); + seq->matchLength = ML_base[mlCode] + BIT_readBits(&(seqState->DStream), ML_bits[mlCode]) + mls; + } + + /* ANS update */ FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); /* update */ if (MEM_32bits()) 
BIT_reloadDStream(&(seqState->DStream)); - /* MatchLength */ - { size_t matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); - const BYTE* dumps = seqState->dumps; - if (matchLength == MaxML) { - const BYTE* const de = seqState->dumpsEnd; - const U32 add = *dumps++; - if (add < 255) matchLength += add; - else { - matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */ - if (matchLength&1) matchLength>>=1, dumps += 3; - else matchLength = (U16)(matchLength)>>1, dumps += 2; - } - if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */ - } - matchLength += mls; - seq->matchLength = matchLength; - seqState->dumps = dumps; - } + FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream)); /* update */ + if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); + + FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); /* update */ + if (MEM_32bits()) BIT_reloadDStream(&(seqState->DStream)); #if 0 /* debug */ { @@ -781,12 +789,10 @@ static size_t ZSTD_decompressSequences( BYTE* const ostart = (BYTE* const)dst; BYTE* op = ostart; BYTE* const oend = ostart + maxDstSize; - size_t dumpsLength; const BYTE* litPtr = dctx->litPtr; const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8; const BYTE* const litEnd = litPtr + dctx->litSize; int nbSeq; - const BYTE* dumps; U32* DTableLL = dctx->LLTable; U32* DTableML = dctx->MLTable; U32* DTableOffb = dctx->OffTable; @@ -796,7 +802,7 @@ static size_t ZSTD_decompressSequences( const U32 mls = dctx->fParams.mml; /* Build Decoding Tables */ - { size_t const errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength, + { size_t const errorCode = ZSTD_decodeSeqHeaders(&nbSeq, DTableLL, DTableML, DTableOffb, ip, seqSize); if (ZSTD_isError(errorCode)) return errorCode; @@ -810,8 +816,6 @@ static size_t ZSTD_decompressSequences( memset(&sequence, 0, sizeof(sequence)); sequence.offset = 
REPCODE_STARTVALUE; - seqState.dumps = dumps; - seqState.dumpsEnd = dumps + dumpsLength; seqState.prevOffset = REPCODE_STARTVALUE; { size_t const errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip); if (ERR_isError(errorCode)) return ERROR(corruption_detected); } @@ -825,7 +829,7 @@ static size_t ZSTD_decompressSequences( ZSTD_decodeSequence(&sequence, &seqState, mls); #if 0 /* for debug */ { U32 pos = (U32)(op-base); - if ((pos > 198618400) && (pos < 198618500)) + if ((pos > 10354000) && (pos < 10355000)) printf("pos %6u : %3u literals & match %3u bytes at distance %6u \n", pos, (U32)sequence.litLength, (U32)sequence.matchLength, (U32)sequence.offset); } @@ -867,17 +871,16 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, const void* src, size_t srcSize) { /* blockType == blockCompressed */ const BYTE* ip = (const BYTE*)src; - size_t litCSize; if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); ZSTD_LOG_BLOCK("%p: ZSTD_decompressBlock_internal searchLength=%d\n", dctx->base, dctx->params.searchLength); /* Decode literals sub-block */ - litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); - if (ZSTD_isError(litCSize)) return litCSize; - ip += litCSize; - srcSize -= litCSize; + { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); + if (ZSTD_isError(litCSize)) return litCSize; + ip += litCSize; + srcSize -= litCSize; } return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize); } diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h index 3b83059b..fa6c93ca 100644 --- a/lib/zstd_internal.h +++ b/lib/zstd_internal.h @@ -102,16 +102,15 @@ typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t; #define HASHLOG3 17 #define Litbits 8 -#define MLbits 7 #define Offbits 5 #define MaxLit ((1<litLengthSum == 0) { ssPtr->litSum = (2<litLengthSum = MaxLL+1; - ssPtr->matchLengthSum = (1<matchLengthSum = MaxML+1; ssPtr->offCodeSum = (1<matchSum = (2< /* fprintf, fopen, ftello64 */ #include /* stat64 
*/ #include <sys/stat.h> /* stat64 */ -#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC */ +#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC */ /* sleep : posix - windows - others */ #if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__))) @@ -65,7 +65,7 @@ #include "mem.h" #include "zstd_static.h" #include "xxhash.h" -#include "datagen.h" /* RDG_genBuffer */ +#include "datagen.h" /* RDG_genBuffer */ /* ************************************* @@ -283,7 +283,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, (double)srcSize / 1000000. / (fastestC / CLOCKS_PER_SEC) ); (void)crcCheck; (void)fastestD; (void)crcOrig; /* unused when decompression disabled */ -#if 0 +#if 1 /* Decompression */ memset(resultBuffer, 0xD6, srcSize); /* warm result buffer */