added prefetch

This commit is contained in:
Yann Collet 2016-11-23 15:43:30 -08:00
parent 50524bf0da
commit 73f88a66f1

View File

@ -775,6 +775,7 @@ typedef struct {
size_t litLength; size_t litLength;
size_t matchLength; size_t matchLength;
size_t offset; size_t offset;
const BYTE* match;
} seq_t; } seq_t;
typedef struct { typedef struct {
@ -783,6 +784,7 @@ typedef struct {
FSE_DState_t stateOffb; FSE_DState_t stateOffb;
FSE_DState_t stateML; FSE_DState_t stateML;
size_t prevOffset[ZSTD_REP_NUM]; size_t prevOffset[ZSTD_REP_NUM];
const BYTE* ptr;
} seqState_t; } seqState_t;
@ -851,11 +853,14 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState)
if (MEM_32bits() || if (MEM_32bits() ||
(totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&seqState->DStream); (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&seqState->DStream);
seq.match = seqState->ptr + seq.litLength - seq.offset; /* only for single memory segment ! */
/* ANS state update */ /* ANS state update */
FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
seqState->ptr += seq.matchLength + seq.litLength;
return seq; return seq;
} }
@ -919,7 +924,7 @@ size_t ZSTD_execSequence(BYTE* op,
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
const BYTE* const iLitEnd = *litPtr + sequence.litLength; const BYTE* const iLitEnd = *litPtr + sequence.litLength;
const BYTE* match = oLitEnd - sequence.offset; const BYTE* match = sequence.match;
/* check */ /* check */
if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
@ -987,6 +992,8 @@ size_t ZSTD_execSequence(BYTE* op,
return sequenceLength; return sequenceLength;
} }
/* PREFETCH(ptr) :
 * Hint to the CPU that the memory at `ptr` will be read soon.
 * It is purely advisory : it never changes program results.
 * - Expands to a single expression WITHOUT a trailing ';', so that
 *   `if (c) PREFETCH(p); else ...` parses as one statement.
 * - The argument is parenthesized and cast, so any object pointer type
 *   (e.g. const BYTE*) is accepted by _mm_prefetch().
 * - <xmmintrin.h> is only pulled in on targets that advertise SSE;
 *   other GCC-compatible compilers use __builtin_prefetch(),
 *   and anything else gets a no-op that still evaluates `ptr` once. */
#if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1))
#  include <xmmintrin.h>
#  define PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
#elif defined(__GNUC__)
#  define PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* read */, 3 /* keep in all cache levels */)
#else
#  define PREFETCH(ptr) ((void)(ptr))
#endif
static size_t ZSTD_decompressSequences( static size_t ZSTD_decompressSequences(
ZSTD_DCtx* dctx, ZSTD_DCtx* dctx,
@ -1018,6 +1025,7 @@ static size_t ZSTD_decompressSequences(
int seqNb; int seqNb;
dctx->fseEntropy = 1; dctx->fseEntropy = 1;
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->rep[i]; } { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->rep[i]; }
seqState.ptr = op;
CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
@ -1025,28 +1033,28 @@ static size_t ZSTD_decompressSequences(
/* prepare in advance */ /* prepare in advance */
int const seqAdvance = MIN(nbSeq, 3); int const seqAdvance = MIN(nbSeq, 3);
for (seqNb=0; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb<seqAdvance; seqNb++) { for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb<seqAdvance; seqNb++) {
sequences[seqNb] = ZSTD_decodeSequence(&seqState); sequences[seqNb] = ZSTD_decodeSequence(&seqState);
} }
if (seqNb<seqAdvance) return ERROR(corruption_detected); if (seqNb<seqAdvance) return ERROR(corruption_detected);
/* decode and decompress */ /* decode and decompress */
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb<nbSeq ; seqNb++) { for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb<nbSeq ; seqNb++) {
seq_t const sequence = ZSTD_decodeSequence(&seqState);
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-3) & 3], &litPtr, litEnd, base, vBase, dictEnd); size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-3) & 3], &litPtr, litEnd, base, vBase, dictEnd);
if (ZSTD_isError(oneSeqSize)) return oneSeqSize; if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
sequences[seqNb&3] = sequence; sequences[seqNb&3] = ZSTD_decodeSequence(&seqState);
PREFETCH(sequences[seqNb&3].match);
op += oneSeqSize; op += oneSeqSize;
} }
if (seqNb<nbSeq) return ERROR(corruption_detected); if (seqNb<nbSeq) return ERROR(corruption_detected);
/* finish queue */ /* finish queue */
seqNb -=seqAdvance; seqNb -= seqAdvance;
for ( ; seqNb<nbSeq ; seqNb++) { for ( ; seqNb<nbSeq ; seqNb++) {
{ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&3], &litPtr, litEnd, base, vBase, dictEnd); size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&3], &litPtr, litEnd, base, vBase, dictEnd);
if (ZSTD_isError(oneSeqSize)) return oneSeqSize; if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
op += oneSeqSize; op += oneSeqSize;
} } }
/* save reps for next block */ /* save reps for next block */
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->rep[i] = (U32)(seqState.prevOffset[i]); } { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->rep[i] = (U32)(seqState.prevOffset[i]); }