added prefetch
This commit is contained in:
parent
50524bf0da
commit
73f88a66f1
@ -775,6 +775,7 @@ typedef struct {
|
|||||||
size_t litLength;
|
size_t litLength;
|
||||||
size_t matchLength;
|
size_t matchLength;
|
||||||
size_t offset;
|
size_t offset;
|
||||||
|
const BYTE* match;
|
||||||
} seq_t;
|
} seq_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@ -783,6 +784,7 @@ typedef struct {
|
|||||||
FSE_DState_t stateOffb;
|
FSE_DState_t stateOffb;
|
||||||
FSE_DState_t stateML;
|
FSE_DState_t stateML;
|
||||||
size_t prevOffset[ZSTD_REP_NUM];
|
size_t prevOffset[ZSTD_REP_NUM];
|
||||||
|
const BYTE* ptr;
|
||||||
} seqState_t;
|
} seqState_t;
|
||||||
|
|
||||||
|
|
||||||
@ -851,11 +853,14 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState)
|
|||||||
if (MEM_32bits() ||
|
if (MEM_32bits() ||
|
||||||
(totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&seqState->DStream);
|
(totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&seqState->DStream);
|
||||||
|
|
||||||
|
seq.match = seqState->ptr + seq.litLength - seq.offset; /* only for single memory segment ! */
|
||||||
|
|
||||||
/* ANS state update */
|
/* ANS state update */
|
||||||
FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
|
FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
|
||||||
FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
|
FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
|
||||||
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
|
||||||
FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
|
FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
|
||||||
|
seqState->ptr += seq.matchLength + seq.litLength;
|
||||||
|
|
||||||
return seq;
|
return seq;
|
||||||
}
|
}
|
||||||
@ -919,7 +924,7 @@ size_t ZSTD_execSequence(BYTE* op,
|
|||||||
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
|
BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
|
||||||
BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
|
BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
|
||||||
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
const BYTE* const iLitEnd = *litPtr + sequence.litLength;
|
||||||
const BYTE* match = oLitEnd - sequence.offset;
|
const BYTE* match = sequence.match;
|
||||||
|
|
||||||
/* check */
|
/* check */
|
||||||
if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
|
if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
|
||||||
@ -987,6 +992,8 @@ size_t ZSTD_execSequence(BYTE* op,
|
|||||||
return sequenceLength;
|
return sequenceLength;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#include <xmmintrin.h>
|
||||||
|
#define PREFETCH(ptr) _mm_prefetch(ptr, _MM_HINT_T0);
|
||||||
|
|
||||||
static size_t ZSTD_decompressSequences(
|
static size_t ZSTD_decompressSequences(
|
||||||
ZSTD_DCtx* dctx,
|
ZSTD_DCtx* dctx,
|
||||||
@ -1018,6 +1025,7 @@ static size_t ZSTD_decompressSequences(
|
|||||||
int seqNb;
|
int seqNb;
|
||||||
dctx->fseEntropy = 1;
|
dctx->fseEntropy = 1;
|
||||||
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->rep[i]; }
|
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->rep[i]; }
|
||||||
|
seqState.ptr = op;
|
||||||
CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
|
CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
|
||||||
FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
|
FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
|
||||||
FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
|
FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
|
||||||
@ -1025,28 +1033,28 @@ static size_t ZSTD_decompressSequences(
|
|||||||
|
|
||||||
/* prepare in advance */
|
/* prepare in advance */
|
||||||
int const seqAdvance = MIN(nbSeq, 3);
|
int const seqAdvance = MIN(nbSeq, 3);
|
||||||
for (seqNb=0; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb<seqAdvance; seqNb++) {
|
for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb<seqAdvance; seqNb++) {
|
||||||
sequences[seqNb] = ZSTD_decodeSequence(&seqState);
|
sequences[seqNb] = ZSTD_decodeSequence(&seqState);
|
||||||
}
|
}
|
||||||
if (seqNb<seqAdvance) return ERROR(corruption_detected);
|
if (seqNb<seqAdvance) return ERROR(corruption_detected);
|
||||||
|
|
||||||
/* decode and decompress */
|
/* decode and decompress */
|
||||||
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb<nbSeq ; seqNb++) {
|
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb<nbSeq ; seqNb++) {
|
||||||
seq_t const sequence = ZSTD_decodeSequence(&seqState);
|
|
||||||
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-3) & 3], &litPtr, litEnd, base, vBase, dictEnd);
|
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-3) & 3], &litPtr, litEnd, base, vBase, dictEnd);
|
||||||
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
||||||
sequences[seqNb&3] = sequence;
|
sequences[seqNb&3] = ZSTD_decodeSequence(&seqState);
|
||||||
|
PREFETCH(sequences[seqNb&3].match);
|
||||||
op += oneSeqSize;
|
op += oneSeqSize;
|
||||||
}
|
}
|
||||||
if (seqNb<nbSeq) return ERROR(corruption_detected);
|
if (seqNb<nbSeq) return ERROR(corruption_detected);
|
||||||
|
|
||||||
/* finish queue */
|
/* finish queue */
|
||||||
seqNb -=seqAdvance;
|
seqNb -= seqAdvance;
|
||||||
for ( ; seqNb<nbSeq ; seqNb++) {
|
for ( ; seqNb<nbSeq ; seqNb++) {
|
||||||
{ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&3], &litPtr, litEnd, base, vBase, dictEnd);
|
size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&3], &litPtr, litEnd, base, vBase, dictEnd);
|
||||||
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
|
||||||
op += oneSeqSize;
|
op += oneSeqSize;
|
||||||
} }
|
}
|
||||||
|
|
||||||
/* save reps for next block */
|
/* save reps for next block */
|
||||||
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->rep[i] = (U32)(seqState.prevOffset[i]); }
|
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->rep[i] = (U32)(seqState.prevOffset[i]); }
|
||||||
|
Loading…
x
Reference in New Issue
Block a user