Merge pull request #1411 from facebook/prefetch_dict
Improves decompression speed when using cold dictionary
This commit is contained in:
commit
7b0c551bff
@ -33,7 +33,7 @@ largeNbDicts: util.o bench.o datagen.o xxhash.o largeNbDicts.c $(LIBZSTD)
|
|||||||
|
|
||||||
.PHONY: $(LIBZSTD)
|
.PHONY: $(LIBZSTD)
|
||||||
$(LIBZSTD):
|
$(LIBZSTD):
|
||||||
$(MAKE) -C $(LIBDIR) libzstd.a
|
$(MAKE) -C $(LIBDIR) libzstd.a CFLAGS="$(CFLAGS)"
|
||||||
|
|
||||||
bench.o : $(PROGDIR)/bench.c
|
bench.o : $(PROGDIR)/bench.c
|
||||||
$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
|
$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
|
||||||
@ -50,4 +50,5 @@ xxhash.o : $(LIBDIR)/common/xxhash.c
|
|||||||
|
|
||||||
clean:
|
clean:
|
||||||
$(RM) *.o
|
$(RM) *.o
|
||||||
|
$(MAKE) -C $(LIBDIR) clean > /dev/null
|
||||||
$(RM) largeNbDicts
|
$(RM) largeNbDicts
|
||||||
|
@ -49,6 +49,7 @@
|
|||||||
|
|
||||||
|
|
||||||
/*--- Macros ---*/
|
/*--- Macros ---*/
|
||||||
|
|
||||||
#define CONTROL(c) { if (!(c)) abort(); }
|
#define CONTROL(c) { if (!(c)) abort(); }
|
||||||
#undef MIN
|
#undef MIN
|
||||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||||
@ -594,6 +595,7 @@ int bench(const char** fileNameTable, unsigned nbFiles,
|
|||||||
if (blockSize)
|
if (blockSize)
|
||||||
DISPLAYLEVEL(3, "of max size %u bytes ", (unsigned)blockSize);
|
DISPLAYLEVEL(3, "of max size %u bytes ", (unsigned)blockSize);
|
||||||
DISPLAYLEVEL(3, "\n");
|
DISPLAYLEVEL(3, "\n");
|
||||||
|
size_t const totalSrcSlicesSize = sliceCollection_totalCapacity(srcSlices);
|
||||||
|
|
||||||
|
|
||||||
size_t* const dstCapacities = malloc(nbBlocks * sizeof(*dstCapacities));
|
size_t* const dstCapacities = malloc(nbBlocks * sizeof(*dstCapacities));
|
||||||
@ -625,8 +627,8 @@ int bench(const char** fileNameTable, unsigned nbFiles,
|
|||||||
|
|
||||||
/* dictionary determination */
|
/* dictionary determination */
|
||||||
buffer_t const dictBuffer = createDictionaryBuffer(dictionary,
|
buffer_t const dictBuffer = createDictionaryBuffer(dictionary,
|
||||||
srcBuffer.ptr,
|
srcs.buffer.ptr,
|
||||||
srcSlices.capacities, nbBlocks,
|
srcs.slices.capacities, srcs.slices.nbSlices,
|
||||||
DICTSIZE);
|
DICTSIZE);
|
||||||
CONTROL(dictBuffer.ptr != NULL);
|
CONTROL(dictBuffer.ptr != NULL);
|
||||||
|
|
||||||
@ -637,7 +639,7 @@ int bench(const char** fileNameTable, unsigned nbFiles,
|
|||||||
CONTROL(cTotalSizeNoDict != 0);
|
CONTROL(cTotalSizeNoDict != 0);
|
||||||
DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f (%u bytes) \n",
|
DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f (%u bytes) \n",
|
||||||
clevel,
|
clevel,
|
||||||
(double)srcSize / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict);
|
(double)totalSrcSlicesSize / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict);
|
||||||
|
|
||||||
size_t* const cSizes = malloc(nbBlocks * sizeof(size_t));
|
size_t* const cSizes = malloc(nbBlocks * sizeof(size_t));
|
||||||
CONTROL(cSizes != NULL);
|
CONTROL(cSizes != NULL);
|
||||||
@ -646,7 +648,7 @@ int bench(const char** fileNameTable, unsigned nbFiles,
|
|||||||
CONTROL(cTotalSize != 0);
|
CONTROL(cTotalSize != 0);
|
||||||
DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f (%u bytes) \n",
|
DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f (%u bytes) \n",
|
||||||
(unsigned)dictBuffer.size,
|
(unsigned)dictBuffer.size,
|
||||||
(double)srcSize / cTotalSize, (unsigned)cTotalSize);
|
(double)totalSrcSlicesSize / cTotalSize, (unsigned)cTotalSize);
|
||||||
|
|
||||||
/* now dstSlices contain the real compressed size of each block, instead of the maximum capacity */
|
/* now dstSlices contain the real compressed size of each block, instead of the maximum capacity */
|
||||||
shrinkSizes(dstSlices, cSizes);
|
shrinkSizes(dstSlices, cSizes);
|
||||||
|
@ -15,7 +15,6 @@
|
|||||||
* Dependencies
|
* Dependencies
|
||||||
*********************************************************/
|
*********************************************************/
|
||||||
#include <string.h> /* memcpy, memmove, memset */
|
#include <string.h> /* memcpy, memmove, memset */
|
||||||
#include "compiler.h" /* prefetch */
|
|
||||||
#include "cpu.h" /* bmi2 */
|
#include "cpu.h" /* bmi2 */
|
||||||
#include "mem.h" /* low level memory routines */
|
#include "mem.h" /* low level memory routines */
|
||||||
#define FSE_STATIC_LINKING_ONLY
|
#define FSE_STATIC_LINKING_ONLY
|
||||||
|
@ -56,7 +56,6 @@
|
|||||||
* Dependencies
|
* Dependencies
|
||||||
*********************************************************/
|
*********************************************************/
|
||||||
#include <string.h> /* memcpy, memmove, memset */
|
#include <string.h> /* memcpy, memmove, memset */
|
||||||
#include "compiler.h" /* prefetch */
|
|
||||||
#include "cpu.h" /* bmi2 */
|
#include "cpu.h" /* bmi2 */
|
||||||
#include "mem.h" /* low level memory routines */
|
#include "mem.h" /* low level memory routines */
|
||||||
#define FSE_STATIC_LINKING_ONLY
|
#define FSE_STATIC_LINKING_ONLY
|
||||||
|
@ -507,16 +507,6 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* prefetch dictionary content */
|
|
||||||
if (dctx->ddictIsCold) {
|
|
||||||
size_t const dictSize = (const char*)dctx->prefixStart - (const char*)dctx->virtualStart;
|
|
||||||
size_t const psmin = MIN(dictSize, (size_t)(64*nbSeq) /* heuristic */ );
|
|
||||||
size_t const pSize = MIN(psmin, 128 KB /* protection */ );
|
|
||||||
const void* const pStart = (const char*)dctx->dictEnd - pSize;
|
|
||||||
PREFETCH_AREA(pStart, pSize);
|
|
||||||
dctx->ddictIsCold = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
return ip-istart;
|
return ip-istart;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1046,6 +1036,7 @@ ZSTD_decompressSequencesLong_body(
|
|||||||
/* prepare in advance */
|
/* prepare in advance */
|
||||||
for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
|
for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
|
||||||
sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
|
sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
|
||||||
|
PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
|
||||||
}
|
}
|
||||||
if (seqNb<seqAdvance) return ERROR(corruption_detected);
|
if (seqNb<seqAdvance) return ERROR(corruption_detected);
|
||||||
|
|
||||||
@ -1070,9 +1061,6 @@ ZSTD_decompressSequencesLong_body(
|
|||||||
|
|
||||||
/* save reps for next block */
|
/* save reps for next block */
|
||||||
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
|
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
|
||||||
#undef STORED_SEQS
|
|
||||||
#undef STORED_SEQS_MASK
|
|
||||||
#undef ADVANCED_SEQS
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* last literal segment */
|
/* last literal segment */
|
||||||
@ -1213,20 +1201,27 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Build Decoding Tables */
|
/* Build Decoding Tables */
|
||||||
{ int nbSeq;
|
{ int usePrefetchDecoder = dctx->ddictIsCold;
|
||||||
|
int nbSeq;
|
||||||
size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
|
size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
|
||||||
if (ZSTD_isError(seqHSize)) return seqHSize;
|
if (ZSTD_isError(seqHSize)) return seqHSize;
|
||||||
ip += seqHSize;
|
ip += seqHSize;
|
||||||
srcSize -= seqHSize;
|
srcSize -= seqHSize;
|
||||||
|
|
||||||
if ( (!frame || (dctx->fParams.windowSize > (1<<24)))
|
if ( !usePrefetchDecoder
|
||||||
&& (nbSeq>0) ) { /* could probably use a larger nbSeq limit */
|
&& (!frame || (dctx->fParams.windowSize > (1<<24)))
|
||||||
|
&& (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */
|
||||||
U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
|
U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
|
||||||
U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
|
U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
|
||||||
if (shareLongOffsets >= minShare)
|
usePrefetchDecoder = (shareLongOffsets >= minShare);
|
||||||
return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dctx->ddictIsCold = 0;
|
||||||
|
|
||||||
|
if (usePrefetchDecoder)
|
||||||
|
return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
|
||||||
|
|
||||||
|
/* else */
|
||||||
return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
|
return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user