Merge pull request #2381 from senhuang42/expand_sequence_extraction_api
Add enum to define ZSTD_Sequence type and update sequence extraction API
This commit is contained in:
commit
f62edf0fe9
@ -2505,6 +2505,7 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
|
|||||||
for (i = 0; i < seqStoreSeqSize; ++i) {
|
for (i = 0; i < seqStoreSeqSize; ++i) {
|
||||||
outSeqs[i].litLength = seqStoreSeqs[i].litLength;
|
outSeqs[i].litLength = seqStoreSeqs[i].litLength;
|
||||||
outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH;
|
outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH;
|
||||||
|
outSeqs[i].rep = 0;
|
||||||
|
|
||||||
if (i == seqStore->longLengthPos) {
|
if (i == seqStore->longLengthPos) {
|
||||||
if (seqStore->longLengthID == 1) {
|
if (seqStore->longLengthID == 1) {
|
||||||
@ -2549,8 +2550,8 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
|
|||||||
zc->seqCollector.seqIndex += seqStoreSeqSize;
|
zc->seqCollector.seqIndex += seqStoreSeqSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
|
size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
|
||||||
size_t outSeqsSize, const void* src, size_t srcSize)
|
size_t outSeqsSize, const void* src, size_t srcSize)
|
||||||
{
|
{
|
||||||
const size_t dstCapacity = ZSTD_compressBound(srcSize);
|
const size_t dstCapacity = ZSTD_compressBound(srcSize);
|
||||||
void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
|
void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
|
||||||
@ -2569,6 +2570,22 @@ size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
|
|||||||
return zc->seqCollector.seqIndex;
|
return zc->seqCollector.seqIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) {
|
||||||
|
size_t in = 0;
|
||||||
|
size_t out = 0;
|
||||||
|
for (; in < seqsSize; ++in) {
|
||||||
|
if (sequences[in].offset == 0 && sequences[in].matchLength == 0) {
|
||||||
|
if (in != seqsSize - 1) {
|
||||||
|
sequences[in+1].litLength += sequences[in].litLength;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
sequences[out] = sequences[in];
|
||||||
|
++out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
/* Returns true if the given block is an RLE block */
|
/* Returns true if the given block is an RLE block */
|
||||||
static int ZSTD_isRLE(const BYTE *ip, size_t length) {
|
static int ZSTD_isRLE(const BYTE *ip, size_t length) {
|
||||||
size_t i;
|
size_t i;
|
||||||
|
33
lib/zstd.h
33
lib/zstd.h
@ -1149,7 +1149,7 @@ typedef struct {
|
|||||||
* rep == 2 --> offset == repeat_offset_3
|
* rep == 2 --> offset == repeat_offset_3
|
||||||
* rep == 3 --> offset == repeat_offset_1 - 1
|
* rep == 3 --> offset == repeat_offset_1 - 1
|
||||||
*
|
*
|
||||||
* Note: This field is optional. ZSTD_getSequences() will calculate the value of
|
* Note: This field is optional. ZSTD_generateSequences() will calculate the value of
|
||||||
* 'rep', but repeat offsets do not necessarily need to be calculated from an external
|
* 'rep', but repeat offsets do not necessarily need to be calculated from an external
|
||||||
* sequence provider's perspective.
|
* sequence provider's perspective.
|
||||||
*/
|
*/
|
||||||
@ -1297,17 +1297,36 @@ ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcS
|
|||||||
* or an error code (if srcSize is too small) */
|
* or an error code (if srcSize is too small) */
|
||||||
ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
|
ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
|
||||||
|
|
||||||
/*! ZSTD_getSequences() :
|
typedef enum {
|
||||||
* Extract sequences from the sequence store.
|
ZSTD_sf_explicitBlockDelimiters, /* Representation of ZSTD_Sequence contains explicit block delimiters */
|
||||||
* Each block will end with a dummy sequence with offset == 0, matchLength == 0, and litLength == length of last literals.
|
ZSTD_sf_noBlockDelimiters /* Representation of ZSTD_Sequence has no block delimiters, sequences only */
|
||||||
|
} ZSTD_sequenceFormat_e;
|
||||||
|
|
||||||
|
/*! ZSTD_generateSequences() :
|
||||||
|
* Generate sequences using ZSTD_compress2, given a source buffer.
|
||||||
|
*
|
||||||
|
* Each block will end with a dummy sequence
|
||||||
|
* with offset == 0, matchLength == 0, and litLength == length of last literals.
|
||||||
|
* litLength may be == 0, and if so, then the sequence (of: 0 ml: 0 ll: 0)
|
||||||
|
* simply acts as a block delimiter.
|
||||||
*
|
*
|
||||||
* zc can be used to insert custom compression params.
|
* zc can be used to insert custom compression params.
|
||||||
* This function invokes ZSTD_compress2
|
* This function invokes ZSTD_compress2
|
||||||
* @return : number of sequences extracted
|
* @return : number of sequences generated
|
||||||
*/
|
*/
|
||||||
ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
|
|
||||||
size_t outSeqsSize, const void* src, size_t srcSize);
|
|
||||||
|
|
||||||
|
ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
|
||||||
|
size_t outSeqsSize, const void* src, size_t srcSize);
|
||||||
|
|
||||||
|
/*! ZSTD_mergeBlockDelimiters() :
|
||||||
|
* Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
|
||||||
|
* by merging them into the literals of the next sequence.
|
||||||
|
*
|
||||||
|
* As such, the final generated result has no explicit representation of block boundaries,
|
||||||
|
* and the final last literals segment is not represented in the sequences.
|
||||||
|
* @return : number of sequences left after merging
|
||||||
|
*/
|
||||||
|
ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
|
||||||
|
|
||||||
/***************************************
|
/***************************************
|
||||||
* Memory management
|
* Memory management
|
||||||
|
@ -305,13 +305,17 @@ static int FUZ_mallocTests(unsigned seed, double compressibility, unsigned part)
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize, BYTE* src, size_t size)
|
static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize,
|
||||||
|
BYTE* src, size_t size, ZSTD_sequenceFormat_e format)
|
||||||
{
|
{
|
||||||
size_t i;
|
size_t i;
|
||||||
size_t j;
|
size_t j;
|
||||||
for(i = 0; i < seqsSize; ++i) {
|
for(i = 0; i < seqsSize; ++i) {
|
||||||
assert(dst + seqs[i].litLength + seqs[i].matchLength <= dst + size);
|
assert(dst + seqs[i].litLength + seqs[i].matchLength <= dst + size);
|
||||||
assert(src + seqs[i].litLength + seqs[i].matchLength <= src + size);
|
assert(src + seqs[i].litLength + seqs[i].matchLength <= src + size);
|
||||||
|
if (format == ZSTD_sf_noBlockDelimiters) {
|
||||||
|
assert(seqs[i].matchLength != 0 || seqs[i].offset != 0);
|
||||||
|
}
|
||||||
|
|
||||||
memcpy(dst, src, seqs[i].litLength);
|
memcpy(dst, src, seqs[i].litLength);
|
||||||
dst += seqs[i].litLength;
|
dst += seqs[i].litLength;
|
||||||
@ -326,6 +330,9 @@ static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize,
|
|||||||
size -= seqs[i].matchLength;
|
size -= seqs[i].matchLength;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (format == ZSTD_sf_noBlockDelimiters) {
|
||||||
|
memcpy(dst, src, size);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*=============================================
|
/*=============================================
|
||||||
@ -2702,9 +2709,9 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
|||||||
DISPLAYLEVEL(3, "OK \n");
|
DISPLAYLEVEL(3, "OK \n");
|
||||||
}
|
}
|
||||||
|
|
||||||
DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences decode from sequences test : ", testNb++);
|
DISPLAYLEVEL(3, "test%3i : ZSTD_generateSequences decode from sequences test : ", testNb++);
|
||||||
{
|
{
|
||||||
size_t srcSize = 100 KB;
|
size_t srcSize = 150 KB;
|
||||||
BYTE* src = (BYTE*)CNBuffer;
|
BYTE* src = (BYTE*)CNBuffer;
|
||||||
BYTE* decoded = (BYTE*)compressedBuffer;
|
BYTE* decoded = (BYTE*)compressedBuffer;
|
||||||
|
|
||||||
@ -2718,11 +2725,14 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
|||||||
/* Populate src with random data */
|
/* Populate src with random data */
|
||||||
RDG_genBuffer(CNBuffer, srcSize, compressibility, 0., seed);
|
RDG_genBuffer(CNBuffer, srcSize, compressibility, 0., seed);
|
||||||
|
|
||||||
/* get the sequences */
|
/* Test with block delimiters roundtrip */
|
||||||
seqsSize = ZSTD_getSequences(cctx, seqs, srcSize, src, srcSize);
|
seqsSize = ZSTD_generateSequences(cctx, seqs, srcSize, src, srcSize);
|
||||||
|
FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize, ZSTD_sf_explicitBlockDelimiters);
|
||||||
|
assert(!memcmp(CNBuffer, compressedBuffer, srcSize));
|
||||||
|
|
||||||
/* "decode" and compare the sequences */
|
/* Test no block delimiters roundtrip */
|
||||||
FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize);
|
seqsSize = ZSTD_mergeBlockDelimiters(seqs, seqsSize);
|
||||||
|
FUZ_decodeSequences(decoded, seqs, seqsSize, src, srcSize, ZSTD_sf_noBlockDelimiters);
|
||||||
assert(!memcmp(CNBuffer, compressedBuffer, srcSize));
|
assert(!memcmp(CNBuffer, compressedBuffer, srcSize));
|
||||||
|
|
||||||
ZSTD_freeCCtx(cctx);
|
ZSTD_freeCCtx(cctx);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user